Ejemplo n.º 1
0
def learning(sample, method, parameters):
    """Learn a Bayesian-network structure from ``sample``.

    Args:
        sample: data set handed to the structure learner; the "elidan"
            branch also reads its column names via ``getDescription()``.
        method: ``"cpc"`` (uses ``otagr.ContinuousPC``) or ``"elidan"``
            (uses ``hc.hill_climbing``).
        parameters: a pair unpacked according to ``method`` --
            ``(binNumber, alpha)`` for "cpc",
            ``(max_parents, n_restart_hc)`` for "elidan".

    Returns:
        The result of ``named_dag_to_bn`` ("cpc") or ``dag_to_bn``
        ("elidan") applied to the learned DAG.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
            (Previously a message was printed and the function then
            failed with an UnboundLocalError on ``bn``.)
    """
    if method == "cpc":
        binNumber, alpha = parameters
        learner = otagr.ContinuousPC(sample, binNumber, alpha)
        # The per-node Bernstein copulas that were estimated here before
        # were collected in a list that nothing ever read, so that work
        # has been removed.
        return named_dag_to_bn(learner.learnDAG())

    if method == "elidan":
        max_parents, n_restart_hc = parameters
        # Only the DAG (second result) is needed; the copula is ignored.
        _copula, dag = hc.hill_climbing(sample, max_parents,
                                        n_restart_hc)[0:2]
        return dag_to_bn(dag, sample.getDescription())

    raise ValueError(
        "Wrong entry for method argument: {!r} "
        "(expected 'cpc' or 'elidan')".format(method))
Ejemplo n.º 2
0
    def learning(self, sample):
        """Learn a DAG from ``sample`` with the algorithm ``self.method``.

        Supported values of ``self.method``:

        * ``"cpc"``   -- ``otagr.ContinuousPC`` configured from
          ``self.parameters['binNumber']`` and ``['alpha']``.
        * ``"cbic"``  -- ``otagr.TabuList`` configured from
          ``self.parameters['max_parents']``, ``['hc_restart']`` and
          ``['cmode']``.
        * ``"cmiic"`` -- ``otagr.ContinuousMIIC`` configured from
          ``self.parameters['cmode']``, ``['kmode']`` and ``self.kalpha``.
        * ``"dmiic"`` -- delegated to ``dsc.learnDAG``, which returns the
          DAG together with its own start/end timestamps.

        Args:
            sample: data set passed to the selected learner.

        Returns:
            A ``(ndag, elapsed)`` tuple: the learned DAG and the time spent
            learning it, in seconds.

        Raises:
            NotImplementedError: for ``"lgbn"``; that branch never produced
                a DAG and previously crashed with an UnboundLocalError.
            ValueError: for any other unknown method (previously a message
                was printed and the return statement then crashed).
        """
        method = self.method

        if method == "dmiic":
            ndag, start, end = dsc.learnDAG(sample)
            return ndag, end - start

        if method == "lgbn":
            raise NotImplementedError(
                "Structure learning is not implemented for method 'lgbn'")

        if method == "cpc":
            learner = otagr.ContinuousPC(sample,
                                         self.parameters['binNumber'],
                                         self.parameters['alpha'])
        elif method == "cbic":
            max_parents = self.parameters['max_parents']
            n_restart_hc = self.parameters['hc_restart']
            cmode = self.parameters['cmode']
            learner = otagr.TabuList(sample, max_parents, n_restart_hc, 5)
            learner.setCMode(cmode)
        elif method == "cmiic":
            cmode = self.parameters['cmode']
            kmode = self.parameters['kmode']
            learner = otagr.ContinuousMIIC(sample)
            learner.setCMode(cmode)
            learner.setKMode(kmode)
            learner.setAlpha(self.kalpha)
        else:
            raise ValueError(
                "Wrong entry for method argument: {!r}".format(method))

        # Only the structure search is timed, not the learner set-up.
        start = time.time()
        ndag = learner.learnDAG()
        end = time.time()
        return ndag, end - start
Ejemplo n.º 3
0
def CBN_PC(data, result_structure_path):
    """Learn a continuous Bayesian network with PC, reusing saved graphs.

    The skeleton, PDAG and DAG are each stored as a ``.dot`` file in their
    own sub-directory of ``result_structure_path``. A stage is only learned
    (and written) when its file does not exist yet; an existing DAG file is
    read back instead of being re-learned.

    Note: the file names embed ``size``, which is not a parameter and must
    be provided by the enclosing scope.

    Args:
        data: sample used for structure and parameter learning.
        result_structure_path: path-like object offering ``joinpath``.

    Returns:
        The model built by ``otagr.ContinuousBayesianNetworkFactory``.
    """
    print("CBN with PC")

    # Make sure the three output directories exist.
    folders = {}
    for stage in ("skeleton", "pdag", "dag"):
        folder = result_structure_path.joinpath(stage)
        folder.mkdir(parents=True, exist_ok=True)
        folders[stage] = folder

    # Resolve each target file and check for it before any learning starts.
    skeleton_file = folders["skeleton"].joinpath(
        "skeleton_" + str(size).zfill(7) + ".dot")
    skeleton_missing = not skeleton_file.exists()

    pdag_file = folders["pdag"].joinpath(
        "pdag_" + str(size).zfill(7) + ".dot")
    pdag_missing = not pdag_file.exists()

    dag_file = folders["dag"].joinpath("dag_" + str(size).zfill(7) + ".dot")
    dag_missing = not dag_file.exists()

    alpha = 0.01
    conditioningSet = 4

    learner = otagr.ContinuousPC(data, conditioningSet, alpha)
    learner.setVerbosity(True)

    if skeleton_missing:
        gu.write_graph(learner.learnSkeleton(), skeleton_file)

    if pdag_missing:
        gu.write_graph(learner.learnPDAG(), pdag_file)

    if dag_missing:
        dag = learner.learnDAG()
        gu.write_graph(dag, dag_file)
    else:
        stored_dag, names = gu.read_graph(dag_file)
        dag = otagr.NamedDAG(stored_dag, names)

    print("Learning parameters")
    factories = [
        ot.KernelSmoothing(ot.Epanechnikov()),
        ot.BernsteinCopulaFactory(),
    ]
    ot.Log.SetFile("log")
    ot.Log.Show(ot.Log.INFO)
    cbn_factory = otagr.ContinuousBayesianNetworkFactory(
        factories, dag, alpha, conditioningSet, False)
    model = cbn_factory.build(data)
    ot.Log.Show(ot.Log.INFO)
    return model
Ejemplo n.º 4
0
def testAsiaDirichlet():
    """Run continuous PC on the bundled ``asia_dirichlet_5000.csv`` sample.

    The CSV is looked up next to this file. The learned PDAG and then the
    learned DAG are printed in dot format; nothing is returned.
    """
    csv_path = os.path.join(os.path.dirname(__file__),
                            "asia_dirichlet_5000.csv")
    sample = ot.Sample.ImportFromTextFile(csv_path, ",")

    bin_number, level = 3, 0.1
    learner = otagrum.ContinuousPC(sample, bin_number, level)
    learner.setVerbosity(True)

    learned_pdag = learner.learnPDAG()
    print(learner.PDAGtoDot(learned_pdag))

    learned_dag = learner.learnDAG()
    print(learned_dag.toDot())
Ejemplo n.º 5
0
def testSpecificInstance():
    """Learn a DAG with continuous PC on 1000 generated points and print it.

    The sample comes from ``generateDataForSpecificInstance``; the DAG is
    printed in dot format and stdout is flushed afterwards.
    """
    sample = generateDataForSpecificInstance(1000)

    bin_number, level = 3, 0.1
    learner = otagrum.ContinuousPC(sample, bin_number, level)

    learned_dag = learner.learnDAG()
    print(learned_dag.toDot())
    sys.stdout.flush()
Ejemplo n.º 6
0
def CPC_learning(data, maxCondSet=5, alpha=0.1):
    """Learn a CBN: structure with continuous PC, then its parameters.

    The learned DAG is also written in dot format to
    ``dags/new_dag_CPC_<alpha>.dot`` (relative to the working directory);
    the ``dags`` directory is created when missing. Progress and timings
    are printed.

    Args:
        data: sample to learn from.
        maxCondSet: second positional argument of ``otagrum.ContinuousPC``.
        alpha: third positional argument of ``otagrum.ContinuousPC``; it
            also appears in the name of the dot file.

    Returns:
        Whatever ``CBN_parameter_learning(data, dag)`` returns.
    """
    import os  # local import: only needed to create the output directory

    print("Build CPC coefficients distribution")
    t0 = time()
    print("    Learning structure")
    t1 = time()
    learner = otagrum.ContinuousPC(data, maxCondSet, alpha)
    dag = learner.learnDAG()
    # open() does not create parent directories; without this the learned
    # DAG would be lost to an error on a fresh checkout.
    os.makedirs("dags", exist_ok=True)
    with open("dags/new_dag_CPC_{}.dot".format(alpha), "w") as f:
        f.write(dag.toDot())
    print("    t=", time() - t1, "s")

    cbn = CBN_parameter_learning(data, dag)
    print("t=", time() - t0, "s")
    return cbn
Ejemplo n.º 7
0
def learn(data, alpha=0.1, verbose=False):
    """Run continuous PC step by step, showing each intermediate graph.

    The skeleton and the PDAG are rendered with ``gnb.showDot`` when they
    have fewer than 40 nodes and printed as text otherwise. Each phase is
    wrapped in the ``timer`` context manager.

    Args:
        data: sample to learn from.
        alpha: passed to ``otagr.ContinuousPC`` as ``alpha``.
        verbose: passed to the learner's ``setVerbosity``.

    Returns:
        The DAG returned by the learner's ``learnDAG``.
    """
    with timer("Initiating"):
        # Named ``learner`` so it no longer shadows this function's name.
        learner = otagr.ContinuousPC(data, alpha=alpha)
        learner.setVerbosity(verbose)
        learner.setOptimalPolicy(True)

    with timer("Learning skeleton"):
        sk = learner.inferSkeleton()
    print("Nodes : {} , Edges : {}".format(sk.size(), sk.sizeEdges()))
    if sk.size() < 40:
        gnb.showDot(learner.skeletonToDot(sk), size="20", format="png")
    else:
        print(sk.edges())

    with timer("Learning VStructures"):
        mg = learner.inferPDAG(sk)
    if mg.size() < 40:
        gnb.showDot(mg.toDot(), size="20", format="png")
    else:
        # This branch used to print the skeleton's edges a second time.
        # Print the PDAG itself; its dot text covers both directed and
        # undirected links.
        print(mg.toDot())

    with timer("Learning complete DAG"):
        dag = learner.learnDAG()
    return dag
Ejemplo n.º 8
0
# Benchmark: for each network size, time structure learning with continuous PC
# and with hill climbing on data generated from a reference network.
n_nodes = []        # network sizes that were benchmarked
times_cpc = []      # seconds spent in ContinuousPC.learnDAG, one entry per size
times_elidan = []   # seconds spent in hc.hill_climbing, one entry per size
for i in range(start_size, end_size + 1, step):
    print("Number of node :", i, flush=True)

    n_nodes.append(i)
    # The arc count grows with the node count, scaled by `density`.
    n_arc = int(density * (i - 1))
    # presumably a network with i nodes and n_arc arcs -- confirm against `generator`
    bn = generator.generate(i, n_arc)
    TNdag = otagr.NamedDAG(bn.dag(), bn.names())

    data = ut.generate_gaussian_data(TNdag, sample_size,
                                     float(args.correlation))

    # The learner is built outside the timed section: only learnDAG() is measured.
    learner = otagr.ContinuousPC(data, mcss, alpha)
    start = time.time()
    LNdagCPC = learner.learnDAG()
    end = time.time()
    times_cpc.append(end - start)

    # Element [1] of hill_climbing's result is kept as the learned DAG.
    start = time.time()
    LNdagElidan = hc.hill_climbing(data, max_parents, n_restart_hc)[1]
    end = time.time()
    times_elidan.append(end - start)

    # Structural scoring against the reference DAG is currently disabled:
    #LNdagCPC = [[ut.named_dag_to_bn(LNdagCPC)]]
    #LNdagElidan = [[ut.dag_to_bn(LNdagElidan, data.getDescription())]]

    #cpc_scores = ut.structural_scores(ut.named_dag_to_bn(TNdag), LNdagCPC)
    #elidan_scores = ut.structural_scores(ut.named_dag_to_bn(TNdag), LNdagElidan)
Ejemplo n.º 9
0
# Draw the pair plot of the data to "pairs_ref_<dataset_name>.pdf".
f = figure_path.joinpath("pairs_ref_" + dataset_name + ".pdf")
pairs(data_draw, f)


##################### LEARNING CBN MODEL ############################

# LEARNING STRUCTURE #

# CBIC algorithm
learner = otagr.TabuList(data_ref, 2, 10, 2) # Using the tabu-list learner (not CPC)
cbic_dag = learner.learnDAG() # Learning DAG
write_graph(cbic_dag, structure_path.joinpath("cbic_dag_" + dataset_name + ".dot"))


# CPC algorithm
learner = otagr.ContinuousPC(data_ref, 4, 0.05) # Using CPC algorithm
cpc_dag = learner.learnDAG() # Learning DAG
write_graph(cpc_dag, structure_path.joinpath("cpc_dag_" + dataset_name + ".dot"))


# CMIIC algorithm

# Candidate alpha values: 0.010, 0.015, ..., 0.500.
alphas = np.arange(10, 501, 5)/1000
# alphas = [0.04, 0.05]
fig, ax = plt.subplots()

# Tick positions for the x axis (spacing 0.05 major, 0.01 minor) ...
x_major_ticks = np.arange(0, 0.5, 0.05)
x_minor_ticks = np.arange(0, 0.5, 0.01)

# ... and for the y axis (spacing 5 major, 1 minor).
y_major_ticks = np.arange(0, 25, 5)
y_minor_ticks = np.arange(0, 25, 1)
Ejemplo n.º 10
0
from __future__ import print_function

from time import time

import openturns as ot
import pyAgrum as gum

import otagrum

def generateDataForSpecificInstance(size):
    """Return ``size`` points drawn from a composed copula.

    The copula combines, in this order, a Frank copula (parameter 3.0), a
    normal copula whose 3x3 correlation matrix has entries 0.5 at (0, 1)
    and 0.45 at (0, 2), and a Clayton copula (parameter 2.0).
    """
    correlation = ot.CorrelationMatrix(3)
    for (row, col), rho in (((0, 1), 0.5), ((0, 2), 0.45)):
        correlation[row, col] = rho

    blocks = [
        ot.FrankCopula(3.0),
        ot.NormalCopula(correlation),
        ot.ClaytonCopula(2.0),
    ]
    return ot.ComposedCopula(blocks).getSample(size)


# Generate the sample the learner will work on.
size = 1000
data = generateDataForSpecificInstance(size)

# Arguments handed to ContinuousPC below.
alpha = 0.9
binNumber = 3

# t1 is the wall-clock time (seconds) taken to construct the learner only.
t0 = time()
learner = otagrum.ContinuousPC(data, binNumber, alpha)
t1 = time() - t0

#skel = learner.getSkeleton()
Ejemplo n.º 11
0
import openturns as ot
import otagrum as otagr

# Load the sample from "advised.csv" (path relative to the working directory).
X = ot.Sample.ImportFromCSVFile("advised.csv")
# Arguments handed to ContinuousPC below.
binNumber = 2
alpha = 0.9
learner = otagr.ContinuousPC(X, binNumber, alpha)
# Learn the DAG and print it in dot format.
res = learner.learnDAG()
print(res.toDot())
Ejemplo n.º 12
0
import pyAgrum as gum
import openturns as ot
import otagrum as otagr

# Load the CSV, keeping only its first 20000 rows and first 12 columns.
print('Importing data')
data = ot.Sample.ImportFromTextFile(
    '../data/Standard_coefficients_0100000.csv', ';'
)[0:20000].getMarginal(range(0, 12))

# One structure learner per algorithm, keyed by the algorithm's short name.
print('Initializing the learners')
learners = dict(
    cbic=otagr.TabuList(data, 3, 1, 2),
    cpc=otagr.ContinuousPC(data, 4, 0.01),
    cmiic=otagr.ContinuousMIIC(data),
)

# First learn every DAG ...
dags = {}
for name in learners:
    print('Learning with ', name)
    dags[name] = learners[name].learnDAG()

# ... then dump each one to "dag_<name>.dot" in the working directory.
for name in dags:
    dot = dags[name].toDot()
    with open("dag_{}.dot".format(name), "w") as f:
        f.write(dot)
Ejemplo n.º 13
0
# File holding the reference structure, in the arc syntax read by gum.fastBN.
Tstruct_file_path = "data/cbn2/struct.txt"

# Base names (no directory, no extension) of the data set and structure files.
data_set_name = data_set_path.split('/')[-1].split('.')[0]
Tstruct_file_name = Tstruct_file_path.split('/')[-1].split('.')[0]

# Read the arc description as a single line and build the reference network.
with open(Tstruct_file_path, 'r') as file:
    arcs = file.read().replace('\n', '')
Tstruct = gum.fastBN(arcs)

# Load the data (first row skipped as a header) and choose 10 sample sizes
# evenly spaced between 1000 and the full data size.
data = np.loadtxt(data_set_path, delimiter=',', skiprows=1)
sizes = np.linspace(1000, len(data), 10, dtype=int)
alpha = 0.1
binNumber = 3
list_g = []  # one learned DAG per sample size

for size in sizes:
    print(size)
    # Row indices are drawn at random, so rows may repeat (sampling with
    # replacement).
    sample = data[np.random.randint(0, len(data), size=size)]
    sample = ot.Sample(sample)
    learner = otagr.ContinuousPC(sample, binNumber, alpha)
    list_g.append(learner.learnDAG())

# Disabled: comparison of a learned graph `g` with the reference structure.
#bn = gum.BayesNet()
#for name in Tstruct.names():
#    bn.add(gum.LabelizedVariable(name))
#for arc in g.arcs():
#    bn.addArc(arc[0], arc[1])

#comparison = GraphicalBNComparator(Tstruct, bn)
#print(comparison.scores())
Ejemplo n.º 14
0
# %%
# Having a CBN, we can now sample from it.

# %%
ot.RandomGenerator.SetSeed(10)  # Set random seed
sample = cbn.getSample(1000)
# Hold out the last 100 of the 1000 points.
train = sample[:-100]
test = sample[-100:]

# %%
# Learning the structure with continuous PC:
# Now that we have data, we can use it to learn the structure with the continuous PC algorithm.

# %%
# NOTE(review): the learner is given the full `sample`, not `train` -- confirm this is intended.
learner = otagrum.ContinuousPC(sample, maxConditioningSetSize=5, alpha=0.1)

# %%
# We first learn the skeleton, that is, the undirected structure.

# %%
skeleton = learner.learnSkeleton()

# %%
skeleton

# %%
# Then we look for the v-structures, leading to a Partially Directed Acyclic Graph (PDAG).

# %%
pdag = learner.learnPDAG()