Beispiel #1
0
    def loadLearnedStructures(self):
        """Read every learned structure back from disk.

        Structures are stored as .dot files named
        ``sampleRR_sizeSSS.dot`` under the method's result directory.
        Returns a (n_points, n_restart) numpy array of otagr.NamedDAG,
        indexed first by sample size, then by restart.
        """
        sample_sizes = np.linspace(self.begin_size, self.end_size,
                                   self.n_points, dtype=int)
        # Parameter values are concatenated (dots stripped) to form the
        # result sub-directory name.
        param_str = '_'.join(str(v).replace('.', '')
                             for v in self.parameters.values())
        base_dir = os.path.join(self.result_dir, 'structures', self.method,
                                param_str + '_' + self.result_domain_str)

        structures = []
        for restart in range(self.n_restart):
            row = []
            for sample_size in sample_sizes:
                dot_name = ('sample' + str(restart + 1).zfill(2)
                            + '_size' + str(sample_size))
                dag, var_names = gu.read_graph(
                    os.path.join(base_dir, dot_name + '.dot'))
                row.append(otagr.NamedDAG(dag, var_names))
            structures.append(row)

        return np.reshape(structures,
                          (self.n_restart, self.n_points)).transpose()
Beispiel #2
0
 def load_struct(self):
     """Read the structure's .dot file and wrap it as an otagr.NamedDAG."""
     graph, node_names = gu.read_graph(
         self.structure_dir + self.data_structure + '.dot')
     return otagr.NamedDAG(graph, node_names)
Beispiel #3
0
def fastNamedDAG(dotlike):
    """Build an otagr.NamedDAG from a dot-like arc-chain string.

    ``dotlike`` is a ';'-separated list of chains such as
    ``"A->B<-C;D->E"``, where ``->`` and ``<-`` give the arc direction.
    Node ids are assigned in order of first appearance.
    """
    dag = gum.DAG()
    names = []

    def node_id(name):
        # Reuse the node if this name was already seen, else add it.
        if name in names:
            return names.index(name)
        nid = dag.addNode()
        names.append(name)
        return nid

    for chain in dotlike.split(';'):
        if not chain:
            continue
        prev_id = None  # id of the previously visited node in this chain
        for pos, segment in enumerate(chain.split('->')):
            for k, name in enumerate(segment.split('<-')):
                cur_id = node_id(name)
                if prev_id is not None:
                    # The first name of every '->' segment (after the
                    # chain head) is a forward link; names reached via
                    # '<-' point back at the previous node.
                    if pos > 0 and k == 0:
                        dag.addArc(prev_id, cur_id)
                    else:
                        dag.addArc(cur_id, prev_id)
                prev_id = cur_id
    return otagr.NamedDAG(dag, names)
Beispiel #4
0
def CBN_PC(data, result_structure_path, alpha=0.01, conditioningSet=4):
    """Learn a Continuous Bayesian Network with the PC algorithm.

    Intermediate graphs (skeleton, PDAG, DAG) are cached as .dot files
    under ``result_structure_path`` and reused when already present.

    Parameters
    ----------
    data : ot.Sample
        Learning sample.
    result_structure_path : pathlib.Path
        Root directory for the cached structure files.
    alpha : float, optional
        Significance level of the independence tests (default 0.01,
        as in the original hard-coded value).
    conditioningSet : int, optional
        Maximum conditioning-set size (default 4, as originally).

    Returns
    -------
    The model built by otagr.ContinuousBayesianNetworkFactory.
    """
    print("CBN with PC")

    # NOTE(review): `size` is read from an enclosing/global scope; it tags
    # the cache files with the sample size — confirm callers define it.
    # Build each file name once so the "done" check and the write/read
    # below can never drift apart (they used to be rebuilt separately).
    suffix = str(size).zfill(7) + ".dot"

    skeleton_path = result_structure_path.joinpath("skeleton")
    skeleton_path.mkdir(parents=True, exist_ok=True)
    pdag_path = result_structure_path.joinpath("pdag")
    pdag_path.mkdir(parents=True, exist_ok=True)
    dag_path = result_structure_path.joinpath("dag")
    dag_path.mkdir(parents=True, exist_ok=True)

    skeleton_file = skeleton_path.joinpath("skeleton_" + suffix)
    pdag_file = pdag_path.joinpath("pdag_" + suffix)
    dag_file = dag_path.joinpath("dag_" + suffix)

    learner = otagr.ContinuousPC(data, conditioningSet, alpha)
    learner.setVerbosity(True)

    # Each stage is skipped when its cached .dot file already exists.
    if not skeleton_file.exists():
        skel = learner.learnSkeleton()
        gu.write_graph(skel, skeleton_file)

    if not pdag_file.exists():
        pdag = learner.learnPDAG()
        gu.write_graph(pdag, pdag_file)

    if not dag_file.exists():
        dag = learner.learnDAG()
        gu.write_graph(dag, dag_file)
    else:
        dag, names = gu.read_graph(dag_file)
        dag = otagr.NamedDAG(dag, names)

    print("Learning parameters")
    factories = [
        ot.KernelSmoothing(ot.Epanechnikov()),
        ot.BernsteinCopulaFactory()
    ]
    ot.Log.SetFile("log")
    ot.Log.Show(ot.Log.INFO)
    model = otagr.ContinuousBayesianNetworkFactory(factories, dag, alpha,
                                                   conditioningSet,
                                                   False).build(data)
    return model
Beispiel #5
0
    def generate_data(self):
        """Top up the data directory to ``self.data_number`` samples.

        Sample files that are shorter than the expected length are
        deleted first; only the missing samples are then regenerated.
        """
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)

        # Drop any CSV that does not hold the full data set.
        for entry in os.listdir(self.data_dir):
            file_path = os.path.join(self.data_dir, entry)
            with open(file_path, 'r') as f:
                if len(f.read().split('\n')) < (self.data_size + 2):
                    os.remove(file_path)

        n_existing = len(os.listdir(self.data_dir))

        true_struct = self.load_struct()
        ndag = otagr.NamedDAG(true_struct)

        # Generate only the samples that are still missing.
        for idx in range(n_existing, self.data_number):
            sample = dg.generate_data(ndag, self.data_size,
                                      self.data_distribution,
                                      **self.data_parameters)
            out_name = "sample" + str(idx + 1).zfill(2)
            sample.exportToCSVFile(
                os.path.join(self.data_dir, out_name) + ".csv", ',')
def learnDAG(sample, dis_method='quantile', nbins=5, threshold=25):
    """Learn a DAG from an OpenTURNS sample via pyAgrum's MIIC learner.

    The sample is discretized with skbn.BNDiscretizer and written to a
    temporary CSV file that gum.BNLearner reads from disk.

    Parameters
    ----------
    sample : ot.Sample
        Learning sample.
    dis_method : str, optional
        Default discretization method for BNDiscretizer.
    nbins : int, optional
        Default number of discretization bins.
    threshold : int, optional
        Discretization threshold passed to BNDiscretizer.

    Returns
    -------
    (otagr.NamedDAG, float, float)
        The learned DAG plus the start/end wall-clock timestamps
        bracketing the learning phase.
    """
    names = list(sample.getDescription())

    # BNLearner reads from disk, so dump the sample to a temp CSV.
    tmp = tf.NamedTemporaryFile(delete=False)
    tmp.close()
    csvfilename = tmp.name + '.csv'

    try:
        sample.exportToCSVFile(csvfilename, ',')

        start = time.time()
        discretizer = skbn.BNDiscretizer(
            defaultDiscretizationMethod=dis_method,
            defaultNumberOfBins=nbins,
            discretizationThreshold=threshold)

        # Declare one (possibly discretized) variable per column.
        variables = [
            discretizer.createVariable(name, sample.getMarginal([name]))
            for name in names
        ]

        bn = gum.BayesNet()
        for variable in variables:
            bn.add(variable)

        learner = gum.BNLearner(csvfilename, bn)
        learner.useMIIC()
        learner.useNMLCorrection()

        dag = learner.learnDAG()
        ndag = otagr.NamedDAG(dag, names)

        end = time.time()
    finally:
        # Bug fix: the empty placeholder file created by
        # NamedTemporaryFile (tmp.name, without '.csv') used to be
        # leaked, and the CSV leaked on any exception. Clean up both.
        os.remove(tmp.name)
        if os.path.exists(csvfilename):
            os.remove(csvfilename)

    return ndag, start, end
Beispiel #7
0
        if not path.isdir(res_directory):
            os.mkdir(res_directory)

generator = gum.BNGenerator()
gum.initRandom(10)  # fixed seed so the generated networks are reproducible

# Benchmark: compare the learning time of Continuous PC vs Elidan's hill
# climbing on random Gaussian CBNs of growing size.
n_nodes = []
times_cpc = []
times_elidan = []
for i in range(start_size, end_size + 1, step):
    print("Number of node :", i, flush=True)

    n_nodes.append(i)
    # Arc count grows linearly with node count at the requested density.
    n_arc = int(density * (i - 1))
    bn = generator.generate(i, n_arc)
    TNdag = otagr.NamedDAG(bn.dag(), bn.names())

    data = ut.generate_gaussian_data(TNdag, sample_size,
                                     float(args.correlation))

    # Time the continuous PC learner.
    learner = otagr.ContinuousPC(data, mcss, alpha)
    start = time.time()
    LNdagCPC = learner.learnDAG()
    end = time.time()
    times_cpc.append(end - start)

    # Time Elidan's hill climbing; index [1] is the learned structure.
    start = time.time()
    LNdagElidan = hc.hill_climbing(data, max_parents, n_restart_hc)[1]
    end = time.time()
    times_elidan.append(end - start)
Beispiel #8
0
# Locate the true-structure file and the per-structure data directory.
Tstruct_file = structure + ".txt"
struct_directory = "../../data/structures/"

data_directory = path.join(data_directory, structure)
if not path.isdir(data_directory):
    os.mkdir(data_directory)

# Gaussian/student data gets a per-correlation sub-directory
# (e.g. 'r08' for correlation 0.8 — the dot is stripped).
if args.distribution == "gaussian" or args.distribution == "student":
    r_subdir = 'r' + str(args.correlation).replace('.', '')
    data_directory = path.join(data_directory, r_subdir)
    if not path.isdir(data_directory):
        os.mkdir(data_directory)

# If not the good length remove all
# (only the first file is checked: all samples are assumed to share the
# same length, so one wrong-sized file invalidates the whole directory;
# sample_size + 2 presumably counts the header line plus the empty
# string after the trailing newline — TODO confirm).
ldir = os.listdir(data_directory)
if ldir:
    with open(path.join(data_directory, ldir[0]), 'r') as f:
        if len(f.read().split('\n')) != (sample_size + 2):
            for l in ldir:
                os.remove(path.join(data_directory, l))

n_existing_sample = len(os.listdir(data_directory))

Tstruct = load.load_struct(path.join(struct_directory, Tstruct_file))
ndag=otagr.NamedDAG(Tstruct)

# Generate only the samples that are still missing.
for i in range(n_existing_sample, n_sample):
    sample = dg.generate_data(ndag, sample_size, args.distribution, correlation)
    sample.exportToCSVFile(path.join(data_directory, data_file_name) + \
                           '_' + str(i+1).zfill(2) + ".csv", ',')
Beispiel #9
0
import pyAgrum as gum
import matplotlib.pyplot as plt

# Plot the uncorrected 2-point mutual-information estimate between two
# nodes as a function of sub-sample size, averaged over several restarts.
ds_size = 10000          # full data set size per restart
distribution = 'student'
restarts = 20            # independent data sets to average over

# Sub-sample sizes at which the information is evaluated.
S = list(range(1000, 10100, 100))

names = ['X', 'Y']

# Two-node DAG; the arc between the nodes is left commented out.
dag = gum.DAG()
dag.addNodes(2)
# dag.addArc(0,1)

ndag = otagrum.NamedDAG(dag, names)

D = [dg.generate_data(ndag, ds_size, distribution, r=0.8) for _ in range(restarts)]

I = []
for size in S:
    print("Size: ", size)
    info = 0
    for i,data in enumerate(D):
        print("Restart: ", i+1)
        # KModeTypes_NoCorr: raw estimate, no finite-size correction.
        cmi = otagrum.CorrectedMutualInformation(data[:size])
        cmi.setKMode(otagrum.CorrectedMutualInformation.KModeTypes_NoCorr)
        info += cmi.compute2PtCorrectedInformation(0, 1)
    I.append(info/restarts)

plt.plot(S, I)
Beispiel #10
0
#!/usr/bin/env python

import openturns as ot
import otagrum
import pyAgrum as gum
import sys

# Build a small BN from a fast-prototype arc string and inspect the
# otagrum.NamedDAG wrapper derived from it.
proto = "A->B->C->D;E->A->C<-E"
bn = gum.BayesNet.fastPrototype(proto)
print("      proto : ", proto)
print("         BN : ", bn)

ndag = otagrum.NamedDAG(bn)
print("       size : ", ndag.getSize())
print("       desc : ", ndag.getDescription())
print("      nodes : ", ndag.getTopologicalOrder())
# Walk the nodes in topological order and show each neighbourhood.
for nod in ndag.getTopologicalOrder():
    print(" parents(", nod, ") : ", ndag.getParents(nod))
    print("children(", nod, ") : ", ndag.getChildren(nod))
if False:
    marginals = [ot.Uniform(0.0, 1.0) for i in range(order.getSize())]
    copulas = list()
    for i in range(order.getSize()):
        d = 1 + ndag.getParents(i).getSize()
        print("i=", i, ", d=", d)
        if d == 1:
            copulas.append(ot.IndependentCopula(1))
        else:
            R = ot.CorrelationMatrix(d)
            for i in range(d):
 def load_struct(self):
     """Load the data structure's .dot file as an otagr.NamedDAG."""
     graph, node_names = gu.read_graph(
         self.structure_dir + self.data_structure + '.dot')
     return otagr.NamedDAG(graph, node_names)
import graph_utils as gu
import elidan.hill_climbing as hc

data = ot.Sample.ImportFromTextFile("../data/samples/dirichlet/alarm/sample01.csv", ',')

# learner = cmiic.ContinuousMIIC(data,
                               # cmode=cmiic.CModeType.Bernstein,
                               # kmode=cmiic.KModeType.Naive)
# Learn a structure with the tabu-list search and dump it to .dot.
learner = otagrum.TabuList(data, 4, 5, 5)
learner.setVerbosity(True)
dag = learner.learnDAG()
gu.write_graph(dag, "output_tabulist_gaussian.dot")

# learner = otagrum.TabuList(data, 4, 5, 5)
# learner.setCMode(otagrum.CorrectedMutualInformation.CModeTypes_Bernstein)
# learner.setVerbosity(True)
# dag = learner.learnDAG()
# gu.write_graph(dag, "output_hc.dot" below shows t[1] is the DAG)

# Same data with hill climbing: t[1] is the learned DAG, t[2] its score.
t = hc.hill_climbing(data)
names = list(data.getDescription())
gu.write_graph(otagrum.NamedDAG(t[1], names), "output_hc.dot")
print("Final score hc: ", t[2])
# learner.use3off2()
# pdag = learner.learnMixedStructure()
# dag = learner.learnStructure()

# gnb.showDot(learner._ContinuousMIIC__skeleton.toDot())
# gnb.showDot(pdag.toDot())
# gnb.showDot(dag.toDot())
Beispiel #13
0
# Build a 4-node collider/chain DAG by hand; node ids are kept in
# `mapping`, keyed by name.
# NOTE(review): `dag` is created in an earlier (unseen) cell — confirm
# it is a fresh gum.DAG().
mapping = {}
mapping['A'] = dag.addNode()  # Add node A
mapping['B'] = dag.addNode()  # Add node B
mapping['C'] = dag.addNode()  # Add node C
mapping['D'] = dag.addNode()  # Add node D

# %%
dag.addArc(mapping['A'], mapping['C'])  # Arc A -> C
dag.addArc(mapping['B'], mapping['C'])  # Arc B -> C
dag.addArc(mapping['C'], mapping['D'])  # Arc C -> D

# %%
dag

# %%
# Wrap the graph and its node names into a NamedDAG structure.
structure = otagrum.NamedDAG(dag, list(mapping.keys()))

# %%
showDot(structure.toDot())

# %%
# Parameters of the CBN ... and a collection of marginals and local conditional copulas.

# %%
# One Uniform(0, 1) marginal per node.
m_list = [ot.Uniform(0.0, 1.0)
          for i in range(structure.getSize())]  # Local marginals
lcc_list = []  # Local Conditional Copulas
for i in range(structure.getSize()):
    dim_lcc = structure.getParents(i).getSize() + 1
    R = ot.CorrelationMatrix(dim_lcc)
    for j in range(dim_lcc):