def loadLearnedStructures(self):
    """Load every learned structure saved as a .dot file by this experiment.

    Files are expected under
    ``<result_dir>/structures/<method>/<params>_<result_domain_str>/`` and named
    ``sampleXX_size<size>.dot`` for each restart and each sample size.

    Returns
    -------
    numpy.ndarray of otagr.NamedDAG, shape (n_points, n_restart)
        One learned NamedDAG per (sample size, restart) pair.
    """
    sizes = np.linspace(self.begin_size, self.end_size, self.n_points, dtype=int)
    # Parameter values are concatenated (dots stripped) to tag the result folder.
    param_tag = '_'.join(str(v).replace('.', '') for v in self.parameters.values())
    base_path = os.path.join(self.result_dir, 'structures', self.method,
                             param_tag + '_' + self.result_domain_str)
    structures = []
    for restart in range(self.n_restart):
        row = []
        for size in sizes:
            stem = 'sample' + str(restart + 1).zfill(2) + '_size' + str(size)
            dag, var_names = gu.read_graph(os.path.join(base_path, stem + '.dot'))
            row.append(otagr.NamedDAG(dag, var_names))
        structures.append(row)
    # (n_restart, n_points) -> transpose so rows are indexed by sample size.
    return np.reshape(structures, (self.n_restart, self.n_points)).transpose()
def load_struct(self):
    """Read the reference structure from its .dot file as an otagr.NamedDAG."""
    dot_path = self.structure_dir + self.data_structure + '.dot'
    dag, node_names = gu.read_graph(dot_path)
    return otagr.NamedDAG(dag, node_names)
def fastNamedDAG(dotlike):
    """Build an otagr.NamedDAG from a dot-like arc description.

    ``dotlike`` is a ';'-separated list of chains mixing forward and backward
    arrows, e.g. ``"A->B<-C;D->A"``. Within a chain, ``X->Y`` adds the arc
    X -> Y and ``X<-Y`` adds the arc Y -> X. Node ids are assigned in order of
    first appearance.
    """
    dag = gum.DAG()
    names = []

    def _node_id(name):
        # Register the name on first sight; ids follow insertion order.
        if name in names:
            return names.index(name)
        new_id = dag.addNode()
        names.append(name)
        return new_id

    for chain in dotlike.split(';'):
        if not chain:
            continue
        prev = None
        # Splitting on '->' yields groups; inside a group nodes are
        # chained backwards with '<-'.
        for group in chain.split('->'):
            back_nodes = group.split('<-')
            head = _node_id(back_nodes[0])
            if prev is not None:
                dag.addArc(prev, head)  # forward arc across the '->'
            prev = head
            for name in back_nodes[1:]:
                node = _node_id(name)
                dag.addArc(node, prev)  # reversed arc across the '<-'
                prev = node
    return otagr.NamedDAG(dag, names)
def CBN_PC(data, result_structure_path):
    """Learn a continuous Bayesian network on ``data`` with the PC algorithm.

    Skeleton, PDAG and DAG are each written under a dedicated subdirectory of
    ``result_structure_path``; any stage whose output file already exists is
    skipped (the DAG is re-read from disk in that case). Parameters are then
    fit with kernel smoothing marginals and Bernstein copulas.

    NOTE(review): the file-name tag uses a free module-level variable ``size``
    — confirm it is defined by the calling script.
    """
    print("CBN with PC")
    alpha = 0.01
    conditioningSet = 4
    tag = str(size).zfill(7) + ".dot"

    # Create one output directory per stage and record which stages are done.
    stage_dir = {}
    stage_done = {}
    for stage in ("skeleton", "pdag", "dag"):
        sub = result_structure_path.joinpath(stage)
        sub.mkdir(parents=True, exist_ok=True)
        stage_dir[stage] = sub
        stage_done[stage] = sub.joinpath(stage + "_" + tag).exists()

    learner = otagr.ContinuousPC(data, conditioningSet, alpha)
    learner.setVerbosity(True)

    if not stage_done["skeleton"]:
        gu.write_graph(learner.learnSkeleton(),
                       stage_dir["skeleton"].joinpath("skeleton_" + tag))
    if not stage_done["pdag"]:
        gu.write_graph(learner.learnPDAG(),
                       stage_dir["pdag"].joinpath("pdag_" + tag))
    if not stage_done["dag"]:
        dag = learner.learnDAG()
        gu.write_graph(dag, stage_dir["dag"].joinpath("dag_" + tag))
    else:
        # Reuse the previously learned DAG instead of relearning it.
        dag, names = gu.read_graph(stage_dir["dag"].joinpath("dag_" + tag))
        dag = otagr.NamedDAG(dag, names)

    print("Learning parameters")
    factories = [
        ot.KernelSmoothing(ot.Epanechnikov()),
        ot.BernsteinCopulaFactory()
    ]
    ot.Log.SetFile("log")
    ot.Log.Show(ot.Log.INFO)
    model = otagr.ContinuousBayesianNetworkFactory(factories, dag, alpha,
                                                   conditioningSet,
                                                   False).build(data)
    ot.Log.Show(ot.Log.INFO)
    return model
def generate_data(self):
    """Ensure ``data_number`` sample files of ``data_size`` rows exist in ``data_dir``.

    Existing files that are shorter than expected are deleted, then only the
    missing samples are generated from the true structure and exported as
    ``sampleXX.csv``.
    """
    Path(self.data_dir).mkdir(parents=True, exist_ok=True)
    # Purge samples that do not have the expected number of lines
    # (header + data_size rows + trailing newline).
    for fname in os.listdir(self.data_dir):
        fpath = os.path.join(self.data_dir, fname)
        with open(fpath, 'r') as f:
            too_short = len(f.read().split('\n')) < (self.data_size + 2)
        if too_short:
            os.remove(fpath)
    n_existing = len(os.listdir(self.data_dir))
    ndag = otagr.NamedDAG(self.load_struct())
    # Generate only the missing samples, numbered from n_existing + 1.
    for idx in range(n_existing, self.data_number):
        sample = dg.generate_data(ndag, self.data_size, self.data_distribution,
                                  **self.data_parameters)
        out_name = "sample" + str(idx + 1).zfill(2)
        sample.exportToCSVFile(os.path.join(self.data_dir, out_name) + ".csv", ',')
def learnDAG(sample, dis_method='quantile', nbins=5, threshold=25):
    """Learn a DAG from a continuous sample with MIIC after discretization.

    The sample is dumped to a temporary CSV, each variable is discretized with
    ``skbn.BNDiscretizer``, and the structure is learned by pyAgrum's MIIC with
    NML correction.

    Parameters
    ----------
    sample : ot.Sample
        Continuous data; its description provides the variable names.
    dis_method : str
        Default discretization method (e.g. 'quantile').
    nbins : int
        Default number of discretization bins.
    threshold : int
        Discretization threshold passed to the discretizer.

    Returns
    -------
    (otagr.NamedDAG, float, float)
        The learned named DAG plus the start/end timestamps of the
        learning phase.
    """
    names = list(sample.getDescription())
    # Bug fix: the previous NamedTemporaryFile(delete=False) approach created
    # a file at csvfile.name but wrote/removed only csvfile.name + '.csv',
    # leaking the extensionless temp file on every call. mkstemp with a
    # suffix creates exactly one file, and try/finally guarantees cleanup
    # even if learning fails.
    fd, csvfilename = tf.mkstemp(suffix='.csv')
    os.close(fd)
    try:
        sample.exportToCSVFile(csvfilename, ',')
        start = time.time()
        discretizer = skbn.BNDiscretizer(defaultDiscretizationMethod=dis_method,
                                         defaultNumberOfBins=nbins,
                                         discretizationThreshold=threshold)
        variables = [
            discretizer.createVariable(name, sample.getMarginal([name]))
            for name in names
        ]
        bn = gum.BayesNet()
        for variable in variables:
            bn.add(variable)
        learner = gum.BNLearner(csvfilename, bn)
        learner.useMIIC()
        learner.useNMLCorrection()
        dag = learner.learnDAG()
        ndag = otagr.NamedDAG(dag, names)
        end = time.time()
    finally:
        os.remove(csvfilename)
    return ndag, start, end
# Benchmark chunk: compare the structure-learning run time of Continuous PC
# against Elidan's hill climbing on randomly generated Gaussian CBNs of
# increasing size.
# NOTE(review): relies on module-level names defined elsewhere in the file:
# res_directory, start_size, end_size, step, density, sample_size, args,
# mcss, alpha, max_parents, n_restart_hc, ut, hc, otagr, path, os, time.
if not path.isdir(res_directory):
    os.mkdir(res_directory)
generator = gum.BNGenerator()
gum.initRandom(10)  # fixed seed so the generated BNs are reproducible
n_nodes = []
times_cpc = []
times_elidan = []
for i in range(start_size, end_size + 1, step):
    print("Number of node :", i, flush=True)
    n_nodes.append(i)
    # number of arcs derived from the requested density
    n_arc = int(density * (i - 1))
    bn = generator.generate(i, n_arc)
    TNdag = otagr.NamedDAG(bn.dag(), bn.names())
    data = ut.generate_gaussian_data(TNdag, sample_size,
                                     float(args.correlation))
    learner = otagr.ContinuousPC(data, mcss, alpha)
    # time Continuous PC
    start = time.time()
    LNdagCPC = learner.learnDAG()
    end = time.time()
    times_cpc.append(end - start)
    # time hill climbing (element [1] of the result is the learned DAG)
    start = time.time()
    LNdagElidan = hc.hill_climbing(data, max_parents, n_restart_hc)[1]
    end = time.time()
    times_elidan.append(end - start)
# Script chunk: make sure `n_sample` datasets of `sample_size` rows exist for
# the requested structure/distribution, generating only the missing ones.
# NOTE(review): relies on names defined elsewhere in the file: structure,
# data_directory, args, sample_size, n_sample, correlation, data_file_name,
# load, dg, otagr, path, os.
Tstruct_file = structure + ".txt"
struct_directory = "../../data/structures/"
data_directory = path.join(data_directory, structure)
if not path.isdir(data_directory):
    os.mkdir(data_directory)
# gaussian/student samples are further grouped by correlation value
# (decimal dot stripped from the folder name)
if args.distribution == "gaussian" or args.distribution == "student":
    r_subdir = 'r' + str(args.correlation).replace('.', '')
    data_directory = path.join(data_directory, r_subdir)
    if not path.isdir(data_directory):
        os.mkdir(data_directory)
# If the existing samples do not have the expected length, remove them all.
# Only the first file is inspected — assumed representative of the batch.
ldir = os.listdir(data_directory)
if ldir:
    with open(path.join(data_directory, ldir[0]), 'r') as f:
        if len(f.read().split('\n')) != (sample_size + 2):
            for l in ldir:
                os.remove(path.join(data_directory, l))
n_existing_sample = len(os.listdir(data_directory))
Tstruct = load.load_struct(path.join(struct_directory, Tstruct_file))
ndag = otagr.NamedDAG(Tstruct)
# Generate only the missing samples, numbered from n_existing_sample + 1.
for i in range(n_existing_sample, n_sample):
    sample = dg.generate_data(ndag, sample_size, args.distribution, correlation)
    sample.exportToCSVFile(path.join(data_directory, data_file_name) + \
                           '_' + str(i + 1).zfill(2) + ".csv", ',')
# Experiment chunk: estimate the average 2-point mutual information (no
# correction) between two variables that are NOT linked in the DAG, as a
# function of the sample size used for the estimate.
import pyAgrum as gum
import matplotlib.pyplot as plt

# NOTE(review): relies on names defined elsewhere in the file: dg (data
# generation module) and otagrum.
ds_size = 10000
distribution = 'student'
restarts = 20
S = list(range(1000, 10100, 100))  # sample sizes to evaluate
names = ['X', 'Y']
dag = gum.DAG()
dag.addNodes(2)
# dag.addArc(0,1)
ndag = otagrum.NamedDAG(dag, names)
# `restarts` independent datasets of the maximum size; size-`size` prefixes
# of each dataset are reused below.
D = [dg.generate_data(ndag, ds_size, distribution, r=0.8)
     for _ in range(restarts)]
I = []
for size in S:
    print("Size: ", size)
    info = 0
    for i, data in enumerate(D):
        print("Restart: ", i + 1)
        cmi = otagrum.CorrectedMutualInformation(data[:size])
        cmi.setKMode(otagrum.CorrectedMutualInformation.KModeTypes_NoCorr)
        info += cmi.compute2PtCorrectedInformation(0, 1)
    I.append(info / restarts)  # average estimate over the restarts
plt.plot(S, I)
#!/usr/bin/env python import openturns as ot import otagrum import pyAgrum as gum import sys proto = "A->B->C->D;E->A->C<-E" bn = gum.BayesNet.fastPrototype(proto) print(" proto : ", proto) print(" BN : ", bn) ndag = otagrum.NamedDAG(bn) print(" size : ", ndag.getSize()) print(" desc : ", ndag.getDescription()) print(" nodes : ", ndag.getTopologicalOrder()) for nod in ndag.getTopologicalOrder(): print(" parents(", nod, ") : ", ndag.getParents(nod)) print("children(", nod, ") : ", ndag.getChildren(nod)) if False: marginals = [ot.Uniform(0.0, 1.0) for i in range(order.getSize())] copulas = list() for i in range(order.getSize()): d = 1 + ndag.getParents(i).getSize() print("i=", i, ", d=", d) if d == 1: copulas.append(ot.IndependentCopula(1)) else: R = ot.CorrelationMatrix(d) for i in range(d):
def load_struct(self):
    """Load the data-generating structure from ``<structure_dir><data_structure>.dot``."""
    graph, labels = gu.read_graph(
        '{}{}.dot'.format(self.structure_dir, self.data_structure))
    return otagr.NamedDAG(graph, labels)
# Script chunk: learn a structure on the alarm sample with otagrum's TabuList
# and with Elidan's hill climbing, writing each learned graph to a .dot file.
import graph_utils as gu
import elidan.hill_climbing as hc

# NOTE(review): `ot` and `otagrum` are assumed to be imported earlier in the
# file.
data = ot.Sample.ImportFromTextFile("../data/samples/dirichlet/alarm/sample01.csv", ',')
# learner = cmiic.ContinuousMIIC(data,
#                                cmode=cmiic.CModeType.Bernstein,
#                                kmode=cmiic.KModeType.Naive)
learner = otagrum.TabuList(data, 4, 5, 5)
learner.setVerbosity(True)
dag = learner.learnDAG()
gu.write_graph(dag, "output_tabulist_gaussian.dot")
# learner = otagrum.TabuList(data, 4, 5, 5)
# learner.setCMode(otagrum.CorrectedMutualInformation.CModeTypes_Bernstein)
# learner.setVerbosity(True)
# dag = learner.learnDAG()
# gu.write_graph(dag, "output_tabulist_bernstein.dot")
# hill_climbing returns a tuple; [1] is the learned DAG, [2] its final score
t = hc.hill_climbing(data)
names = list(data.getDescription())
gu.write_graph(otagrum.NamedDAG(t[1], names), "output_hc.dot")
print("Final score hc: ", t[2])
# learner.use3off2()
# pdag = learner.learnMixedStructure()
# dag = learner.learnStructure()
# gnb.showDot(learner._ContinuousMIIC__skeleton.toDot())
# gnb.showDot(pdag.toDot())
# gnb.showDot(dag.toDot())
mapping = {} mapping['A'] = dag.addNode() # Add node A mapping['B'] = dag.addNode() # Add node B mapping['C'] = dag.addNode() # Add node C mapping['D'] = dag.addNode() # Add node D # %% dag.addArc(mapping['A'], mapping['C']) # Arc A -> C dag.addArc(mapping['B'], mapping['C']) # Arc B -> C dag.addArc(mapping['C'], mapping['D']) # Arc C -> D # %% dag # %% structure = otagrum.NamedDAG(dag, list(mapping.keys())) # %% showDot(structure.toDot()) # %% # Parameters of the CBN ... and a collection of marginals and local conditional copulas. # %% m_list = [ot.Uniform(0.0, 1.0) for i in range(structure.getSize())] # Local marginals lcc_list = [] # Local Conditional Copulas for i in range(structure.getSize()): dim_lcc = structure.getParents(i).getSize() + 1 R = ot.CorrelationMatrix(dim_lcc) for j in range(dim_lcc):