def learning(sample, method, parameters):
    """Learn a BN structure from `sample` with the chosen method.

    method == "cpc":    parameters = (binNumber, alpha)
    method == "elidan": parameters = (max_parents, n_restart_hc)
    """
    if method == "cpc":
        binNumber, alpha = parameters
        learner = otagr.ContinuousPC(sample, binNumber, alpha)
        ndag = learner.learnDAG()
        TTest = otagr.ContinuousTTest(sample, alpha)
        jointDistributions = []
        for i in range(ndag.getSize()):
            d = 1 + ndag.getParents(i).getSize()
            if d == 1:
                bernsteinCopula = ot.Uniform(0.0, 1.0)
            else:
                K = TTest.GetK(len(sample), d)
                indices = [int(n) for n in ndag.getParents(i)]
                indices = [i] + indices
                bernsteinCopula = ot.EmpiricalBernsteinCopula(
                    sample.getMarginal(indices), K, False)
            jointDistributions.append(bernsteinCopula)
        bn = named_dag_to_bn(ndag)
    elif method == "elidan":
        # print(sample.getDescription())
        max_parents, n_restart_hc = parameters
        copula, dag = hc.hill_climbing(sample, max_parents, n_restart_hc)[0:2]
        # bn = dag_to_bn(dag, Tstruct.names())
        bn = dag_to_bn(dag, sample.getDescription())
    else:
        raise ValueError("Wrong entry for method argument!")
    return bn
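# Usage sketch (assumes the helpers used above -- `named_dag_to_bn`,
# `dag_to_bn`, the hill-climbing module `hc`, and the `ot`/`otagr` imports --
# are available in this module):
#
#   sample = ot.Normal(3).getSample(1000)        # toy 3-dimensional sample
#   bn_cpc = learning(sample, "cpc", (3, 0.1))   # parameters = (binNumber, alpha)
#   bn_hc = learning(sample, "elidan", (4, 5))   # parameters = (max_parents, n_restart_hc)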
def learning(self, sample):
    """Learn a DAG from `sample` with self.method; return (ndag, learning time)."""
    if self.method == "cpc":
        learner = otagr.ContinuousPC(sample,
                                     self.parameters['binNumber'],
                                     self.parameters['alpha'])
        start = time.time()
        ndag = learner.learnDAG()
        end = time.time()
        # TTest = otagr.ContinuousTTest(sample, self.parameters['alpha'])
        # jointDistributions = []
        # for i in range(ndag.getSize()):
        #     d = 1 + ndag.getParents(i).getSize()
        #     if d == 1:
        #         bernsteinCopula = ot.Uniform(0.0, 1.0)
        #     else:
        #         K = TTest.GetK(len(sample), d)
        #         indices = [int(n) for n in ndag.getParents(i)]
        #         indices = [i] + indices
        #         bernsteinCopula = ot.EmpiricalBernsteinCopula(sample.getMarginal(indices), K, False)
        #     jointDistributions.append(bernsteinCopula)
    elif self.method == "cbic":
        # print(sample.getDescription())
        max_parents = self.parameters['max_parents']
        n_restart_hc = self.parameters['hc_restart']
        cmode = self.parameters['cmode']
        learner = otagr.TabuList(sample, max_parents, n_restart_hc, 5)
        learner.setCMode(cmode)
        start = time.time()
        ndag = learner.learnDAG()
        end = time.time()
        # bn = dag_to_bn(dag, Tstruct.names())
    elif self.method == "cmiic":
        cmode = self.parameters['cmode']
        kmode = self.parameters['kmode']
        learner = otagr.ContinuousMIIC(sample)
        learner.setCMode(cmode)
        learner.setKMode(kmode)
        learner.setAlpha(self.kalpha)
        # learner.setBeta(self.kbeta)
        start = time.time()
        ndag = learner.learnDAG()
        end = time.time()
        # bn = gu.named_dag_to_bn(ndag)
    elif self.method == "dmiic":
        # learner.setBeta(self.kbeta)
        ndag, start, end = dsc.learnDAG(sample)
        # bn = gu.named_dag_to_bn(ndag)
    elif self.method == "lgbn":
        start = time.time()
        end = time.time()
    else:
        raise ValueError("Wrong entry for method argument!")
    return ndag, end - start
def CBN_PC(data, result_structure_path):
    # Note: `size` is expected to be defined at module level.
    print("CBN with PC")
    skeleton_path = result_structure_path.joinpath("skeleton")
    skeleton_path.mkdir(parents=True, exist_ok=True)
    pdag_path = result_structure_path.joinpath("pdag")
    pdag_path.mkdir(parents=True, exist_ok=True)
    dag_path = result_structure_path.joinpath("dag")
    dag_path.mkdir(parents=True, exist_ok=True)

    skeleton_file_name = "skeleton_" + str(size).zfill(7) + ".dot"
    skeleton_done = skeleton_path.joinpath(skeleton_file_name).exists()
    pdag_file_name = "pdag_" + str(size).zfill(7) + ".dot"
    pdag_done = pdag_path.joinpath(pdag_file_name).exists()
    dag_file_name = "dag_" + str(size).zfill(7) + ".dot"
    dag_done = dag_path.joinpath(dag_file_name).exists()

    alpha = 0.01
    conditioningSet = 4
    learner = otagr.ContinuousPC(data, conditioningSet, alpha)
    learner.setVerbosity(True)

    if not skeleton_done:
        skel = learner.learnSkeleton()
        gu.write_graph(
            skel,
            skeleton_path.joinpath("skeleton_" + str(size).zfill(7) + ".dot"))
    if not pdag_done:
        pdag = learner.learnPDAG()
        gu.write_graph(
            pdag,
            pdag_path.joinpath("pdag_" + str(size).zfill(7) + ".dot"))
    if not dag_done:
        dag = learner.learnDAG()
        gu.write_graph(
            dag,
            dag_path.joinpath("dag_" + str(size).zfill(7) + ".dot"))
    else:
        dag, names = gu.read_graph(
            dag_path.joinpath("dag_" + str(size).zfill(7) + ".dot"))
        dag = otagr.NamedDAG(dag, names)

    print("Learning parameters")
    factories = [
        ot.KernelSmoothing(ot.Epanechnikov()),
        ot.BernsteinCopulaFactory()
    ]
    ot.Log.SetFile("log")
    ot.Log.Show(ot.Log.INFO)
    model = otagr.ContinuousBayesianNetworkFactory(
        factories, dag, alpha, conditioningSet, False).build(data)
    ot.Log.Show(ot.Log.INFO)
    return model
def testAsiaDirichlet():
    data = ot.Sample.ImportFromTextFile(
        os.path.join(os.path.dirname(__file__), "asia_dirichlet_5000.csv"), ",")
    alpha = 0.1
    binNumber = 3
    learner = otagrum.ContinuousPC(data, binNumber, alpha)
    learner.setVerbosity(True)
    pdag = learner.learnPDAG()
    # print(pdag)
    print(learner.PDAGtoDot(pdag))
    dag = learner.learnDAG()
    print(dag.toDot())
def testSpecificInstance():
    size = 1000
    data = generateDataForSpecificInstance(size)
    alpha = 0.1
    binNumber = 3
    learner = otagrum.ContinuousPC(data, binNumber, alpha)
    # skel = learner.learnSkeleton()
    # print(skel.toDot())
    dag = learner.learnDAG()
    print(dag.toDot())
    sys.stdout.flush()
def CPC_learning(data, maxCondSet=5, alpha=0.1):
    # Try an estimation of the coefficients distribution using univariate
    # kernel smoothing for the marginals and PC to learn the structure of
    # dependence, parameterized by a Bernstein copula.
    dimension = data.getDimension()
    print("Build CPC coefficients distribution")
    t0 = time()
    print("    Learning structure")
    t1 = time()
    learner = otagrum.ContinuousPC(data, maxCondSet, alpha)
    dag = learner.learnDAG()
    with open("dags/new_dag_CPC_{}.dot".format(alpha), "w") as f:
        f.write(dag.toDot())
    print("    t=", time() - t1, "s")
    cbn = CBN_parameter_learning(data, dag)
    print("t=", time() - t0, "s")
    # distribution = ot.ComposedDistribution(marginals, cbn)
    return cbn
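# `CBN_parameter_learning` used above is defined elsewhere in the project.
# A minimal sketch of what it could look like, reusing the
# ContinuousBayesianNetworkFactory call from CBN_PC above (the name
# `CBN_parameter_learning_sketch` and the alpha/maxCondSet defaults are
# placeholders, not the project's actual implementation):
def CBN_parameter_learning_sketch(data, dag, alpha=0.1, maxCondSet=5):
    # Kernel-smoothed marginals and Bernstein copulas, conditioned on the DAG
    factories = [ot.KernelSmoothing(ot.Epanechnikov()),
                 ot.BernsteinCopulaFactory()]
    factory = otagrum.ContinuousBayesianNetworkFactory(
        factories, dag, alpha, maxCondSet, False)
    return factory.build(data)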
def learn(data, alpha=0.1, verbose=False):
    with timer("Initiating"):
        learn = otagr.ContinuousPC(data, alpha=alpha)
        learn.setVerbosity(verbose)
        learn.setOptimalPolicy(True)

    with timer("Learning skeleton"):
        sk = learn.inferSkeleton()
    print("Nodes : {} , Edges : {}".format(sk.size(), sk.sizeEdges()))
    if sk.size() < 40:
        gnb.showDot(learn.skeletonToDot(sk), size="20", format="png")
    else:
        print(sk.edges())

    with timer("Learning VStructures"):
        mg = learn.inferPDAG(sk)
    if mg.size() < 40:
        gnb.showDot(mg.toDot(), size="20", format="png")
    else:
        print(sk.edges())

    with timer("Learning complete DAG"):
        dag = learn.learnDAG()
    return dag
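# The `timer` context manager used by `learn` above is project-specific and
# not shown here; a minimal sketch that matches the call pattern (takes a
# label, prints the elapsed time on exit) could look like this:
import time
from contextlib import contextmanager


@contextmanager
def timer(label):
    t0 = time.time()
    yield
    print("{}: {:.3f} s".format(label, time.time() - t0))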
n_nodes = []
times_cpc = []
times_elidan = []
for i in range(start_size, end_size + 1, step):
    print("Number of nodes:", i, flush=True)
    n_nodes.append(i)

    n_arc = int(density * (i - 1))
    bn = generator.generate(i, n_arc)
    TNdag = otagr.NamedDAG(bn.dag(), bn.names())
    data = ut.generate_gaussian_data(TNdag, sample_size,
                                     float(args.correlation))

    learner = otagr.ContinuousPC(data, mcss, alpha)
    start = time.time()
    LNdagCPC = learner.learnDAG()
    end = time.time()
    times_cpc.append(end - start)

    start = time.time()
    LNdagElidan = hc.hill_climbing(data, max_parents, n_restart_hc)[1]
    end = time.time()
    times_elidan.append(end - start)

    # LNdagCPC = [[ut.named_dag_to_bn(LNdagCPC)]]
    # LNdagElidan = [[ut.dag_to_bn(LNdagElidan, data.getDescription())]]
    # cpc_scores = ut.structural_scores(ut.named_dag_to_bn(TNdag), LNdagCPC)
    # elidan_scores = ut.structural_scores(ut.named_dag_to_bn(TNdag), LNdagElidan)
f = figure_path.joinpath("pairs_ref_" + dataset_name + ".pdf")
pairs(data_draw, f)

# #################### LEARNING CBN MODEL ############################

# LEARNING STRUCTURE

# CBIC algorithm
learner = otagr.TabuList(data_ref, 2, 10, 2)  # Using the TabuList (CBIC) learner
cbic_dag = learner.learnDAG()                 # Learning DAG
write_graph(cbic_dag,
            structure_path.joinpath("cbic_dag_" + dataset_name + ".dot"))

# CPC algorithm
learner = otagr.ContinuousPC(data_ref, 4, 0.05)  # Using the CPC learner
cpc_dag = learner.learnDAG()                     # Learning DAG
write_graph(cpc_dag,
            structure_path.joinpath("cpc_dag_" + dataset_name + ".dot"))

# CMIIC algorithm
alphas = np.arange(10, 501, 5) / 1000
# alphas = [0.04, 0.05]

fig, ax = plt.subplots()
x_major_ticks = np.arange(0, 0.5, 0.05)
x_minor_ticks = np.arange(0, 0.5, 0.01)
y_major_ticks = np.arange(0, 25, 5)
y_minor_ticks = np.arange(0, 25, 1)
from __future__ import print_function

from time import time

import openturns as ot
import pyAgrum as gum
import otagrum


def generateDataForSpecificInstance(size):
    R = ot.CorrelationMatrix(3)
    R[0, 1] = 0.5
    R[0, 2] = 0.45
    collection = [ot.FrankCopula(3.0), ot.NormalCopula(R), ot.ClaytonCopula(2.0)]
    copula = ot.ComposedCopula(collection)
    return copula.getSample(size)


size = 1000
data = generateDataForSpecificInstance(size)
alpha = 0.9
binNumber = 3

t0 = time()
learner = otagrum.ContinuousPC(data, binNumber, alpha)
t1 = time() - t0
# skel = learner.getSkeleton()
import openturns as ot
import otagrum as otagr

X = ot.Sample.ImportFromCSVFile("advised.csv")
binNumber = 2
alpha = 0.9
learner = otagr.ContinuousPC(X, binNumber, alpha)
res = learner.learnDAG()
print(res.toDot())
import pyAgrum as gum
import openturns as ot
import otagrum as otagr

print('Importing data')
data = ot.Sample.ImportFromTextFile(
    '../data/Standard_coefficients_0100000.csv', ';')
data = data[0:20000]
data = data.getMarginal(range(0, 12))

print('Initializing the learners')
learners = {
    'cbic': otagr.TabuList(data, 3, 1, 2),
    'cpc': otagr.ContinuousPC(data, 4, 0.01),
    'cmiic': otagr.ContinuousMIIC(data)
}

dags = {}
for (name, learner) in learners.items():
    print('Learning with', name)
    dags[name] = learner.learnDAG()

for (name, dag) in dags.items():
    dot = dag.toDot()
    with open("dag_{}.dot".format(name), "w") as f:
        f.write(dot)
Tstruct_file_path = "data/cbn2/struct.txt"

data_set_name = data_set_path.split('/')[-1].split('.')[0]
Tstruct_file_name = Tstruct_file_path.split('/')[-1].split('.')[0]

with open(Tstruct_file_path, 'r') as file:
    arcs = file.read().replace('\n', '')
Tstruct = gum.fastBN(arcs)

data = np.loadtxt(data_set_path, delimiter=',', skiprows=1)
sizes = np.linspace(1000, len(data), 10, dtype=int)

alpha = 0.1
binNumber = 3

list_g = []
for size in sizes:
    print(size)
    sample = data[np.random.randint(0, len(data), size=size)]
    sample = ot.Sample(sample)
    learner = otagr.ContinuousPC(sample, binNumber, alpha)
    list_g.append(learner.learnDAG())

# bn = gum.BayesNet()
# for name in Tstruct.names():
#     bn.add(gum.LabelizedVariable(name))
# for arc in g.arcs():
#     bn.addArc(arc[0], arc[1])
# comparison = GraphicalBNComparator(Tstruct, bn)
# print(comparison.scores())
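# A sketch of the structural comparison the commented-out block above was
# aiming at (assumptions: GraphicalBNComparator lives in pyAgrum.lib.bn_vs_bn,
# and the learned otagrum.NamedDAG exposes getDAG() and getDescription()):
def compare_to_true_structure(named_dag, true_bn):
    from pyAgrum.lib.bn_vs_bn import GraphicalBNComparator
    bn = gum.BayesNet()
    for name in named_dag.getDescription():
        bn.add(gum.LabelizedVariable(name))
    for arc in named_dag.getDAG().arcs():
        bn.addArc(int(arc[0]), int(arc[1]))
    return GraphicalBNComparator(true_bn, bn).scores()

# for g in list_g:
#     print(compare_to_true_structure(g, Tstruct))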
# %%
# Having a CBN, we can now sample from it.

# %%
ot.RandomGenerator.SetSeed(10)  # Set random seed
sample = cbn.getSample(1000)
train = sample[:-100]
test = sample[-100:]

# %%
# Learning the structure with continuous PC:
# Now that we have data, we can use it to learn the structure with the
# continuous PC algorithm.

# %%
learner = otagrum.ContinuousPC(sample, maxConditioningSetSize=5, alpha=0.1)

# %%
# We first learn the skeleton, that is, the undirected structure.

# %%
skeleton = learner.learnSkeleton()

# %%
skeleton

# %%
# Then we look for the v-structures, leading to a Partially Directed Acyclic
# Graph (PDAG).

# %%
pdag = learner.learnPDAG()
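# %%
# A possible next step (a sketch, following the ``learnDAG`` calls used in the
# other snippets above): orient the remaining edges to obtain a full DAG.

# %%
dag = learner.learnDAG()
print(dag.toDot())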