def mixedgraph_to_DAG(mixedGraph):
    """Build a ``gum.DAG`` from the nodes and arcs of a mixed graph.

    Every node id returned by ``mixedGraph.nodes()`` is recreated with the
    same id, and every ``(tail, head)`` pair returned by ``mixedGraph.arcs()``
    is added as an arc. Only ``nodes()`` and ``arcs()`` are read, so any
    undirected edge of the mixed graph is not carried over.

    Parameters
    ----------
    mixedGraph
        Graph object exposing ``nodes()`` and ``arcs()``.

    Returns
    -------
    gum.DAG
        The newly built directed graph.
    """
    result = gum.DAG()
    # Nodes first so that both endpoints exist when the arcs are inserted.
    for node in mixedGraph.nodes():
        result.addNodeWithId(node)
    for tail, head in mixedGraph.arcs():
        result.addArc(tail, head)
    return result
def fastNamedDAG(dotlike):
    """Build an ``otagr.NamedDAG`` from a compact dot-like description.

    The description is a ``';'``-separated list of chains. Inside a chain,
    names are linked with ``'->'`` (arc from the previous name to the next
    one) or ``'<-'`` (arc from the next name to the previous one), e.g.
    ``"A->B<-C;B->D"``.

    Names are used exactly as written: no whitespace is stripped. A name is
    given a new node the first time it is met; afterwards its position in the
    list of names is used as its node id.

    Parameters
    ----------
    dotlike : str
        The dot-like description of the structure.

    Returns
    -------
    otagr.NamedDAG
        Built from the DAG and the list of names in order of first appearance.
    """
    dag = gum.DAG()
    names = []
    for chain in dotlike.split(';'):
        if chain == '':
            # Empty statement (e.g. trailing ';'): nothing to parse.
            continue
        previous = 0
        seen_first = False
        for segment in chain.split('->'):
            # The first name of a '->' segment is reached by a forward arc;
            # the following names of the segment were separated by '<-'.
            pending_forward = True
            for label in segment.split('<-'):
                if label in names:
                    current = names.index(label)
                else:
                    current = dag.addNode()
                    names.append(label)

                if not seen_first:
                    # Very first name of the chain: no arc to draw yet.
                    seen_first = True
                elif pending_forward:
                    dag.addArc(previous, current)
                else:
                    dag.addArc(current, previous)

                pending_forward = False
                previous = current
    return otagr.NamedDAG(dag, names)
def partialDAGFromBN(bn: "pyAgrum.BayesNet", Nexcl: NodeSet = None) -> "pyAgrum.DAG":
    """
    Creates and returns a DAG copying the structure of a Bayesian network,
    restricted to the nodes that are not excluded.

    Node ids are preserved. An arc of ``bn`` is copied only when both of its
    endpoints are kept.

    Parameters
    ----------
    bn : pyAgrum.BayesNet
      the source Bayesian network
    Nexcl : NodeSet
      the node ids to leave out of the copy (default: no node is excluded)

    Returns
    -------
    pyAgrum.DAG
      the (possibly partial) copy of the structure of ``bn``
    """
    excluded = set() if Nexcl is None else Nexcl

    partial = pyAgrum.DAG()
    kept = set(bn.nodes()) - excluded
    for node in kept:
        partial.addNodeWithId(node)

    for tail, head in bn.arcs():
        if tail in kept and head in kept:
            partial.addArc(tail, head)
    return partial
def test3off2(self):
    """Learn from 'asia.csv' with 3off2 + NML correction under arc constraints.

    Arc (4, 1) is forbidden and arc (7, 5) is mandatory; the initial DAG has
    8 isolated nodes. The test checks the arc count and the constraints on
    the learned Bayesian network, then the arc/edge counts of the learned
    mixed structure and the number of latent variables reported.
    """
    learner = gum.BNLearner(self.agrumSrcDir('asia.csv'))
    learner.use3off2()
    learner.useNMLCorrection()
    learner.addForbiddenArc(4, 1)
    learner.addMandatoryArc(7, 5)

    # Start the search from a DAG holding nodes 0..7 and no arc.
    d = gum.DAG()
    for i in range(8):
        d.addNodeWithId(i)
    learner.setInitialDAG(d)

    self.assertNotEqual(len(learner.names()), 0)

    # `except Exception` instead of a bare `except`: KeyboardInterrupt and
    # SystemExit must not be reported as ordinary test failures.
    try:
        bn = learner.learnBN()
    except Exception:
        self.fail("Exception has been raised and should not")
    self.assertEqual(len(bn.arcs()), 9)
    self.assertFalse(bn.dag().existsArc(4, 1))
    self.assertTrue(bn.dag().existsArc(7, 5))

    try:
        mg = learner.learnMixedStructure()
    except Exception:
        self.fail("Exception has been raised and should not")
    self.assertEqual(mg.sizeArcs(), 8)
    self.assertEqual(mg.sizeEdges(), 1)
    # NOTE(review): the two checks below look at `bn` again, not at `mg`;
    # possibly the mixed graph was meant -- confirm before changing.
    self.assertFalse(bn.dag().existsArc(4, 1))
    self.assertTrue(bn.dag().existsArc(7, 5))
    self.assertEqual(len(learner.latentVariables()), 2)
def learn_parameters(bn_struct, ficname):
    """Learn the parameters of a given structure from a data file.

    Parameters
    ----------
    bn_struct
        Indexable object with a ``shape`` attribute; ``bn_struct[i]`` is
        iterated to obtain the indices of the parents of node ``i``.
    ficname
        Data source handed to ``gum.BNLearner``.

    Returns
    -------
    The result of ``learner.learnParameters`` applied to the DAG built from
    ``bn_struct``.
    """
    # Build the DAG that corresponds to bn_struct.
    node_count = bn_struct.shape[0]
    graph = gum.DAG()
    node_ids = [graph.addNode() for _ in range(node_count)]
    for child in range(node_count):
        for parent in bn_struct[child]:
            graph.addArc(node_ids[parent], node_ids[child])

    # Let the BNLearner estimate the parameters on that structure.
    learner = gum.BNLearner(ficname)
    learner.useScoreLog2Likelihood()
    learner.useAprioriSmoothing()
    return learner.learnParameters(graph)
def test_EM(self):
    """Check missing-value detection and EM parameter learning on 'EM.csv'.

    With '#' declared as the missing symbol the learner must report no
    missing value; with '?' it must report some. On that second learner,
    learning parameters must raise ``gum.MissingValueInDatabase`` until EM
    is enabled.
    """
    learner = gum.BNLearner(self.agrumSrcDir('EM.csv'), ["#"])
    self.assertFalse(learner.hasMissingValues())

    learner = gum.BNLearner(self.agrumSrcDir('EM.csv'), ["?"])
    self.assertTrue(learner.hasMissingValues())

    # One node per variable of the database, linked as 3 -> 2 -> 1 -> 0.
    dag = gum.DAG()
    for node_id in range(len(learner.names())):
        dag.addNodeWithId(node_id)
    for tail, head in ((1, 0), (2, 1), (3, 2)):
        dag.addArc(tail, head)

    with self.assertRaises(gum.MissingValueInDatabase):
        learner.learnParameters(dag)

    learner.useEM(1e-3)
    learner.useSmoothingPrior()
    learner.learnParameters(dag, False)
def one_hill_climbing(D, gaussian_copula, G, max_parents):
    """Run one hill-climbing search over DAGs, starting from ``G``.

    Each pass asks ``du.find_neighbor`` for the neighbours of the graph that
    was the best one when the pass began, scores with ``sc.bic_score`` every
    neighbour that is not in the tabu list, and keeps a copy of any neighbour
    whose score is strictly greater than the best score so far. The search
    stops after the first pass that brings no improvement.

    Parameters
    ----------
    D
        Data passed unchanged to ``sc.bic_score``.
    gaussian_copula
        Passed unchanged to ``sc.bic_score``.
    G
        Starting graph.
    max_parents
        Passed unchanged to ``du.find_neighbor``.

    Returns
    -------
    tuple
        ``(best_graph, best_score)``.
    """
    current_best = G
    top_score = sc.bic_score(D, gaussian_copula, G)
    # Graphs already retained as "best"; they are never scored again.
    visited = [current_best]

    improved = True
    while improved:
        improved = False
        for candidate in du.find_neighbor(current_best, max_parents):
            if candidate in visited:
                continue
            candidate_score = sc.bic_score(D, gaussian_copula, candidate)
            if candidate_score > top_score:
                top_score = candidate_score
                # Keep a private copy of the improving neighbour.
                current_best = gum.DAG(candidate)
                improved = True
        visited.append(current_best)

    return current_best, top_score
def find_neighbor(G, max_parents=4):
    """Yield the DAGs that differ from ``G`` by a single arc operation.

    For every ordered pair of distinct nodes ``(i, j)``:

    * if the arc ``i -> j`` exists, yield ``G`` without that arc, then (when
      ``i`` has fewer than ``max_parents`` parents in ``G``) yield ``G`` with
      the arc reversed into ``j -> i``;
    * otherwise, when ``j`` has fewer than ``max_parents`` parents in ``G``,
      yield ``G`` with the arc ``i -> j`` added.

    Additions and reversals for which ``addArc`` raises
    ``gum.InvalidDirectedCycle`` are skipped.

    Every yielded graph is an independent copy: a graph is never modified
    after it has been handed to the caller, so the results can safely be
    stored (e.g. ``list(find_neighbor(G))``). ``G`` itself is not modified.

    Parameters
    ----------
    G
        The reference DAG.
    max_parents : int
        Bound on the number of parents (counted in ``G``) above which a node
        receives no additional parent.

    Yields
    ------
    gum.DAG
        One neighbour of ``G`` at a time.
    """
    for i in G.nodes():
        for j in G.nodes():
            if i == j:
                continue

            if G.existsArc(i, j):
                # Neighbour 1: arc (i, j) deleted.
                without_arc = gum.DAG(G)
                without_arc.eraseArc(i, j)
                yield without_arc

                # Neighbour 2: arc (i, j) reversed, built on its own copy so
                # that the graph yielded above is left untouched.
                if len(G.parents(i)) < max_parents:
                    reversed_arc = gum.DAG(G)
                    reversed_arc.eraseArc(i, j)
                    try:
                        reversed_arc.addArc(j, i)
                    except gum.InvalidDirectedCycle:
                        continue
                    yield reversed_arc

            elif len(G.parents(j)) < max_parents:
                # Arc (i, j) is absent and j can still take a parent: add it.
                with_arc = gum.DAG(G)
                try:
                    with_arc.addArc(i, j)
                except gum.InvalidDirectedCycle:
                    continue
                yield with_arc
def create_empty_dag(N):
    """Return a ``gum.DAG`` holding ``N`` nodes and no arc.

    Parameters
    ----------
    N : int
        Number of nodes to add (one ``addNode`` call each).

    Returns
    -------
    gum.DAG
        The arc-free graph.
    """
    empty = gum.DAG()
    remaining = N
    while remaining > 0:
        empty.addNode()
        remaining -= 1
    return empty
def _dir_is_none(element):
    """Tell whether a dot element carries the attribute ``dir == 'none'``."""
    attributes = element.get_attributes()
    # `.get` so that attributes without a 'dir' key do not raise KeyError.
    return bool(attributes) and attributes.get('dir') == 'none'


def read_graph(file_name):
    """Load a DOT file and convert it to a pyAgrum graph.

    Links flagged ``dir == 'none'`` (on the link itself or on the global
    ``edge`` defaults) become undirected edges; all other links become arcs.
    The returned graph is a ``gum.DAG`` when there is no edge, a
    ``gum.UndiGraph`` when there are only edges, and a ``gum.MixedGraph``
    otherwise.

    Parameters
    ----------
    file_name
        Path of the DOT file, passed to ``dot.graph_from_dot_file``.

    Returns
    -------
    tuple
        ``(graph, names)`` where ``names`` lists the node names in the order
        in which they were registered.
    """
    print("Loading file {}".format(file_name))
    dot_graph = dot.graph_from_dot_file(file_name)

    # Cleaning nodes: 'node' and 'edge' hold default attributes, they are not
    # real vertices. The 'edge' defaults may declare the graph undirected.
    is_undirected = False
    for node in dot_graph.get_nodes():
        name = node.get_name()
        if name in ('node', 'edge'):
            if name == 'edge' and _dir_is_none(node):
                is_undirected = True
            dot_graph.del_node(node)

    # Getting node names: declared nodes first ...
    node_name_map = {}
    for index, node in enumerate(dot_graph.get_nodes()):
        node_name_map[node.get_name()] = index

    # ... then the nodes that only appear as endpoints of a link.
    # `default=-1` keeps this working when no node is declared explicitly.
    next_id = max(node_name_map.values(), default=-1) + 1
    for link in dot_graph.get_edges():
        for endpoint in (link.get_source(), link.get_destination()):
            if endpoint not in node_name_map:
                node_name_map[endpoint] = next_id
                next_id += 1

    # Sort the links into undirected edges and directed arcs.
    edges = []
    arcs = []
    for link in dot_graph.get_edges():
        tail = node_name_map[link.get_source()]
        head = node_name_map[link.get_destination()]
        if is_undirected or _dir_is_none(link):
            edges.append(gum.Edge(tail, head))
        else:
            arcs.append(gum.Arc(tail, head))

    # Pick the most specific graph type able to hold the links.
    if not edges:
        graph = gum.DAG()
    elif not arcs:
        graph = gum.UndiGraph()
    else:
        graph = gum.MixedGraph()

    for node_id in node_name_map.values():
        graph.addNodeWithId(node_id)
    for edge in edges:
        graph.addEdge(edge.first(), edge.second())
    for arc in arcs:
        graph.addArc(arc.tail(), arc.head())

    # Since python3.7, dict are insertion ordered so
    # just returning the keys should be fine but we never know !
    return graph, list(node_name_map.keys())
import data_generation as dg
import otagrum
import pyAgrum as gum
import matplotlib.pyplot as plt

# Experiment settings.
ds_size = 10000            # number of points asked from each generated data set
distribution = 'student'   # distribution name handed to the data generator
restarts = 20              # number of independent data sets
S = list(range(1000, 10100, 100))  # sample sizes that are evaluated

# Structure over the two variables X and Y, without any arc.
names = ['X', 'Y']
dag = gum.DAG()
dag.addNodes(2)
# dag.addArc(0,1)
ndag = otagrum.NamedDAG(dag, names)

# One generated data set per restart.
D = [
    dg.generate_data(ndag, ds_size, distribution, r=0.8)
    for _ in range(restarts)
]

# I[k] is the information between variables 0 and 1, computed on the first
# S[k] rows of each data set (no correction) and averaged over the restarts.
I = []
for size in S:
    print("Size: ", size)
    info = 0
    for i, data in enumerate(D):
        print("Restart: ", i+1)
        cmi = otagrum.CorrectedMutualInformation(data[:size])
        cmi.setKMode(otagrum.CorrectedMutualInformation.KModeTypes_NoCorr)
        info = info + cmi.compute2PtCorrectedInformation(0, 1)
    I.append(info / restarts)
def testAddNodes(self):
    """Run the shared ``_testAddNodes`` check on each graph class."""
    for graph_class in (gum.DiGraph, gum.UndiGraph, gum.MixedGraph, gum.DAG):
        self._testAddNodes(graph_class())