def moralize(self):
    """
    Removes all the immoralities in the Network and creates a moral
    graph (UndirectedGraph).

    A v-structure X->Z<-Y is an immorality if there is no directed edge
    between X and Y.

    Returns
    -------
    UndirectedGraph
        Built from the edges of ``self.to_undirected()``, with an extra
        edge added between every pair of parents of each node.

    Examples
    --------
    >>> from pgmpy.models import DynamicBayesianNetwork as DBN
    >>> dbn = DBN([(('D',0), ('G',0)), (('I',0), ('G',0))])
    >>> moral_graph = dbn.moralize()
    >>> moral_graph.edges()
    [(('G', 0), ('I', 0)),
     (('G', 0), ('D', 0)),
     (('D', 1), ('I', 1)),
     (('D', 1), ('G', 1)),
     (('I', 0), ('D', 0)),
     (('G', 1), ('I', 1))]
    """
    skeleton_edges = self.to_undirected().edges()
    moral_graph = UndirectedGraph(skeleton_edges)

    # The node list is read from the parent class rather than from self.
    for child in super().nodes():
        parent_pairs = itertools.combinations(self.get_parents(child), 2)
        moral_graph.add_edges_from(parent_pairs)

    return moral_graph
def test_is_clique(self):
    # One node set that must be rejected and two that must be accepted
    # by is_clique on the same small graph.
    edges = [('A', 'B'), ('C', 'B'), ('B', 'D'), ('B', 'E'),
             ('D', 'E'), ('E', 'F'), ('D', 'F'), ('B', 'F')]
    G = UndirectedGraph(edges)

    rejected = ['A', 'B', 'C', 'D']
    self.assertFalse(G.is_clique(nodes=rejected))

    for accepted in (['B', 'D', 'E', 'F'], ['D', 'E', 'B']):
        self.assertTrue(G.is_clique(nodes=accepted))
def test_is_triangulated(self):
    # Edge list for which is_triangulated() is expected to be True.
    base_edges = [(0, 1), (1, 2), (0, 2), (2, 3), (3, 4), (4, 5), (3, 5),
                  (3, 7), (6, 7), (6, 9), (9, 8), (7, 8), (6, 8)]
    triangulated = UndirectedGraph(base_edges)
    self.assertTrue(triangulated.is_triangulated())

    # The same edges followed by (1, 6): expected to be False.
    not_triangulated = UndirectedGraph(base_edges + [(1, 6)])
    self.assertFalse(not_triangulated.is_triangulated())
def test_triangulation_all_heuristics(self):
    # Walk technique indices upwards from 2, rebuilding the graph each
    # time, until jt_techniques returns a falsy value.
    edge_spec = ((0, 1), (0, 3), (0, 8), (1, 2), (1, 4), (1, 8), (2, 4),
                 (2, 6), (2, 7), (3, 8), (3, 9), (4, 7), (4, 8), (5, 8),
                 (5, 9), (5, 10), (6, 7), (7, 10), (8, 10))

    def run_technique(index):
        fresh = UndirectedGraph(list(edge_spec))
        return fresh, fresh.jt_techniques(index, False, True)

    technique = 2
    graph, outcome = run_technique(technique)
    while outcome:
        self.assertTrue(graph.is_triangulated())
        technique += 1
        graph, outcome = run_technique(technique)
def test_is_clique(self):
    # (node set, expected is_clique result) pairs, checked in order.
    G = UndirectedGraph([
        ("A", "B"), ("C", "B"), ("B", "D"), ("B", "E"),
        ("D", "E"), ("E", "F"), ("D", "F"), ("B", "F"),
    ])
    cases = [
        (["A", "B", "C", "D"], False),
        (["B", "D", "E", "F"], True),
        (["D", "E", "B"], True),
    ]
    for node_set, expected in cases:
        verdict = G.is_clique(nodes=node_set)
        if expected:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)
def make_jt(self, triangulation_technique):
    """
    Makes the junction tree for the MarkovModel.

    The tree is produced by ``UndirectedGraph.make_jt`` and the factors
    returned by ``self.get_factors()`` are then inserted into it.

    Parameters
    ----------
    triangulation_technique : int
        Index of the triangulation technique to be used.
        See jt_techniques in UndirectedGraph for documentation on the
        triangulation techniques and the technique_num for each
        technique.

    Returns
    -------
    The object returned by ``UndirectedGraph.make_jt`` after
    ``insert_factors`` has been called on it.

    Examples
    --------
    >>> from pgmpy.models import MarkovModel
    >>> student = MarkovModel([('diff', 'intel'), ('diff', 'grade'),
    ...                        ('intel', 'grade')])
    >>> student.add_states({'diff': ['easy', 'hard'],
    ...                     'intel': ['dumb', 'smart'],
    ...                     'grade': ['A','B','C']})
    >>> factor = student.add_factors(['diff','intel'], range(4))
    >>> factor2 = student.add_factors(['intel','grade'], range(6))
    >>> factor3 = student.add_factors(['diff','grade'], range(6))
    >>> jt = student.make_jt(2)
    >>> jt.print_graph("Printing the Junction Tree")  # doctest: +SKIP

    The printed tree has a single node whose 'clique_nodes' are
    ['diff', 'grade', 'intel'] and whose 'factors' are the three
    factors added above.
    """
    junction_tree = UndirectedGraph.make_jt(self, triangulation_technique)
    model_factors = self.get_factors()
    junction_tree.insert_factors(model_factors)
    return junction_tree
def test_jt_tree_width(self):
    # Small graph checked with technique 0: expected width is 2.
    small = UndirectedGraph([(0, 1), (1, 2), (2, 0), (3, 4), (4, 5),
                             (5, 3), (0, 3)])
    self.assertEqual(small.jt_tree_width(0), 2)

    # Expected widths for techniques 2, 3, 4, 5 (index = technique - 2).
    expected_widths = [4, 4, 7, 5]
    edge_spec = ((0, 1), (0, 3), (0, 8), (1, 2), (1, 4), (1, 8), (2, 4),
                 (2, 6), (2, 7), (3, 8), (3, 9), (4, 7), (4, 8), (5, 8),
                 (5, 9), (5, 10), (6, 7), (7, 10), (8, 10))

    technique = 2
    while True:
        # A fresh graph is built for every technique.
        width = UndirectedGraph(list(edge_spec)).jt_tree_width(technique)
        if not width:
            break
        self.assertEqual(width, expected_widths[technique - 2])
        technique += 1
def moralize(self):
    """
    Removes all the immoralities in the DirectedGraph and creates a moral
    graph (UndirectedGraph).

    A v-structure X->Z<-Y is an immorality if there is no directed edge
    between X and Y.

    Returns
    -------
    UndirectedGraph
        Built from the edges of ``self.to_undirected()``, with an extra
        edge added between every pair of parents of each node.

    Examples
    --------
    >>> from pgmpy.base import DirectedGraph
    >>> G = DirectedGraph(ebunch=[('diff', 'grade'), ('intel', 'grade')])
    >>> moral_graph = G.moralize()
    >>> moral_graph.edges()
    [('intel', 'grade'), ('intel', 'diff'), ('grade', 'diff')]
    """
    undirected_edges = self.to_undirected().edges()
    moral_graph = UndirectedGraph(undirected_edges)

    for child in self.nodes():
        # Connect ("marry") every pair of parents of this node.
        parent_pairs = itertools.combinations(self.get_parents(child), 2)
        moral_graph.add_edges_from(parent_pairs)

    return moral_graph
def test_skeleton_to_pdag(self):
    # Part 1: PDAG from a skeleton estimated on random data in which
    # C = A - B and D has A added to it.
    data = pd.DataFrame(np.random.randint(0, 3, size=(1000, 3)),
                        columns=list('ABD'))
    data['C'] = data['A'] - data['B']
    data['D'] += data['A']
    c = ConstraintBasedEstimator(data)
    pdag = c.skeleton_to_pdag(*c.estimate_skeleton())
    self.assertSetEqual(
        set(pdag.edges()),
        {('B', 'C'), ('A', 'D'), ('A', 'C'), ('D', 'A')})

    # Part 2: hand-made skeleton B - A - C with two different
    # separating sets for the pair {B, C}.
    skel = UndirectedGraph([('A', 'B'), ('A', 'C')])

    # Empty separating set: both edges are expected to point into A.
    sep_sets1 = {frozenset({'B', 'C'}): ()}
    pdag1 = c.skeleton_to_pdag(skel, sep_sets1)
    self.assertSetEqual(set(pdag1.edges()), {('B', 'A'), ('C', 'A')})

    # A in the separating set: both edges are expected in both directions.
    # The PDAG is computed once and reused for the assertion.
    sep_sets2 = {frozenset({'B', 'C'}): ('A', )}
    pdag2 = c.skeleton_to_pdag(skel, sep_sets2)
    self.assertSetEqual(
        set(pdag2.edges()),
        {('A', 'B'), ('B', 'A'), ('A', 'C'), ('C', 'A')})
def test_skeleton_to_pdag(self):
    # Part 1: PDAG from a skeleton estimated on random data in which
    # C = A - B and D has A added to it.
    data = pd.DataFrame(np.random.randint(0, 3, size=(1000, 3)),
                        columns=list("ABD"))
    data["C"] = data["A"] - data["B"]
    data["D"] += data["A"]
    c = ConstraintBasedEstimator(data)
    pdag = c.skeleton_to_pdag(*c.estimate_skeleton())
    self.assertSetEqual(
        set(pdag.edges()),
        {("B", "C"), ("A", "D"), ("A", "C"), ("D", "A")},
    )

    # Part 2: hand-made skeleton B - A - C with two different
    # separating sets for the pair {B, C}.
    skel = UndirectedGraph([("A", "B"), ("A", "C")])

    # Empty separating set: both edges are expected to point into A.
    sep_sets1 = {frozenset({"B", "C"}): ()}
    pdag1 = c.skeleton_to_pdag(skel, sep_sets1)
    self.assertSetEqual(set(pdag1.edges()), {("B", "A"), ("C", "A")})

    # A in the separating set: both edges are expected in both directions.
    # The PDAG is computed once and reused for the assertion.
    sep_sets2 = {frozenset({"B", "C"}): ("A", )}
    pdag2 = c.skeleton_to_pdag(skel, sep_sets2)
    self.assertSetEqual(
        set(pdag2.edges()),
        {("A", "B"), ("B", "A"), ("A", "C"), ("C", "A")},
    )
def test_is_triangulated(self):
    # The four edges A-B, B-D, D-C, C-A are expected not to be
    # triangulated; adding A-D is expected to make them so.
    four_cycle = [('A', 'B'), ('A', 'C'), ('B', 'D'), ('C', 'D')]
    G = UndirectedGraph(four_cycle)
    before = G.is_triangulated()
    self.assertFalse(before)

    G.add_edge('A', 'D')
    after = G.is_triangulated()
    self.assertTrue(after)
def build_skeleton(nodes, independencies):
    """Estimates a graph skeleton (UndirectedGraph) from a set of independencies
    using (the first part of) the PC algorithm. The independencies can either be
    provided as an instance of the `Independencies`-class or by passing a
    decision function that decides any conditional independency assertion.
    Returns a tuple `(skeleton, separating_sets)`.

    If an Independencies-instance is passed, the contained IndependenceAssertions
    have to admit a faithful BN representation. This is the case if
    they are obtained as a set of d-separations of some Bayesian network or
    if the independence assertions are closed under the semi-graphoid axioms.
    Otherwise the procedure may fail to identify the correct structure.

    Parameters
    ----------
    nodes: list, array-like
        A list of node/variable names of the network skeleton.

    independencies: Independencies-instance or function.
        The source of independency information from which to build the skeleton.
        The provided Independencies should admit a faithful representation.
        Can either be provided as an Independencies()-instance or by passing a
        function `f(X, Y, Zs)` that returns `True` when X _|_ Y | Zs,
        otherwise `False`. (X, Y being individual nodes and Zs a list of nodes).

    Returns
    -------
    skeleton: UndirectedGraph
        An estimate for the undirected graph skeleton of the BN underlying the data.

    separating_sets: dict
        A dict containing for each pair of not directly connected nodes a
        separating set ("witnessing set") of variables that makes them
        conditionally independent. (needed for edge orientation procedures)

    Raises
    ------
    ValueError
        If `independencies` is neither an Independencies-instance nor callable.

    Reference
    ---------
    [1] Neapolitan, Learning Bayesian Networks, Section 10.1.2, Algorithm 10.2 (page 550)
        http://www.cs.technion.ac.il/~dang/books/Learning%20Bayesian%20Networks(Neapolitan,%20Richard).pdf
    [2] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
        Section 3.4.2.1 (page 85), Algorithm 3.3

    Examples
    --------
    >>> from pgmpy.estimators import ConstraintBasedEstimator
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.independencies import Independencies

    >>> # build skeleton from list of independencies:
    ... ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
    >>> # we need to compute closure, otherwise this set of independencies doesn't
    ... # admit a faithful representation:
    ... ind = ind.closure()
    >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
    >>> print(skel.edges())
    [('A', 'D'), ('B', 'D'), ('C', 'D')]

    >>> # build skeleton from d-separations of BayesianModel:
    ... model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies())
    >>> print(skel.edges())
    [('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')]
    """
    nodes = list(nodes)

    if isinstance(independencies, Independencies):
        def is_independent(X, Y, Zs):
            return IndependenceAssertion(X, Y, Zs) in independencies
    elif callable(independencies):
        is_independent = independencies
    else:
        raise ValueError("'independencies' must be either Independencies-instance " +
                         "or a ternary function that decides independencies.")

    # Start from the complete graph and delete an edge as soon as some
    # separating set of the current size makes its endpoints independent.
    graph = UndirectedGraph(combinations(nodes, 2))
    lim_neighbors = 0
    separating_sets = dict()

    # `neighbors()` may return an iterator rather than a list, so it is
    # materialised before taking its length.
    while not all(len(list(graph.neighbors(node))) < lim_neighbors
                  for node in nodes):
        for node in nodes:
            # Iterate over a snapshot: edges of `node` are removed inside
            # this loop, which must not disturb the iteration.
            for neighbor in list(graph.neighbors(node)):
                # search if there is a set of neighbors (of size lim_neighbors)
                # that makes X and Y independent:
                candidates = set(graph.neighbors(node)) - set([neighbor])
                for separating_set in combinations(candidates, lim_neighbors):
                    if is_independent(node, neighbor, separating_set):
                        separating_sets[frozenset((node, neighbor))] = separating_set
                        graph.remove_edge(node, neighbor)
                        break
        lim_neighbors += 1

    return graph, separating_sets
def setUp(self):
    # Every test starts from a graph created with no constructor arguments.
    empty_graph = UndirectedGraph()
    self.graph = empty_graph
class TestUndirectedGraphCreation(unittest.TestCase):
    # Construction and mutation tests for UndirectedGraph.
    #
    # The results of nodes() and neighbors() are wrapped in list() before
    # being handed to assertListEqual / len(), so the tests do not depend
    # on those methods returning a plain list.

    def setUp(self):
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_add_node_string(self):
        self.graph.add_node('a')
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_node_with_weight(self):
        self.graph.add_node('a')
        self.graph.add_node('weight_a', weight=0.3)
        self.assertEqual(self.graph.node['weight_a']['weight'], 0.3)
        # A node added without a weight is expected to carry weight None.
        self.assertEqual(self.graph.node['a']['weight'], None)

    def test_add_nodes_from_with_weight(self):
        self.graph.add_node(1)
        self.graph.add_nodes_from(['weight_b', 'weight_c'], weights=[0.3, 0.5])
        self.assertEqual(self.graph.node['weight_b']['weight'], 0.3)
        self.assertEqual(self.graph.node['weight_c']['weight'], 0.5)
        self.assertEqual(self.graph.node[1]['weight'], None)

    def test_add_nodes_from_non_string(self):
        # Only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
class TestUndirectedGraphCreation(unittest.TestCase):
    # Construction and mutation tests for UndirectedGraph: adding nodes
    # (with and without weights), adding edges, and counting neighbours.

    def setUp(self):
        self.graph = UndirectedGraph()

    def tearDown(self):
        del self.graph

    # --- construction ---------------------------------------------------

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([("a", "b"), ("b", "c")])
        node_names = sorted(self.G.nodes())
        self.assertListEqual(node_names, ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.G.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

    # --- nodes ----------------------------------------------------------

    def test_add_node_string(self):
        self.graph.add_node("a")
        present = list(self.graph.nodes())
        self.assertListEqual(present, ["a"])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        present = list(self.graph.nodes())
        self.assertListEqual(present, [1])

    def test_add_nodes_from_string(self):
        names = ["a", "b", "c", "d"]
        self.graph.add_nodes_from(names)
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        # Only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_node_with_weight(self):
        self.graph.add_node("a")
        self.graph.add_node("weight_a", weight=0.3)
        weighted = self.graph.nodes["weight_a"]["weight"]
        self.assertEqual(weighted, 0.3)
        unweighted = self.graph.nodes["a"]["weight"]
        self.assertEqual(unweighted, None)

    def test_add_nodes_from_with_weight(self):
        self.graph.add_node(1)
        self.graph.add_nodes_from(["weight_b", "weight_c"], weights=[0.3, 0.5])
        for name, expected in (("weight_b", 0.3), ("weight_c", 0.5), (1, None)):
            self.assertEqual(self.graph.nodes[name]["weight"], expected)

    # --- edges ----------------------------------------------------------

    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")
        self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [["d", "e"]])

        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        first_batch = [("a", "b"), ("b", "c")]
        self.graph.add_edges_from(first_batch)
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [["a", "b"], ["b", "c"]])

        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ["a", "b", "c", "d", "e", "f"])
        actual_edges = hf.recursive_sorted(self.graph.edges())
        expected_edges = hf.recursive_sorted(
            [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")])
        self.assertListEqual(actual_edges, expected_edges)

    def test_add_edges_from_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        adjacent = list(self.graph.neighbors("b"))
        self.assertEqual(len(adjacent), 2)
def test_jt_from_chordal_graph(self):
    # Small, already triangulated graph run through technique 0; the
    # returned object must itself report is_triangulated().
    chordal_edges = [(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3), (0, 3)]
    graph = UndirectedGraph(chordal_edges)
    result = graph.jt_techniques(0, True, True)
    self.assertTrue(result.is_triangulated())
def test_class_init_with_data_string(self):
    # Building from an edge list must yield both the nodes and the edges.
    edge_list = [("a", "b"), ("b", "c")]
    self.G = UndirectedGraph(edge_list)

    node_names = sorted(self.G.nodes())
    self.assertListEqual(node_names, ["a", "b", "c"])

    edge_pairs = hf.recursive_sorted(self.G.edges())
    self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])
class TestUndirectedGraphCreation(unittest.TestCase):
    # Construction and mutation tests for UndirectedGraph.
    #
    # The results of nodes() and neighbors() are wrapped in list() before
    # being handed to assertListEqual / len(), so the tests do not depend
    # on those methods returning a plain list.

    def setUp(self):
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_add_node_string(self):
        self.graph.add_node('a')
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # Only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
def test_class_init_with_data_string(self):
    # Building from an edge list must yield both the nodes and the edges.
    self.G = UndirectedGraph([('a', 'b'), ('b', 'c')])

    expected_nodes = ['a', 'b', 'c']
    expected_edges = [['a', 'b'], ['b', 'c']]

    self.assertListEqual(sorted(self.G.nodes()), expected_nodes)
    self.assertListEqual(hf.recursive_sorted(self.G.edges()), expected_edges)
def build_skeleton(nodes, independencies):
    """Estimates a graph skeleton (UndirectedGraph) from a set of independencies
    using (the first part of) the PC algorithm. The independencies can either be
    provided as an instance of the `Independencies`-class or by passing a
    decision function that decides any conditional independency assertion.
    Returns a tuple `(skeleton, separating_sets)`.

    If an Independencies-instance is passed, the contained IndependenceAssertions
    have to admit a faithful BN representation. This is the case if
    they are obtained as a set of d-separations of some Bayesian network or
    if the independence assertions are closed under the semi-graphoid axioms.
    Otherwise the procedure may fail to identify the correct structure.

    Parameters
    ----------
    nodes: list, array-like
        A list of node/variable names of the network skeleton.

    independencies: Independencies-instance or function.
        The source of independency information from which to build the skeleton.
        The provided Independencies should admit a faithful representation.
        Can either be provided as an Independencies()-instance or by passing a
        function `f(X, Y, Zs)` that returns `True` when X _|_ Y | Zs,
        otherwise `False`. (X, Y being individual nodes and Zs a list of nodes).

    Returns
    -------
    skeleton: UndirectedGraph
        An estimate for the undirected graph skeleton of the BN underlying the data.

    separating_sets: dict
        A dict containing for each pair of not directly connected nodes a
        separating set ("witnessing set") of variables that makes them
        conditionally independent. (needed for edge orientation procedures)

    Raises
    ------
    ValueError
        If `independencies` is neither an Independencies-instance nor callable.

    Reference
    ---------
    [1] Neapolitan, Learning Bayesian Networks, Section 10.1.2, Algorithm 10.2 (page 550)
        http://www.cs.technion.ac.il/~dang/books/Learning%20Bayesian%20Networks(Neapolitan,%20Richard).pdf
    [2] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
        Section 3.4.2.1 (page 85), Algorithm 3.3

    Examples
    --------
    >>> from pgmpy.estimators import ConstraintBasedEstimator
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.independencies import Independencies

    >>> # build skeleton from list of independencies:
    ... ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
    >>> # we need to compute closure, otherwise this set of independencies doesn't
    ... # admit a faithful representation:
    ... ind = ind.closure()
    >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
    >>> print(skel.edges())
    [('A', 'D'), ('B', 'D'), ('C', 'D')]

    >>> # build skeleton from d-separations of BayesianModel:
    ... model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies())
    >>> print(skel.edges())
    [('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')]
    """
    nodes = list(nodes)

    if isinstance(independencies, Independencies):

        def is_independent(X, Y, Zs):
            return IndependenceAssertion(X, Y, Zs) in independencies

    elif callable(independencies):
        is_independent = independencies
    else:
        raise ValueError(
            "'independencies' must be either Independencies-instance " +
            "or a ternary function that decides independencies.")

    # Start from the complete graph and delete an edge as soon as some
    # separating set of the current size makes its endpoints independent.
    graph = UndirectedGraph(combinations(nodes, 2))
    lim_neighbors = 0
    separating_sets = dict()

    # `neighbors()` may return an iterator rather than a list, so it is
    # materialised before taking its length.
    while not all(
            len(list(graph.neighbors(node))) < lim_neighbors
            for node in nodes):
        for node in nodes:
            # Iterate over a snapshot: edges of `node` are removed inside
            # this loop, which must not disturb the iteration.
            for neighbor in list(graph.neighbors(node)):
                # search if there is a set of neighbors (of size lim_neighbors)
                # that makes X and Y independent:
                candidates = set(graph.neighbors(node)) - set([neighbor])
                for separating_set in combinations(candidates, lim_neighbors):
                    if is_independent(node, neighbor, separating_set):
                        separating_sets[frozenset(
                            (node, neighbor))] = separating_set
                        graph.remove_edge(node, neighbor)
                        break
        lim_neighbors += 1

    return graph, separating_sets
class TestUndirectedGraphCreation(unittest.TestCase):
    # Construction and mutation tests for UndirectedGraph.
    #
    # The results of nodes() and neighbors() are wrapped in list() before
    # being handed to assertListEqual / len(), so the tests do not depend
    # on those methods returning a plain list.

    def setUp(self):
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [["a", "b"], ["b", "c"]])

    def test_add_node_string(self):
        self.graph.add_node("a")
        self.assertListEqual(list(self.graph.nodes()), ["a"])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(["a", "b", "c", "d"])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        # Only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")
        self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [["d", "e"]])
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [["a", "b"], ["b", "c"]])
        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ["a", "b", "c", "d", "e", "f"])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()),
            hf.recursive_sorted([("a", "b"), ("b", "c"), ("d", "e"),
                                 ("e", "f")]),
        )

    def test_add_edges_from_nonstring(self):
        # Only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertEqual(len(list(self.graph.neighbors("b"))), 2)

    def tearDown(self):
        del self.graph
def mmpc(self, significance_level=0.01):
    """
    Estimates a graph skeleton (UndirectedGraph) over the variables in
    `self.state_names` using a max-min heuristic.

    For every node a candidate neighbour list is grown (forward phase),
    pruned with conditional independence tests (backward phase), and
    finally made symmetric: a neighbour is kept only if the relation holds
    in both directions.

    Parameters
    ----------
    significance_level: float, default: 0.01
        NOTE(review): accepted but never referenced in this body; the
        backward phase relies on the truthiness of
        `self.test_conditional_independence(...)`. Confirm whether the
        threshold is meant to be applied here.

    Returns
    -------
    skel: UndirectedGraph
        Graph containing every node, with an edge for each neighbour pair
        that survived all three steps.
    """
    # Materialised once because the node collection is iterated repeatedly.
    nodes = list(self.state_names.keys())

    def assoc(X, Y, Zs):
        """Measure for (conditional) association between variables.
        Uses one minus the second element returned by `chi_square`.
        """
        return 1 - chi_square(X, Y, Zs, self.data)[1]

    def min_assoc(X, Y, Zs):
        "Minimal association of X, Y given any subset of Zs."
        return min(assoc(X, Y, Zs_subset) for Zs_subset in powerset(Zs))

    def max_min_heuristic(X, Zs):
        "Finds variable that maximizes min_assoc with `node` relative to `neighbors`."
        max_min_assoc = 0
        best_Y = None

        for Y in set(nodes) - set(Zs + [X]):
            min_assoc_val = min_assoc(X, Y, Zs)
            if min_assoc_val >= max_min_assoc:
                best_Y = Y
                max_min_assoc = min_assoc_val

        return (best_Y, max_min_assoc)

    # Find parents and children for each node
    neighbors = dict()
    for node in nodes:
        neighbors[node] = []

        # Forward Phase: keep adding the best candidate while its
        # max-min association is strictly positive.
        while True:
            new_neighbor, new_neighbor_min_assoc = max_min_heuristic(
                node, neighbors[node])
            if new_neighbor_min_assoc > 0:
                neighbors[node].append(new_neighbor)
            else:
                break

        # Backward Phase: iterate over a snapshot, because removing from
        # the list being iterated would skip the element that follows
        # each removal.
        for neigh in list(neighbors[node]):
            other_neighbors = [n for n in neighbors[node] if n != neigh]
            for sep_set in powerset(other_neighbors):
                if self.test_conditional_independence(node, neigh, sep_set):
                    neighbors[node].remove(neigh)
                    break

    # correct for false positives: keep `neigh` for `node` only when
    # `node` is also listed for `neigh`. A new list is built so that no
    # list is mutated while it is being iterated.
    for node in nodes:
        neighbors[node] = [neigh for neigh in neighbors[node]
                           if node in neighbors[neigh]]

    skel = UndirectedGraph()
    skel.add_nodes_from(nodes)
    for node in nodes:
        skel.add_edges_from([(node, neigh) for neigh in neighbors[node]])

    return skel
def mmpc(self, data, nodes):
    """
    Estimates a graph skeleton (UndirectedGraph) for the data set, using
    the MMPC (max-min parents-and-children) algorithm.

    Results of the conditional independence tests are stored in
    `self.p_val_cache`, keyed by the sorted variable pair and the sorted
    conditioning set. Conditioning sets are limited to at most
    `self.max_reach` variables, and independence is decided against
    `self.alpha`.

    Parameters
    ----------
    data:
        Passed to `BaseEstimator(data=data, complete_samples_only=False)`,
        whose `test_conditional_independence` performs the tests.
    nodes: iterable
        The variables to build the skeleton over. It is copied into a
        list because it is iterated several times.

    Returns
    -------
    skel: UndirectedGraph
        The estimated graph skeleton.
    """
    nodes = list(nodes)

    def cached_test(X, Y, Zs, cb_estimator):
        """
        Returns `(p_value, sufficient_data)` for the test of X _|_ Y | Zs,
        running the test only when the result is not in the cache yet.
        """
        key = (tuple(sorted([X, Y])), tuple(sorted(Zs)))
        if key in self.p_val_cache:
            return self.p_val_cache.get(key)
        chi2, p_value, sufficient_data = cb_estimator.test_conditional_independence(
            X, Y, Zs)
        self.p_val_cache.update({key: (p_value, sufficient_data)})
        return p_value, sufficient_data

    def is_independent(X, Y, Zs, cb_estimator):
        """
        Returns result of hypothesis test for the null hypothesis that
        X _|_ Y | Zs, using a chi2 statistic and threshold `self.alpha`.
        """
        p_value, sufficient_data = cached_test(X, Y, Zs, cb_estimator)
        return p_value >= self.alpha and sufficient_data

    def assoc(X, Y, Zs, cb_estimator):
        """
        Measure for (conditional) association between variables.
        Uses one minus the p-value of the independence test.
        """
        p_value, _ = cached_test(X, Y, Zs, cb_estimator)
        return 1 - p_value

    def min_assoc(X, Y, Zs, cb_estimator):
        """
        Minimal association of X, Y given any subset of Zs of size at
        most `self.max_reach`.
        """
        min_association = float('inf')
        for size in range(min(self.max_reach, len(Zs)) + 1):
            partial_min_association = min(
                assoc(X, Y, Zs_subset, cb_estimator)
                for Zs_subset in combinations(Zs, size))
            if partial_min_association < min_association:
                min_association = partial_min_association
        return min_association

    def max_min_heuristic(X, Zs):
        """
        Finds variable that maximizes min_assoc with `node` relative to
        `neighbors`.
        """
        max_min_assoc = 0
        best_Y = None

        for Y in set(nodes) - set(Zs + [X]):
            min_assoc_val = min_assoc(X, Y, Zs, cb_estimator)
            if min_assoc_val >= max_min_assoc:
                best_Y = Y
                max_min_assoc = min_assoc_val

        return best_Y, max_min_assoc

    cb_estimator = BaseEstimator(data=data, complete_samples_only=False)

    # Find parents and children for each node
    neighbors = dict()
    for node in nodes:
        neighbors[node] = []

        # Forward Phase: keep adding the best candidate while its
        # max-min association is strictly positive.
        while True:
            new_neighbor, new_neighbor_min_assoc = max_min_heuristic(
                node, neighbors[node])
            if new_neighbor_min_assoc > 0:
                neighbors[node].append(new_neighbor)
            else:
                break

        # Backward Phase: iterate over a snapshot, because removing from
        # the list being iterated would skip the element that follows
        # each removal.
        for neigh in list(neighbors[node]):
            other_neighbors = [n for n in neighbors[node] if n != neigh]
            max_size = min(self.max_reach, len(other_neighbors))
            sep_sets = [
                sep_set
                for sep_set_size in range(max_size + 1)
                for sep_set in combinations(other_neighbors, sep_set_size)
            ]
            for sep_set in sep_sets:
                if is_independent(node, neigh, sep_set, cb_estimator):
                    neighbors[node].remove(neigh)
                    break

    # correct for false positives: keep `neigh` for `node` only when
    # `node` is also listed for `neigh`. A new list is built so that no
    # list is mutated while it is being iterated.
    for node in nodes:
        neighbors[node] = [neigh for neigh in neighbors[node]
                           if node in neighbors[neigh]]

    skel = UndirectedGraph()
    skel.add_nodes_from(nodes)
    for node in nodes:
        skel.add_edges_from([(node, neigh) for neigh in neighbors[node]])

    return skel
def test_check_clique(self):
    # All six possible edges on nodes 0..3 are present, so the full node
    # set must be accepted by check_clique.
    complete_edges = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    graph = UndirectedGraph(complete_edges)
    every_node = graph.nodes()
    self.assertTrue(graph.check_clique(every_node))
def test_is_triangulated(self):
    # The four edges A-B, B-D, D-C, C-A are expected not to be
    # triangulated; adding A-D is expected to make them so.
    G = UndirectedGraph([("A", "B"), ("A", "C"), ("B", "D"), ("C", "D")])
    without_chord = G.is_triangulated()
    self.assertFalse(without_chord)

    G.add_edge("A", "D")
    with_chord = G.is_triangulated()
    self.assertTrue(with_chord)
def to_junction_tree(self):
    """
    Creates a junction tree (or clique tree) for a given markov model.

    For a given markov model (H) a junction tree (G) is a graph
    1. where each node in G corresponds to a maximal clique in H
    2. each sepset in G separates the variables strictly on one side of the
    edge to other.

    Returns
    -------
    JunctionTree
        One node per maximal clique of the triangulated model, each with
        a clique potential built from the factors whose scope lies inside
        that clique.

    Raises
    ------
    ValueError
        If the factors do not cover exactly the model's variables, or if
        some factor could not be assigned to any clique.

    Examples
    --------
    >>> from pgmpy.models import MarkovModel
    >>> from pgmpy.factors.discrete import DiscreteFactor
    >>> mm = MarkovModel()
    >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
    >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
    ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
    ...                    ('x4', 'x7'), ('x5', 'x7')])
    >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
    >>> mm.add_factors(*phi)
    >>> junction_tree = mm.to_junction_tree()
    """
    from pgmpy.models import JunctionTree

    # Check whether the model is valid or not
    self.check_model()

    # Triangulate the graph to make it chordal
    triangulated_graph = self.triangulate()

    # Find maximal cliques in the chordal graph
    cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

    if len(cliques) >= 2:
        # Build a complete graph with all the cliques as nodes; each edge
        # is weighted by the negated sepset size, so that a minimum
        # spanning tree maximises the total sepset size.
        complete_graph = UndirectedGraph()
        for first, second in itertools.combinations(cliques, 2):
            sepset_size = len(set(first).intersection(second))
            complete_graph.add_edge(first, second, weight=-sepset_size)

        # Create clique trees by minimum (or maximum) spanning tree method
        clique_trees = JunctionTree(
            nx.minimum_spanning_tree(complete_graph).edges())
    else:
        # With a single clique the junction tree is just that clique as
        # its only node. With no cliques at all it stays empty, so the
        # name is always bound for the code below.
        clique_trees = JunctionTree()
        if cliques:
            clique_trees.add_node(cliques[0])

    # Check whether the factors are defined for all the random variables or not
    all_vars = itertools.chain.from_iterable(
        factor.scope() for factor in self.factors)
    if set(all_vars) != set(self.nodes()):
        # This exception used to be constructed without being raised.
        raise ValueError('DiscreteFactor for all the random variables not specified')

    # Dictionary stating whether the factor is used to create clique
    # potential or not
    # If false, then it is not used to create any clique potential
    is_used = {factor: False for factor in self.factors}

    # The cardinalities are the same for every clique; look them up once.
    cardinalities = self.get_cardinality()

    for node in clique_trees.nodes():
        clique_factors = []
        for factor in self.factors:
            # A factor is assigned to the first clique that contains its
            # whole scope, and to no other clique afterwards.
            if not is_used[factor] and set(factor.scope()).issubset(node):
                clique_factors.append(factor)
                is_used[factor] = True

        # To compute clique potential, initially set it as unity factor
        var_card = [cardinalities[x] for x in node]
        clique_potential = DiscreteFactor(node, var_card,
                                          np.ones(np.prod(var_card)))

        # multiply it with the factors associated with the variables present
        # in the clique (or node); a clique that received no factor keeps
        # the unity potential.
        if clique_factors:
            clique_potential *= factor_product(*clique_factors)
        clique_trees.add_factors(clique_potential)

    if not all(is_used.values()):
        raise ValueError('All the factors were not used to create Junction Tree.'
                         'Extra factors are defined.')

    return clique_trees
def to_junction_tree(self):
    """
    Creates a junction tree (or clique tree) for a given markov model.

    For a given markov model (H) a junction tree (G) is a graph
    1. where each node in G corresponds to a maximal clique in H
    2. each sepset in G separates the variables strictly on one side of the
       edge to other.

    The model is first validated with ``check_model`` and triangulated.
    The maximal cliques of the triangulated graph become the nodes of the
    tree; when there are several cliques they are connected by a spanning
    tree that maximises the total sepset size. Every factor of the model is
    then multiplied into the potential of the first clique that contains
    its whole scope.

    Returns
    -------
    JunctionTree
        The clique tree, with one clique potential added per clique.

    Raises
    ------
    ValueError
        If the factors of the model do not cover every node, or if some
        factor could not be assigned to any clique.

    Examples
    --------
    >>> from pgmpy.models import MarkovModel
    >>> from pgmpy.factors import Factor
    >>> mm = MarkovModel()
    >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
    >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
    ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
    ...                    ('x4', 'x7'), ('x5', 'x7')])
    >>> phi = [Factor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
    >>> mm.add_factors(*phi)
    >>> junction_tree = mm.to_junction_tree()
    """
    # Imported here rather than at module level (kept as in the original).
    from pgmpy.models import JunctionTree

    # Check whether the model is valid or not
    self.check_model()

    # Triangulate the graph to make it chordal
    triangulated_graph = self.triangulate()

    # Find maximal cliques in the chordal graph
    cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

    # Start from an empty tree so the name is always bound, even when no
    # clique was found.
    clique_trees = JunctionTree()

    if len(cliques) == 1:
        # With a single clique the junction tree is just that one node.
        clique_trees.add_node(cliques[0])
    elif len(cliques) >= 2:
        # Build a complete graph over the cliques whose edge weights are the
        # negated sepset sizes, so that a *minimum* spanning tree of it is a
        # *maximum* sepset-size spanning tree.
        complete_graph = UndirectedGraph()
        for first, second in itertools.combinations(cliques, 2):
            sepset_size = len(set(first).intersection(second))
            complete_graph.add_edge(first, second, weight=-sepset_size)

        clique_trees = JunctionTree(
            nx.minimum_spanning_tree(complete_graph).edges())

    # Check whether the factors are defined for all the random variables or not
    all_vars = itertools.chain.from_iterable(
        factor.scope() for factor in self.factors)
    if set(all_vars) != set(self.nodes()):
        raise ValueError('Factor for all the random variables not specified')

    # Tracks whether each factor has already been absorbed into some clique
    # potential, so that every factor is used exactly once.
    is_used = {factor: False for factor in self.factors}

    # The cardinalities do not change while potentials are built, so look
    # them up once instead of once per variable per clique.
    cardinalities = self.get_cardinality()

    for node in clique_trees.nodes():
        clique_factors = []
        for factor in self.factors:
            # Assign a still-unused factor to this clique only when its
            # whole scope lies inside the clique.
            if not is_used[factor] and set(factor.scope()).issubset(node):
                clique_factors.append(factor)
                is_used[factor] = True

        # To compute clique potential, initially set it as unity factor
        var_card = [cardinalities[x] for x in node]
        clique_potential = Factor(node, var_card, np.ones(np.prod(var_card)))

        # Multiply in the factors assigned to this clique. A clique that
        # received no factor keeps the unity potential.
        if clique_factors:
            clique_potential *= factor_product(*clique_factors)
        clique_trees.add_factors(clique_potential)

    if not all(is_used.values()):
        raise ValueError(
            'All the factors were not used to create Junction Tree. '
            'Extra factors are defined.')

    return clique_trees