def moralize(self):
        """
        Build the moral graph (UndirectedGraph) of the Network.

        The network is first converted to its undirected form; afterwards all
        parents of every node are connected pairwise, which removes all the
        immoralities.

        A v-structure X->Z<-Y is an immorality if there is no directed edge
        between X and Y.

        Returns
        -------
        UndirectedGraph
            The moralized graph.

        Examples
        --------
        >>> from pgmpy.models import DynamicBayesianNetwork as DBN
        >>> dbn = DBN([(('D',0), ('G',0)), (('I',0), ('G',0))])
        >>> moral_graph = dbn.moralize()
        >>> moral_graph.edges()
        [(('G', 0), ('I', 0)),
        (('G', 0), ('D', 0)),
        (('D', 1), ('I', 1)),
        (('D', 1), ('G', 1)),
        (('I', 0), ('D', 0)),
        (('G', 1), ('I', 1))]
        """
        undirected_edges = self.to_undirected().edges()
        moral_graph = UndirectedGraph(undirected_edges)

        # The node list is taken from the parent class's nodes().
        for child in super().nodes():
            parent_pairs = itertools.combinations(self.get_parents(child), 2)
            moral_graph.add_edges_from(parent_pairs)

        return moral_graph
 def test_is_clique(self):
     # One node set that must be rejected as a clique, followed by two
     # node sets that must be accepted.
     edges = [('A', 'B'), ('C', 'B'), ('B', 'D'), ('B', 'E'),
              ('D', 'E'), ('E', 'F'), ('D', 'F'), ('B', 'F')]
     G = UndirectedGraph(edges)

     rejected = G.is_clique(nodes=['A', 'B', 'C', 'D'])
     self.assertFalse(rejected)

     accepted_four = G.is_clique(nodes=['B', 'D', 'E', 'F'])
     self.assertTrue(accepted_four)

     accepted_three = G.is_clique(nodes=['D', 'E', 'B'])
     self.assertTrue(accepted_three)
Example #3
0
 def test_is_triangulated(self):
     # Edge list common to both cases below.
     base_edges = [(0, 1), (1, 2), (0, 2), (2, 3), (3, 4), (4, 5),
                   (3, 5), (3, 7), (6, 7), (6, 9), (9, 8), (7, 8),
                   (6, 8)]

     # The base graph is expected to be triangulated.
     triangulated = UndirectedGraph(base_edges)
     self.assertTrue(triangulated.is_triangulated())

     # With the extra edge (1, 6) it is expected not to be triangulated.
     not_triangulated = UndirectedGraph(base_edges + [(1, 6)])
     self.assertFalse(not_triangulated.is_triangulated())
Example #4
0
 def test_triangulation_all_heuristics(self):
     # Walk through the technique indices starting at 2 until
     # jt_techniques returns a falsy value. After every truthy call the
     # graph must report itself as triangulated.
     edges = [(0, 1), (0, 3), (0, 8), (1, 2), (1, 4), (1, 8),
              (2, 4), (2, 6), (2, 7), (3, 8), (3, 9), (4, 7),
              (4, 8), (5, 8), (5, 9), (5, 10), (6, 7), (7, 10),
              (8, 10)]
     technique = 2
     while True:
         # A fresh graph is built for every technique.
         graph = UndirectedGraph(edges)
         outcome = graph.jt_techniques(technique, False, True)
         if not outcome:
             break
         self.assertTrue(graph.is_triangulated())
         technique += 1
 def test_is_clique(self):
     # The first node set must not be reported as a clique; the other
     # two must be.
     edge_list = [("A", "B"), ("C", "B"), ("B", "D"), ("B", "E"),
                  ("D", "E"), ("E", "F"), ("D", "F"), ("B", "F")]
     G = UndirectedGraph(edge_list)

     non_clique_nodes = ["A", "B", "C", "D"]
     self.assertFalse(G.is_clique(nodes=non_clique_nodes))

     clique_of_four = ["B", "D", "E", "F"]
     self.assertTrue(G.is_clique(nodes=clique_of_four))

     clique_of_three = ["D", "E", "B"]
     self.assertTrue(G.is_clique(nodes=clique_of_three))
Example #6
0
    def make_jt(self, triangulation_technique):
        """
        Build the junction tree for the MarkovModel.

        The tree is created by ``UndirectedGraph.make_jt`` and the factors
        of this model are then inserted into it.

        Parameters
        ----------
        triangulation_technique : int
            Index of the triangulation technique to be used.
            See jt_techniques in Undirected Graph for documentation on
            the triangulation techniques and the technique_num for each
            technique.

        Returns
        -------
        The junction tree returned by ``UndirectedGraph.make_jt`` after
        ``insert_factors`` has been called on it.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> student = MarkovModel([('diff', 'intel'), ('diff', 'grade'),
        ...                        ('intel', 'grade')])
        >>> student.add_states({'diff': ['easy', 'hard'],
        ...                     'intel': ['dumb', 'smart'],
        ...                     'grade': ['A','B','C']})
        >>> factor = student.add_factors(['diff','intel'], range(4))
        >>> factor2 = student.add_factors(['intel','grade'], range(6))
        >>> factor3 = student.add_factors(['diff','grade'], range(6))
        >>> jt = student.make_jt(2)
        >>> jt.print_graph("Printing the Junction Tree")
        Printing the graph Printing the Junction Tree<<<
        1	( {'factors': [diff	intel	phi(diff, intel)
        diff_0	intel_0	0.0
        diff_0	intel_1	1.0
        diff_1	intel_0	2.0
        diff_1	intel_1	3.0
        , intel	grade	phi(intel, grade)
        intel_0	grade_0	0.0
        intel_0	grade_1	1.0
        intel_0	grade_2	2.0
        intel_1	grade_0	3.0
        intel_1	grade_1	4.0
        intel_1	grade_2	5.0
        , diff	grade	phi(diff, grade)
        diff_0	grade_0	0.0
        diff_0	grade_1	1.0
        diff_0	grade_2	2.0
        diff_1	grade_0	3.0
        diff_1	grade_1	4.0
        diff_1	grade_2	5.0
        ], 'clique_nodes': ['diff', 'grade', 'intel']} ) : []
        >>>
        """
        junction_tree = UndirectedGraph.make_jt(self, triangulation_technique)
        model_factors = self.get_factors()
        junction_tree.insert_factors(model_factors)
        return junction_tree
Example #7
0
 def test_jt_tree_width(self):
     # Small graph: technique 0 is expected to give a tree width of 2.
     small_edges = [(0, 1), (1, 2), (2, 0), (3, 4),
                    (4, 5), (5, 3), (0, 3)]
     small_graph = UndirectedGraph(small_edges)
     self.assertEqual(small_graph.jt_tree_width(0), 2)

     # Expected widths for technique indices 2, 3, 4 and 5, in order.
     expected_widths = [4, 4, 7, 5]
     edges = [(0, 1), (0, 3), (0, 8), (1, 2), (1, 4), (1, 8),
              (2, 4), (2, 6), (2, 7), (3, 8), (3, 9), (4, 7),
              (4, 8), (5, 8), (5, 9), (5, 10), (6, 7), (7, 10),
              (8, 10)]
     technique = 2
     while True:
         graph = UndirectedGraph(edges)
         width = graph.jt_tree_width(technique)
         # A falsy width ends the walk over the technique indices.
         if not width:
             break
         self.assertEqual(width, expected_widths[technique - 2])
         technique += 1
    def moralize(self):
        """
        Build the moral graph (UndirectedGraph) of the DirectedGraph.

        Starting from the undirected version of this graph, the parents of
        every node are connected pairwise, which removes all the
        immoralities.

        A v-structure X->Z<-Y is an immorality if there is no directed edge
        between X and Y.

        Returns
        -------
        UndirectedGraph
            The moralized graph.

        Examples
        --------
        >>> from pgmpy.base import DirectedGraph
        >>> G = DirectedGraph(ebunch=[('diff', 'grade'), ('intel', 'grade')])
        >>> moral_graph = G.moralize()
        >>> moral_graph.edges()
        [('intel', 'grade'), ('intel', 'diff'), ('grade', 'diff')]
        """
        undirected_edges = self.to_undirected().edges()
        moral_graph = UndirectedGraph(undirected_edges)

        for child in self.nodes():
            parents = self.get_parents(child)
            # Connect every pair of parents of this child.
            parent_pairs = itertools.combinations(parents, 2)
            moral_graph.add_edges_from(parent_pairs)

        return moral_graph
    def moralize(self):
        """
        Return the moralized version of the DirectedGraph as an
        UndirectedGraph.

        The result holds the edges of the undirected form of this graph
        plus one edge for every pair of parents of a common child, so all
        the immoralities are removed.

        A v-structure X->Z<-Y is an immorality if there is no directed edge
        between X and Y.

        Returns
        -------
        UndirectedGraph
            The moral graph.

        Examples
        --------
        >>> from pgmpy.base import DirectedGraph
        >>> G = DirectedGraph(ebunch=[('diff', 'grade'), ('intel', 'grade')])
        >>> moral_graph = G.moralize()
        >>> moral_graph.edges()
        [('intel', 'grade'), ('intel', 'diff'), ('grade', 'diff')]
        """
        moral_graph = UndirectedGraph(self.to_undirected().edges())

        # All parent pairs, node by node, handed over in a single call.
        marrying_edges = itertools.chain.from_iterable(
            itertools.combinations(self.get_parents(child), 2)
            for child in self.nodes())
        moral_graph.add_edges_from(marrying_edges)

        return moral_graph
Example #10
0
    def test_skeleton_to_pdag(self):
        # Random integer columns A, B, D; C is then derived from A and B,
        # and A is added into D.
        data = pd.DataFrame(np.random.randint(0, 3, size=(1000, 3)),
                            columns=list('ABD'))
        data['C'] = data['A'] - data['B']
        data['D'] += data['A']
        c = ConstraintBasedEstimator(data)

        # PDAG built from the skeleton estimated on the data.
        pdag = c.skeleton_to_pdag(*c.estimate_skeleton())
        self.assertSetEqual(
            set(pdag.edges()),
            {('B', 'C'), ('A', 'D'), ('A', 'C'), ('D', 'A')})

        # Hand-made skeleton B - A - C, used with two different
        # separating sets for the pair (B, C).
        skel = UndirectedGraph([('A', 'B'), ('A', 'C')])

        # Empty separating set: only B->A and C->A are expected.
        sep_sets1 = {frozenset({'B', 'C'}): ()}
        pdag1 = c.skeleton_to_pdag(skel, sep_sets1)
        self.assertSetEqual(set(pdag1.edges()),
                            {('B', 'A'), ('C', 'A')})

        # A in the separating set: both edges are expected in both
        # directions. The PDAG is computed once and reused.
        sep_sets2 = {frozenset({'B', 'C'}): ('A', )}
        pdag2 = c.skeleton_to_pdag(skel, sep_sets2)
        self.assertSetEqual(
            set(pdag2.edges()),
            {('A', 'B'), ('B', 'A'), ('A', 'C'), ('C', 'A')})
    def test_skeleton_to_pdag(self):
        # Random integer columns A, B, D; C is then derived from A and B,
        # and A is added into D.
        data = pd.DataFrame(np.random.randint(0, 3, size=(1000, 3)),
                            columns=list("ABD"))
        data["C"] = data["A"] - data["B"]
        data["D"] += data["A"]
        c = ConstraintBasedEstimator(data)

        # PDAG built from the skeleton estimated on the data.
        pdag = c.skeleton_to_pdag(*c.estimate_skeleton())
        self.assertSetEqual(
            set(pdag.edges()),
            {("B", "C"), ("A", "D"), ("A", "C"), ("D", "A")},
        )

        # Hand-made skeleton B - A - C, used with two different
        # separating sets for the pair (B, C).
        skel = UndirectedGraph([("A", "B"), ("A", "C")])

        # Empty separating set: only B->A and C->A are expected.
        sep_sets1 = {frozenset({"B", "C"}): ()}
        pdag1 = c.skeleton_to_pdag(skel, sep_sets1)
        self.assertSetEqual(
            set(pdag1.edges()),
            {("B", "A"), ("C", "A")},
        )

        # A in the separating set: both edges are expected in both
        # directions. The PDAG is computed once and reused.
        sep_sets2 = {frozenset({"B", "C"}): ("A", )}
        pdag2 = c.skeleton_to_pdag(skel, sep_sets2)
        self.assertSetEqual(
            set(pdag2.edges()),
            {("A", "B"), ("B", "A"), ("A", "C"), ("C", "A")},
        )
 def test_is_triangulated(self):
     # The four edges form the cycle A-B-D-C-A, which is expected not to
     # be triangulated.
     cycle_edges = [('A', 'B'), ('A', 'C'), ('B', 'D'), ('C', 'D')]
     G = UndirectedGraph(cycle_edges)
     is_triangulated_before = G.is_triangulated()
     self.assertFalse(is_triangulated_before)

     # After adding the edge A-D the graph is expected to be triangulated.
     G.add_edge('A', 'D')
     is_triangulated_after = G.is_triangulated()
     self.assertTrue(is_triangulated_after)
    def build_skeleton(nodes, independencies):
        """Estimates a graph skeleton (UndirectedGraph) from a set of independencies
        using (the first part of) the PC algorithm. The independencies can either be
        provided as an instance of the `Independencies`-class or by passing a
        decision function that decides any conditional independency assertion.
        Returns a tuple `(skeleton, separating_sets)`.

        If an Independencies-instance is passed, the contained IndependenceAssertions
        have to admit a faithful BN representation. This is the case if
        they are obtained as a set of d-separations of some Bayesian network or
        if the independence assertions are closed under the semi-graphoid axioms.
        Otherwise the procedure may fail to identify the correct structure.

        Parameters
        ----------
        nodes: list, array-like
            A list of node/variable names of the network skeleton.

        independencies: Independencies-instance or function.
            The source of independency information from which to build the skeleton.
            The provided Independencies should admit a faithful representation.
            Can either be provided as an Independencies()-instance or by passing a
            function `f(X, Y, Zs)` that returns `True` when X _|_ Y | Zs,
            otherwise `False`. (X, Y being individual nodes and Zs a list of nodes).

        Returns
        -------
        skeleton: UndirectedGraph
            An estimate for the undirected graph skeleton of the BN underlying the data.

        separating_sets: dict
            A dict containing for each pair of not directly connected nodes a
            separating set ("witnessing set") of variables that makes them
            conditionally independent. (needed for edge orientation procedures)

        Raises
        ------
        ValueError
            If `independencies` is neither an Independencies-instance nor callable.

        References
        ----------
        [1] Neapolitan, Learning Bayesian Networks, Section 10.1.2, Algorithm 10.2 (page 550)
            http://www.cs.technion.ac.il/~dang/books/Learning%20Bayesian%20Networks(Neapolitan,%20Richard).pdf
        [2] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
            Section 3.4.2.1 (page 85), Algorithm 3.3

        Examples
        --------
        >>> from pgmpy.estimators import ConstraintBasedEstimator
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.independencies import Independencies

        >>> # build skeleton from list of independencies:
        ... ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        >>> # we need to compute closure, otherwise this set of independencies doesn't
        ... # admit a faithful representation:
        ... ind = ind.closure()
        >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
        >>> print(skel.edges())
        [('A', 'D'), ('B', 'D'), ('C', 'D')]

        >>> # build skeleton from d-separations of BayesianModel:
        ... model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies())
        >>> print(skel.edges())
        [('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')]
        """

        nodes = list(nodes)

        if isinstance(independencies, Independencies):
            def is_independent(X, Y, Zs):
                return IndependenceAssertion(X, Y, Zs) in independencies
        elif callable(independencies):
            is_independent = independencies
        else:
            raise ValueError("'independencies' must be either Independencies-instance " +
                             "or a ternary function that decides independencies.")

        # Start from the complete graph and remove edges between nodes that
        # turn out to be (conditionally) independent.
        graph = UndirectedGraph(combinations(nodes, 2))
        lim_neighbors = 0
        separating_sets = dict()
        # neighbors() may return an iterator instead of a list, so it is
        # materialized before taking its length.
        while not all(len(list(graph.neighbors(node))) < lim_neighbors for node in nodes):
            for node in nodes:
                # Iterate over a snapshot: edges of `node` are removed inside
                # the loop, which must not disturb the iteration.
                for neighbor in list(graph.neighbors(node)):
                    # search if there is a set of neighbors (of size lim_neighbors)
                    # that makes X and Y independent:
                    candidates = set(graph.neighbors(node)) - {neighbor}
                    for separating_set in combinations(candidates, lim_neighbors):
                        if is_independent(node, neighbor, separating_set):
                            separating_sets[frozenset((node, neighbor))] = separating_set
                            graph.remove_edge(node, neighbor)
                            break
            lim_neighbors += 1

        return graph, separating_sets
 def setUp(self):
     # Store an UndirectedGraph constructed without arguments on the
     # test instance.
     self.graph = UndirectedGraph()
class TestUndirectedGraphCreation(unittest.TestCase):
    """Tests for creating an UndirectedGraph and adding nodes/edges to it.

    Results of ``nodes()`` and ``neighbors()`` are converted with ``list``
    before being compared as lists or measured with ``len``, so the checks
    do not depend on those calls returning an actual list.
    """

    def setUp(self):
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_add_node_string(self):
        self.graph.add_node('a')
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd'])

    def test_add_node_with_weight(self):
        # A node added without a weight is expected to carry weight None.
        # NOTE(review): `Graph.node` no longer exists in recent networkx
        # releases (`Graph.nodes[...]` replaces it) - confirm the targeted
        # networkx version before changing this.
        self.graph.add_node('a')
        self.graph.add_node('weight_a', weight=0.3)
        self.assertEqual(self.graph.node['weight_a']['weight'], 0.3)
        self.assertEqual(self.graph.node['a']['weight'], None)

    def test_add_nodes_from_with_weight(self):
        self.graph.add_node(1)
        self.graph.add_nodes_from(['weight_b', 'weight_c'], weights=[0.3, 0.5])
        self.assertEqual(self.graph.node['weight_b']['weight'], 0.3)
        self.assertEqual(self.graph.node['weight_c']['weight'], 0.5)
        self.assertEqual(self.graph.node[1]['weight'], None)

    def test_add_nodes_from_non_string(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
class TestUndirectedGraphCreation(unittest.TestCase):
    """Tests for creating an UndirectedGraph and adding nodes/edges to it."""

    def setUp(self):
        self.graph = UndirectedGraph()

    def tearDown(self):
        del self.graph

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([("a", "b"), ("b", "c")])
        node_names = sorted(self.G.nodes())
        self.assertListEqual(node_names, ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.G.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

    def test_add_node_string(self):
        self.graph.add_node("a")
        node_names = list(self.graph.nodes())
        self.assertListEqual(node_names, ["a"])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        node_names = list(self.graph.nodes())
        self.assertListEqual(node_names, [1])

    def test_add_nodes_from_string(self):
        names = ["a", "b", "c", "d"]
        self.graph.add_nodes_from(names)
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"])

    def test_add_node_with_weight(self):
        # A node added without a weight is expected to carry weight None.
        self.graph.add_node("a")
        self.graph.add_node("weight_a", weight=0.3)
        weighted = self.graph.nodes["weight_a"]["weight"]
        self.assertEqual(weighted, 0.3)
        unweighted = self.graph.nodes["a"]["weight"]
        self.assertEqual(unweighted, None)

    def test_add_nodes_from_with_weight(self):
        self.graph.add_node(1)
        self.graph.add_nodes_from(["weight_b", "weight_c"], weights=[0.3, 0.5])
        weight_b = self.graph.nodes["weight_b"]["weight"]
        self.assertEqual(weight_b, 0.3)
        weight_c = self.graph.nodes["weight_c"]["weight"]
        self.assertEqual(weight_c, 0.5)
        weight_1 = self.graph.nodes[1]["weight"]
        self.assertEqual(weight_1, None)

    def test_add_nodes_from_non_string(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")
        self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"])
        edges_after_first = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edges_after_first, [["d", "e"]])

        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        edges_after_second = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edges_after_second, [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"])
        first_edges = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(first_edges, [["a", "b"], ["b", "c"]])

        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ["a", "b", "c", "d", "e", "f"])
        all_edges = hf.recursive_sorted(self.graph.edges())
        expected_edges = hf.recursive_sorted(
            [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")])
        self.assertListEqual(all_edges, expected_edges)

    def test_add_edges_from_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        neighbors_of_b = list(self.graph.neighbors("b"))
        self.assertEqual(len(neighbors_of_b), 2)
Example #17
0
 def test_jt_from_chordal_graph(self):
     # Two triangles (0-1-2 and 3-4-5) joined by the edge (0, 3).
     edges = [(0, 1), (1, 2), (2, 0), (3, 4),
              (4, 5), (5, 3), (0, 3)]
     graph = UndirectedGraph(edges)
     # Whatever jt_techniques returns here must report itself as
     # triangulated.
     result = graph.jt_techniques(0, True, True)
     self.assertTrue(result.is_triangulated())
Example #18
0
 def test_class_init_with_data_string(self):
     # A graph built from an edge list must expose the nodes and edges
     # of that list.
     edge_list = [("a", "b"), ("b", "c")]
     self.G = UndirectedGraph(edge_list)
     node_names = sorted(self.G.nodes())
     self.assertListEqual(node_names, ["a", "b", "c"])
     edge_pairs = hf.recursive_sorted(self.G.edges())
     self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])
Example #19
0
 def setUp(self):
     # Store an UndirectedGraph constructed without arguments on the
     # test instance.
     self.graph = UndirectedGraph()
Example #20
0
class TestUndirectedGraphCreation(unittest.TestCase):
    """Tests for creating an UndirectedGraph and adding nodes/edges to it.

    Results of ``nodes()`` and ``neighbors()`` are converted with ``list``
    before being compared as lists or measured with ``len``, so the checks
    do not depend on those calls returning an actual list.
    """

    def setUp(self):
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_add_node_string(self):
        self.graph.add_node('a')
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
Example #21
0
 def test_class_init_with_data_string(self):
     # A graph built from an edge list must expose the nodes and edges
     # of that list.
     initial_edges = [('a', 'b'), ('b', 'c')]
     self.G = UndirectedGraph(initial_edges)

     expected_nodes = ['a', 'b', 'c']
     self.assertListEqual(sorted(self.G.nodes()), expected_nodes)

     expected_edges = [['a', 'b'], ['b', 'c']]
     self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                          expected_edges)
Example #22
0
    def build_skeleton(nodes, independencies):
        """Estimates a graph skeleton (UndirectedGraph) from a set of independencies
        using (the first part of) the PC algorithm. The independencies can either be
        provided as an instance of the `Independencies`-class or by passing a
        decision function that decides any conditional independency assertion.
        Returns a tuple `(skeleton, separating_sets)`.

        If an Independencies-instance is passed, the contained IndependenceAssertions
        have to admit a faithful BN representation. This is the case if
        they are obtained as a set of d-separations of some Bayesian network or
        if the independence assertions are closed under the semi-graphoid axioms.
        Otherwise the procedure may fail to identify the correct structure.

        Parameters
        ----------
        nodes: list, array-like
            A list of node/variable names of the network skeleton.

        independencies: Independencies-instance or function.
            The source of independency information from which to build the skeleton.
            The provided Independencies should admit a faithful representation.
            Can either be provided as an Independencies()-instance or by passing a
            function `f(X, Y, Zs)` that returns `True` when X _|_ Y | Zs,
            otherwise `False`. (X, Y being individual nodes and Zs a list of nodes).

        Returns
        -------
        skeleton: UndirectedGraph
            An estimate for the undirected graph skeleton of the BN underlying the data.

        separating_sets: dict
            A dict containing for each pair of not directly connected nodes a
            separating set ("witnessing set") of variables that makes them
            conditionally independent. (needed for edge orientation procedures)

        Raises
        ------
        ValueError
            If `independencies` is neither an Independencies-instance nor callable.

        References
        ----------
        [1] Neapolitan, Learning Bayesian Networks, Section 10.1.2, Algorithm 10.2 (page 550)
            http://www.cs.technion.ac.il/~dang/books/Learning%20Bayesian%20Networks(Neapolitan,%20Richard).pdf
        [2] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
            Section 3.4.2.1 (page 85), Algorithm 3.3

        Examples
        --------
        >>> from pgmpy.estimators import ConstraintBasedEstimator
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.independencies import Independencies

        >>> # build skeleton from list of independencies:
        ... ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        >>> # we need to compute closure, otherwise this set of independencies doesn't
        ... # admit a faithful representation:
        ... ind = ind.closure()
        >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
        >>> print(skel.edges())
        [('A', 'D'), ('B', 'D'), ('C', 'D')]

        >>> # build skeleton from d-separations of BayesianModel:
        ... model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies())
        >>> print(skel.edges())
        [('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')]
        """

        nodes = list(nodes)

        if isinstance(independencies, Independencies):

            def is_independent(X, Y, Zs):
                return IndependenceAssertion(X, Y, Zs) in independencies
        elif callable(independencies):
            is_independent = independencies
        else:
            raise ValueError(
                "'independencies' must be either Independencies-instance " +
                "or a ternary function that decides independencies.")

        # Start from the complete graph and remove edges between nodes that
        # turn out to be (conditionally) independent.
        graph = UndirectedGraph(combinations(nodes, 2))
        lim_neighbors = 0
        separating_sets = dict()
        # neighbors() may return an iterator instead of a list, so it is
        # materialized before taking its length.
        while not all(
                len(list(graph.neighbors(node))) < lim_neighbors
                for node in nodes):
            for node in nodes:
                # Iterate over a snapshot: edges of `node` are removed inside
                # the loop, which must not disturb the iteration.
                for neighbor in list(graph.neighbors(node)):
                    # search if there is a set of neighbors (of size lim_neighbors)
                    # that makes X and Y independent:
                    candidates = set(graph.neighbors(node)) - {neighbor}
                    for separating_set in combinations(
                            candidates, lim_neighbors):
                        if is_independent(node, neighbor, separating_set):
                            separating_sets[frozenset(
                                (node, neighbor))] = separating_set
                            graph.remove_edge(node, neighbor)
                            break
            lim_neighbors += 1

        return graph, separating_sets
Example #23
0
class TestUndirectedGraphCreation(unittest.TestCase):
    """Tests for building an UndirectedGraph and adding nodes/edges to it.

    Node and neighbor collections are wrapped in ``list``/``sorted`` before
    being compared, so the assertions do not depend on whether the graph
    hands back a plain list or a view/iterator.
    NOTE(review): UndirectedGraph appears to be networkx-based, where the
    return type of ``nodes()``/``neighbors()`` differs between major
    versions -- confirm.
    """

    def setUp(self):
        # every test starts from an empty graph
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()), [["a", "b"], ["b", "c"]])

    def test_add_node_string(self):
        self.graph.add_node("a")
        self.assertListEqual(list(self.graph.nodes()), ["a"])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(["a", "b", "c", "d"])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        self.graph.add_nodes_from([1, 2, 3, 4])
        # non-string labels must be stored just like string labels
        self.assertListEqual(sorted(self.graph.nodes()), [1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")
        self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["d", "e"]])
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        self.graph.add_edge(1, 2)
        # adding an edge implicitly adds both end points
        self.assertListEqual(sorted(self.graph.nodes()), [1, 2])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [[1, 2]])

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["b", "c"]])
        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d", "e", "f"])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()),
            hf.recursive_sorted([("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]),
        )

    def test_add_edges_from_nonstring(self):
        self.graph.add_edges_from([(1, 2), (2, 3)])
        self.assertListEqual(sorted(self.graph.nodes()), [1, 2, 3])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [[1, 2], [2, 3]])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        # materialise first: neighbors() may be an iterator without len()
        self.assertEqual(len(list(self.graph.neighbors("b"))), 2)

    def tearDown(self):
        del self.graph
Example #24
0
    def mmpc(self, significance_level=0.01):
        """
        Estimates a graph skeleton (UndirectedGraph) over the variables in
        `self.state_names` with a max-min parents-and-children style search.

        For every node a candidate neighbor list is grown greedily (forward
        phase), pruned with conditional independence tests (backward phase)
        and finally made symmetric, so that an edge is only kept when both
        end points list each other as neighbors.

        Parameters
        ----------
        significance_level: float, default: 0.01
            NOTE(review): accepted but not referenced anywhere in this method;
            `self.test_conditional_independence` is called without it.
            Confirm whether it should be forwarded.

        Returns
        -------
        skel: UndirectedGraph
            Graph containing every variable as a node and one edge per
            surviving neighbor pair.
        """
        nodes = self.state_names.keys()

        def assoc(X, Y, Zs):
            """Measure for (conditional) association between X and Y given Zs:
            one minus the second element of the `chi_square` result
            (presumably the p-value of the independence test).
            """
            return 1 - chi_square(X, Y, Zs, self.data)[1]

        def min_assoc(X, Y, Zs):
            "Minimal association of X, Y given any subset of Zs."
            return min(assoc(X, Y, Zs_subset) for Zs_subset in powerset(Zs))

        def max_min_heuristic(X, Zs):
            "Finds the variable that maximizes min_assoc with `X` relative to `Zs`."
            max_min_assoc = 0
            best_Y = None

            for Y in set(nodes) - set(Zs + [X]):
                min_assoc_val = min_assoc(X, Y, Zs)
                if min_assoc_val >= max_min_assoc:
                    best_Y = Y
                    max_min_assoc = min_assoc_val

            return (best_Y, max_min_assoc)

        # Find parents and children for each node
        neighbors = dict()
        for node in nodes:
            neighbors[node] = []

            # Forward Phase: keep adding the best candidate while it still
            # shows positive association given the current neighbors
            while True:
                new_neighbor, new_neighbor_min_assoc = max_min_heuristic(
                    node, neighbors[node])
                if new_neighbor_min_assoc > 0:
                    neighbors[node].append(new_neighbor)
                else:
                    break

            # Backward Phase: iterate over a snapshot, because removing from
            # the list that is being iterated would skip the next candidate
            for neigh in list(neighbors[node]):
                other_neighbors = [n for n in neighbors[node] if n != neigh]
                if any(self.test_conditional_independence(node, neigh, sep_set)
                       for sep_set in powerset(other_neighbors)):
                    neighbors[node].remove(neigh)

        # correct for false positives: keep only symmetric neighbor relations
        # (list is rebuilt instead of mutated while being iterated)
        for node in nodes:
            neighbors[node] = [neigh for neigh in neighbors[node]
                               if node in neighbors[neigh]]

        skel = UndirectedGraph()
        skel.add_nodes_from(nodes)
        for node in nodes:
            skel.add_edges_from([(node, neigh) for neigh in neighbors[node]])

        return skel
 def test_is_triangulated(self):
     """A chordless four-cycle is not triangulated until a chord is added."""
     cycle_edges = [('A', 'B'), ('A', 'C'), ('B', 'D'), ('C', 'D')]
     graph = UndirectedGraph(cycle_edges)
     # A-B-D-C-A is a cycle of length four without any chord
     self.assertFalse(graph.is_triangulated())

     # the chord A-D splits the cycle into two triangles
     graph.add_edge('A', 'D')
     self.assertTrue(graph.is_triangulated())
Example #26
0
    def mmpc(self, data, nodes):
        """
        Estimates a graph skeleton (UndirectedGraph) for the data set, using
        the MMPC (max-min parents-and-children) algorithm.

        Parameters
        ----------
        data:
            Data set handed to `BaseEstimator` (with
            `complete_samples_only=False`); its
            `test_conditional_independence` method supplies the test results.
        nodes: iterable
            Variables for which the skeleton is built.

        Returns
        -------
        skel: UndirectedGraph
            Graph over `nodes` with one edge per neighbor pair that survived
            the forward phase, the backward phase and the symmetry correction.

        Notes
        -----
        Test results are stored in `self.p_val_cache`, `self.alpha` is the
        threshold used to accept independence and `self.max_reach` bounds the
        size of the conditioning sets that are tried.
        """
        # materialise once so that a one-shot iterable can be reused below
        nodes = list(nodes)
        node_set = set(nodes)

        cb_estimator = BaseEstimator(data=data, complete_samples_only=False)

        def cached_test(X, Y, Zs):
            """
            Returns `(p_value, sufficient_data)` for the test X _|_ Y | Zs,
            running the test only when the result is not cached yet.
            """
            key = (tuple(sorted([X, Y])), tuple(sorted(Zs)))
            if key in self.p_val_cache:
                return self.p_val_cache.get(key)
            chi2, p_value, sufficient_data = cb_estimator.test_conditional_independence(
                X, Y, Zs)
            self.p_val_cache.update({key: (p_value, sufficient_data)})
            return p_value, sufficient_data

        def is_independent(X, Y, Zs):
            """
            Returns result of hypothesis test for the null hypothesis that
            X _|_ Y | Zs, using threshold `self.alpha`.
            """
            p_value, sufficient_data = cached_test(X, Y, Zs)
            return p_value >= self.alpha and sufficient_data

        def assoc(X, Y, Zs):
            """
            Measure for (conditional) association between variables:
            one minus the p-value of the independence test.
            """
            return 1 - cached_test(X, Y, Zs)[0]

        def bounded_subsets(Zs):
            """
            Yields all subsets of Zs with at most `self.max_reach` elements,
            smallest first.
            """
            for size in range(min(self.max_reach, len(Zs)) + 1):
                for subset in combinations(Zs, size):
                    yield subset

        def min_assoc(X, Y, Zs):
            """
            Minimal association of X, Y given any (bounded) subset of Zs.
            """
            return min((assoc(X, Y, Zs_subset)
                        for Zs_subset in bounded_subsets(Zs)),
                       default=float('inf'))

        def max_min_heuristic(X, Zs):
            """
            Finds the variable that maximizes min_assoc with `X` relative to `Zs`.
            """
            max_min_assoc = 0
            best_Y = None

            for Y in node_set - set(Zs + [X]):
                min_assoc_val = min_assoc(X, Y, Zs)
                if min_assoc_val >= max_min_assoc:
                    best_Y = Y
                    max_min_assoc = min_assoc_val

            return best_Y, max_min_assoc

        # Find parents and children for each node
        neighbors = dict()
        for node in nodes:
            neighbors[node] = []

            # Forward Phase: keep adding the best candidate while it still
            # shows positive association given the current neighbors
            while True:
                new_neighbor, new_neighbor_min_assoc = max_min_heuristic(
                    node, neighbors[node])
                if new_neighbor_min_assoc > 0:
                    neighbors[node].append(new_neighbor)
                else:
                    break

            # Backward Phase: iterate over a snapshot, because removing from
            # the list that is being iterated would skip the next candidate
            for neigh in list(neighbors[node]):
                other_neighbors = [n for n in neighbors[node] if n != neigh]
                if any(is_independent(node, neigh, sep_set)
                       for sep_set in bounded_subsets(other_neighbors)):
                    neighbors[node].remove(neigh)

        # correct for false positives: keep only symmetric neighbor relations
        # (list is rebuilt instead of mutated while being iterated)
        for node in nodes:
            neighbors[node] = [neigh for neigh in neighbors[node]
                               if node in neighbors[neigh]]

        skel = UndirectedGraph()
        skel.add_nodes_from(nodes)
        for node in nodes:
            skel.add_edges_from([(node, neigh) for neigh in neighbors[node]])

        return skel
Example #27
0
 def test_check_clique(self):
     """check_clique accepts the full node set of a complete graph."""
     # every pair among the nodes 0..3 is connected
     complete_edges = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
     complete_graph = UndirectedGraph(complete_edges)
     self.assertTrue(complete_graph.check_clique(complete_graph.nodes()))
 def test_is_triangulated(self):
     """A chordless four-cycle is not triangulated until a chord is added."""
     square = [("A", "B"), ("A", "C"), ("B", "D"), ("C", "D")]
     graph = UndirectedGraph(square)
     # A-B-D-C-A has length four and no chord yet
     self.assertFalse(graph.is_triangulated())

     # adding A-D leaves only triangles
     graph.add_edge("A", "D")
     self.assertTrue(graph.is_triangulated())
 def test_class_init_with_data_string(self):
     """Build a graph from string-labelled edges; check its nodes and edges."""
     edge_list = [("a", "b"), ("b", "c")]
     self.G = UndirectedGraph(edge_list)

     node_names = sorted(self.G.nodes())
     self.assertListEqual(node_names, ["a", "b", "c"])

     edge_pairs = hf.recursive_sorted(self.G.edges())
     self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        Returns
        -------
        JunctionTree
            Tree whose nodes are the maximal cliques of the triangulated model,
            with one clique potential added per node.

        Raises
        ------
        ValueError
            If the scopes of the model's factors do not cover exactly the
            model's nodes, or if some factor could not be assigned to any
            clique.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgmpy.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        # If there is only 1 clique, then the junction tree formed is just a
        # clique tree with that single clique as the node
        if len(cliques) == 1:
            clique_trees = JunctionTree()
            clique_trees.add_node(cliques[0])

        # Else if the number of cliques is more than 1 then create a complete
        # graph with all the cliques as nodes and weight of the edges being
        # the length of sepset between two cliques
        elif len(cliques) >= 2:
            complete_graph = UndirectedGraph()
            for clique_a, clique_b in itertools.combinations(cliques, 2):
                sepset_size = len(set(clique_a).intersection(clique_b))
                # negated so that the *minimum* spanning tree computed below
                # is the one with the largest total sepset size
                complete_graph.add_edge(clique_a, clique_b, weight=-sepset_size)

            # Create clique trees by minimum (or maximum) spanning tree method
            clique_trees = JunctionTree(nx.minimum_spanning_tree(complete_graph).edges())

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain(*[factor.scope() for factor in self.factors])
        if set(all_vars) != set(self.nodes()):
            raise ValueError('DiscreteFactor for all the random variables not specified')

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # Looked up once; it does not change while the potentials are built
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope; once assigned it is not used again
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = DiscreteFactor(node, var_card, np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node)
            clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError('All the factors were not used to create Junction Tree.'
                             'Extra factors are defined.')

        return clique_trees
 def test_class_init_with_data_string(self):
     """Build a graph from string-labelled edges; check its nodes and edges."""
     edge_list = [('a', 'b'), ('b', 'c')]
     self.G = UndirectedGraph(edge_list)

     node_names = sorted(self.G.nodes())
     self.assertListEqual(node_names, ['a', 'b', 'c'])

     edge_pairs = hf.recursive_sorted(self.G.edges())
     self.assertListEqual(edge_pairs, [['a', 'b'], ['b', 'c']])
Example #32
0
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        Returns
        -------
        JunctionTree
            Tree whose nodes are the maximal cliques of the triangulated model,
            with one clique potential added per node.

        Raises
        ------
        ValueError
            If the scopes of the model's factors do not cover exactly the
            model's nodes, or if some factor could not be assigned to any
            clique.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors import Factor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [Factor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgmpy.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        # If there is only 1 clique, then the junction tree formed is just a
        # clique tree with that single clique as the node
        if len(cliques) == 1:
            clique_trees = JunctionTree()
            clique_trees.add_node(cliques[0])

        # Else if the number of cliques is more than 1 then create a complete
        # graph with all the cliques as nodes and weight of the edges being
        # the length of sepset between two cliques
        elif len(cliques) >= 2:
            complete_graph = UndirectedGraph()
            for clique_a, clique_b in itertools.combinations(cliques, 2):
                sepset_size = len(set(clique_a).intersection(clique_b))
                # negated so that the *minimum* spanning tree computed below
                # is the one with the largest total sepset size
                complete_graph.add_edge(clique_a, clique_b, weight=-sepset_size)

            # Create clique trees by minimum (or maximum) spanning tree method
            clique_trees = JunctionTree(nx.minimum_spanning_tree(complete_graph).edges())

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain(*[factor.scope() for factor in self.factors])
        if set(all_vars) != set(self.nodes()):
            raise ValueError('Factor for all the random variables not specified')

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # Looked up once; it does not change while the potentials are built
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope; once assigned it is not used again
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = Factor(node, var_card, np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node)
            clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError('All the factors were not used to create Junction Tree.'
                             'Extra factors are defined.')

        return clique_trees
 def test_is_clique(self):
     """Check is_clique on several node subsets of one undirected graph."""
     G = UndirectedGraph([('A', 'B'), ('C', 'B'), ('B', 'D'), ('B', 'E'),
                          ('D', 'E'), ('E', 'F'), ('D', 'F'), ('B', 'F')])
     # A, C and D are only joined through B (no A-C, A-D or C-D edge)
     self.assertFalse(G.is_clique(nodes=['A', 'B', 'C', 'D']))
     # all six pairs among B, D, E, F appear in the edge list
     self.assertTrue(G.is_clique(nodes=['B', 'D', 'E', 'F']))
     # a subset of the clique above, given in a different order
     self.assertTrue(G.is_clique(nodes=['D', 'E', 'B']))