Ejemplo n.º 1
0
def random_dag(number_of_nodes: int = 5,
               edge_density: float = 0.4,
               max_in_degree: int = 4) -> DAG:
    """Create a connected, random directed acyclic graph (DAG).

    Nodes are named ``X0 .. X{n-1}`` and every edge points from a lower-numbered
    node to a higher-numbered one, which keeps the graph acyclic.

    Parameters
    ----------
    number_of_nodes: number of nodes in the graph. Zero (or a negative value)
        yields an empty graph.
    edge_density: desired fraction of the ``n * (n - 1) / 2`` possible edges.
        The resulting edge count is capped at what ``max_in_degree`` allows, and
        is never lower than the ``n - 1`` edges needed to connect the graph.
    max_in_degree: maximum number of parents any node may have.

    Returns
    -------
    The generated DAG.

    Raises
    ------
    ValueError: if more than one node is requested with ``max_in_degree < 1``,
        because such a graph cannot be connected.
    """
    node_names = [f"X{i}" for i in range(number_of_nodes)]
    dag = DAG()
    if not node_names:
        return dag
    if number_of_nodes > 1 and max_in_degree < 1:
        raise ValueError(
            "max_in_degree must be at least 1 to connect more than one node")

    # Constant-time replacement for the repeated node_names.index() lookups.
    position = {name: index for index, name in enumerate(node_names)}

    # First make sure the dag is connected: grow a random spanning structure
    # by attaching one unvisited node to one visited node at a time.
    unvisited = list(node_names)
    node = random.choice(unvisited)
    unvisited.remove(node)
    visited = [node]
    dag.add_node(node)

    while unvisited:
        node = random.choice(unvisited)
        neighbor = random.choice(visited)
        if position[node] < position[neighbor]:
            # The edge must point at the visited node; resample if it is full.
            if dag.in_degree(neighbor) >= max_in_degree:
                continue
            dag.add_edge(node, neighbor)
        else:
            # The unvisited node has no parents yet, so it can always take one.
            dag.add_edge(neighbor, node)
        unvisited.remove(node)
        visited.append(node)

    # Then add edges until desired density is reached. The node at position i
    # can have at most min(i, max_in_degree) parents, so cap the target at the
    # number of edges that can actually exist; otherwise the loop never ends.
    maximum_number_of_edges = number_of_nodes * (number_of_nodes - 1) / 2
    reachable_number_of_edges = sum(
        min(index, max_in_degree) for index in range(number_of_nodes))
    target_number_of_edges = min(int(edge_density * maximum_number_of_edges),
                                 reachable_number_of_edges)
    while dag.number_of_edges() < target_number_of_edges:
        add_random_edge(dag, node_names, max_in_degree)

    return dag
Ejemplo n.º 2
0
def add_random_edge(dag: DAG,
                    node_order: List[str],
                    max_in_degree: int = 4) -> None:
    """Try to add one random edge to the graph, respecting the given node_order.

    Two distinct entries of ``node_order`` are sampled and an edge is added from
    the one that comes first in ``node_order`` to the one that comes later,
    unless the later node already has ``max_in_degree`` parents.

    It may not add any edge: nothing happens when fewer than two nodes are
    given, when the target node is already at its in-degree limit, or when the
    two sampled entries resolve to the same position.

    Parameters
    ----------
    dag: graph to modify in place; ``in_degree`` and ``add_edge`` are called on it.
    node_order: node names in the order that edges must respect.
    max_in_degree: maximum number of parents a node may have.
    """
    if len(node_order) < 2:
        # random.sample would raise ValueError; there is no edge to add anyway.
        return

    n1, n2 = random.sample(node_order, 2)
    position1, position2 = node_order.index(n1), node_order.index(n2)
    if position1 == position2:
        return

    parent, child = (n1, n2) if position1 < position2 else (n2, n1)
    if dag.in_degree(child) < max_in_degree:
        dag.add_edge(parent, child)
Ejemplo n.º 3
0
class TestDAGCreation(unittest.TestCase):
    """Tests for building ``DAG`` objects and reading back their nodes and edges."""

    def setUp(self):
        # Each test starts from a fresh, empty graph.
        self.graph = DAG()

    def tearDown(self):
        del self.graph

    def _edge_data(self, source, target):
        """Return the attribute mapping of the edge ``source -> target``.

        It is read from ``graph.edge`` when the networkx version string starts
        with "1", and from ``graph.adj`` otherwise.
        """
        if nx.__version__.startswith("1"):
            return self.graph.edge[source][target]
        return self.graph.adj[source][target]

    def test_class_init_without_data(self):
        """A DAG built without arguments is a DAG instance."""
        self.assertIsInstance(self.graph, DAG)

    def test_class_init_with_data_string(self):
        """Building from an edge list creates its nodes and edges."""
        self.graph = DAG([("a", "b"), ("b", "c")])
        node_names = sorted(self.graph.nodes())
        self.assertListEqual(node_names, ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

    def test_add_node_string(self):
        """A single string node can be added and read back."""
        self.graph.add_node("a")
        self.assertListEqual(list(self.graph.nodes()), ["a"])

    def test_add_node_nonstring(self):
        """Adding a non-string node completes without raising."""
        self.graph.add_node(1)

    def test_add_nodes_from_string(self):
        """Several string nodes can be added at once."""
        self.graph.add_nodes_from(["a", "b", "c", "d"])
        node_names = sorted(self.graph.nodes())
        self.assertListEqual(node_names, ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        """Adding several non-string nodes completes without raising."""
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_node_weight(self):
        """A weight passed to add_node is stored under the "weight" attribute."""
        self.graph.add_node("weighted_a", 0.3)
        stored_weight = self.graph.nodes["weighted_a"]["weight"]
        self.assertEqual(stored_weight, 0.3)

    def test_add_nodes_from_weight(self):
        """Weights passed to add_nodes_from are stored per node; the default is None."""
        self.graph.add_nodes_from(["weighted_b", "weighted_c"], [0.5, 0.6])
        for name, weight in (("weighted_b", 0.5), ("weighted_c", 0.6)):
            self.assertEqual(self.graph.nodes[name]["weight"], weight)

        self.graph.add_nodes_from(["e", "f"])
        for name in ("e", "f"):
            self.assertEqual(self.graph.nodes[name]["weight"], None)

    def test_add_edge_string(self):
        """add_edge creates missing end points and keeps earlier edges."""
        self.graph.add_edge("d", "e")
        self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"])
        self.assertListEqual(list(self.graph.edges()), [("d", "e")])

        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        """Adding an edge between non-string nodes completes without raising."""
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        """add_edges_from creates nodes and edges, and accumulates over calls."""
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(
            sorted(self.graph.nodes()), ["a", "b", "c", "d", "e", "f"]
        )
        expected_pairs = hf.recursive_sorted(
            [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]
        )
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()), expected_pairs
        )

    def test_add_edges_from_nonstring(self):
        """Adding several edges between non-string nodes completes without raising."""
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_add_edge_weight(self):
        """A weight passed to add_edge is stored under the "weight" attribute."""
        self.graph.add_edge("a", "b", weight=0.3)
        self.assertEqual(self._edge_data("a", "b")["weight"], 0.3)

    def test_add_edges_from_weight(self):
        """Weights passed to add_edges_from are stored per edge; the default is None."""
        self.graph.add_edges_from([("b", "c"), ("c", "d")], weights=[0.5, 0.6])
        self.assertEqual(self._edge_data("b", "c")["weight"], 0.5)
        self.assertEqual(self._edge_data("c", "d")["weight"], 0.6)

        self.graph.add_edges_from([("e", "f")])
        self.assertEqual(self._edge_data("e", "f")["weight"], None)

    def test_update_node_parents_bm_constructor(self):
        """Predecessors are correct for a graph built through the constructor."""
        self.graph = DAG([("a", "b"), ("b", "c")])
        for node, parents in (("a", []), ("b", ["a"]), ("c", ["b"])):
            self.assertListEqual(list(self.graph.predecessors(node)), parents)

    def test_update_node_parents(self):
        """Predecessors are correct for a graph built incrementally."""
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        for node, parents in (("a", []), ("b", ["a"]), ("c", ["b"])):
            self.assertListEqual(list(self.graph.predecessors(node)), parents)

    def test_get_leaves(self):
        """get_leaves returns exactly the expected leaf nodes of the graph."""
        tree_edges = [
            ("A", "B"), ("B", "C"), ("B", "D"), ("D", "E"), ("D", "F"), ("A", "G")
        ]
        self.graph.add_edges_from(tree_edges)
        self.assertEqual(
            sorted(self.graph.get_leaves()), sorted(["C", "G", "E", "F"])
        )

    def test_get_roots(self):
        """get_roots returns the expected roots, including one added later."""
        tree_edges = [
            ("A", "B"), ("B", "C"), ("B", "D"), ("D", "E"), ("D", "F"), ("A", "G")
        ]
        self.graph.add_edges_from(tree_edges)
        self.assertEqual(["A"], self.graph.get_roots())

        self.graph.add_edge("H", "G")
        self.assertEqual(sorted(["A", "H"]), sorted(self.graph.get_roots()))

    def test_init_with_cycle(self):
        """Building a DAG from a cyclic edge list raises ValueError."""
        cyclic_edge_lists = (
            [("a", "a")],
            [("a", "b"), ("b", "a")],
            [("a", "b"), ("b", "c"), ("c", "a")],
        )
        for cyclic_edges in cyclic_edge_lists:
            with self.assertRaises(ValueError):
                DAG(cyclic_edges)
Ejemplo n.º 4
0
    def pdag_to_dag(pdag):
        """Completes a PDAG to a DAG, without adding v-structures, if such a
        completion exists. If no faithful extension is possible, some fully
        oriented DAG that corresponds to the PDAG is returned and a warning is
        generated. This is a static method.

        The input is not modified; the algorithm works on a copy of it.

        Parameters
        ----------
        pdag: DAG
            A directed acyclic graph pattern, consisting in (acyclic) directed edges
            as well as "undirected" edges, represented as both-way edges between
            nodes.

        Returns
        -------
        dag: DAG
            A faithful orientation of pdag, if one exists. Otherwise any
            fully orientated DAG/BayesianModel with the structure of pdag.

        References
        ----------
        [1] Chickering, Learning Equivalence Classes of Bayesian-Network Structures,
            2002; See page 454 (last paragraph) for the algorithm pdag_to_dag
            http://www.jmlr.org/papers/volume2/chickering02a/chickering02a.pdf
        [2] Dor & Tarsi, A simple algorithm to construct a consistent extension
            of a partially oriented graph, 1992,
            http://ftp.cs.ucla.edu/pub/stat_ser/r185-dor-tarsi.pdf

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgmpy.base import DAG
        >>> from pgmpy.estimators import ConstraintBasedEstimator
        >>> data = pd.DataFrame(np.random.randint(0, 4, size=(5000, 3)), columns=list('ABD'))
        >>> data['C'] = data['A'] - data['B']
        >>> data['D'] += data['A']
        >>> c = ConstraintBasedEstimator(data)
        >>> pdag = c.skeleton_to_pdag(*c.estimate_skeleton())
        >>> pdag.edges()
        [('B', 'C'), ('D', 'A'), ('A', 'D'), ('A', 'C')]
        >>> c.pdag_to_dag(pdag).edges()
        [('B', 'C'), ('A', 'D'), ('A', 'C')]

        >>> # pdag_to_dag is static:
        ... pdag1 = DAG([('A', 'B'), ('C', 'B'), ('C', 'D'), ('D', 'C'), ('D', 'A'), ('A', 'D')])
        >>> ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
        [('D', 'C'), ('C', 'B'), ('A', 'B'), ('A', 'D')]

        >>> # example of a pdag with no faithful extension:
        ... pdag2 = DAG([('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'B')])
        >>> ConstraintBasedEstimator.pdag_to_dag(pdag2).edges()
        UserWarning: PDAG has no faithful extension (= no oriented DAG with the same v-structures as PDAG).
        Remaining undirected PDAG edges oriented arbitrarily.
        [('B', 'C'), ('A', 'B'), ('A', 'C')]
        """

        pdag = pdag.copy()
        dag = DAG()
        dag.add_nodes_from(pdag.nodes())

        # add already directed edges of pdag to dag
        # (an edge is directed when its reverse is absent)
        for X, Y in pdag.edges():
            if not pdag.has_edge(Y, X):
                dag.add_edge(X, Y)

        while pdag.number_of_nodes() > 0:
            # find node with (1) no directed outgoing edges and
            #                (2) the set of undirected neighbors is either empty or
            #                    every undirected neighbor is adjacent to all
            #                    other neighbors (parents + undirected) of X
            found = False
            for X in pdag.nodes():
                successors = set(pdag.successors(X))
                predecessors = set(pdag.predecessors(X))
                directed_outgoing_edges = successors - predecessors
                undirected_neighbors = successors & predecessors
                # Adjacency counts in either direction: a directed edge Z -> Y
                # makes Y and Z adjacent just like Y -> Z or Y - Z does.
                neighbors_are_clique = all(
                    pdag.has_edge(Y, Z) or pdag.has_edge(Z, Y)
                    for Z in predecessors
                    for Y in undirected_neighbors
                    if Y != Z)

                if not directed_outgoing_edges and (not undirected_neighbors
                                                    or neighbors_are_clique):
                    found = True
                    # orient every remaining edge of X towards X in dag
                    for Y in pdag.predecessors(X):
                        dag.add_edge(Y, X)

                    # removing X is safe here because the iteration over
                    # pdag.nodes() is abandoned immediately afterwards
                    pdag.remove_node(X)
                    break

            if not found:
                warn(
                    "PDAG has no faithful extension (= no oriented DAG with the "
                    "same v-structures as PDAG). Remaining undirected PDAG edges "
                    "oriented arbitrarily.")
                # Best effort: orient what is left in iteration order, skipping
                # edges whose reverse is already in dag and any edge that
                # dag.add_edge rejects with ValueError.
                for X, Y in pdag.edges():
                    if not dag.has_edge(Y, X):
                        try:
                            dag.add_edge(X, Y)
                        except ValueError:
                            pass
                break

        return dag