Ejemplo n.º 1
0
    def test_nrows_data_mismatch_raises_error(self):
        """
        A ValueError must be raised when X and Xlags have different row counts.
        """
        x_data = np.zeros([5, 5])
        # One row more than X, which is the mismatch under test.
        xlags_data = np.zeros([6, 5])
        expected_msg = "Input data X and Xlags must have the same number of rows"

        with pytest.raises(ValueError, match=expected_msg):
            from_numpy_dynamic(x_data, xlags_data)
Ejemplo n.º 2
0
    def test_ncols_lagged_data_not_multiple_raises_error(self):
        """
        A ValueError must be raised when the column count of Xlags is not a multiple of that of X.
        """
        x_data = np.zeros([5, 5])
        # 6 columns is not a multiple of the 5 columns in X.
        xlags_data = np.zeros([5, 6])
        expected_msg = (
            "Number of columns of Xlags must be a multiple of number of columns of X"
        )

        with pytest.raises(ValueError, match=expected_msg):
            from_numpy_dynamic(x_data, xlags_data)
Ejemplo n.º 3
0
    def test_single_iter_gets_converged_fail_warnings(self, data_dynotears_p1):
        """
        With max_iter=1 on this dataset, from_numpy_dynamic is expected to emit a
        UserWarning reporting that it failed to converge.
        """
        # `match` is interpreted as a regular expression, so both full stops are
        # escaped to require literal "." characters rather than any character.
        expected_msg = r"Failed to converge\. Consider increasing max_iter\."

        with pytest.warns(UserWarning, match=expected_msg):
            from_numpy_dynamic(data_dynotears_p1["X"],
                               data_dynotears_p1["Y"],
                               max_iter=1)
Ejemplo n.º 4
0
    def test_tabu_edges_on_non_existing_edges_do_nothing(
            self, data_dynotears_p2):
        """Banning edges that the unconstrained run does not learn must leave the edge set unchanged"""
        x_data = data_dynotears_p2["X"]
        xlags_data = data_dynotears_p2["Y"]
        # (0, 0, 0) .. (0, 0, 3): the tabu edges passed to the constrained run.
        banned = [(0, 0, idx) for idx in range(4)]

        baseline = from_numpy_dynamic(x_data, xlags_data, w_threshold=0.2)
        constrained = from_numpy_dynamic(
            x_data,
            xlags_data,
            w_threshold=0.2,
            tabu_edges=banned,
        )

        assert set(constrained.edges) == set(baseline.edges)
Ejemplo n.º 5
0
    def test_empty_data_raises_error(self):
        """
        An empty X or an empty Xlags must each produce a ValueError stating that the data is empty.
        Raising this explicitly is friendlier than letting the user hit a misleading
        division-by-zero or unpacking error further down.
        """
        cases = [
            # (X, Xlags, expected message)
            (np.empty([0, 5]), np.zeros([5, 5]),
             "Input data X is empty, cannot learn any structure"),
            (np.zeros([5, 5]), np.empty([0, 5]),
             "Input data Xlags is empty, cannot learn any structure"),
        ]

        for x_data, xlags_data, expected_msg in cases:
            with pytest.raises(ValueError, match=expected_msg):
                from_numpy_dynamic(x_data, xlags_data)
Ejemplo n.º 6
0
 def test_edges_contain_weight(self, data_dynotears_p2):
     """Every learned edge must carry a numeric 'weight' attribute"""
     numeric_types = (float, int, np.number)
     sm = from_numpy_dynamic(data_dynotears_p2["X"], data_dynotears_p2["Y"])

     # Only the weight of each (source, target, weight) triple matters here.
     weights = [weight for _, _, weight in sm.edges(data="weight")]
     assert np.all([isinstance(weight, numeric_types) for weight in weights])
Ejemplo n.º 7
0
 def test_isolated_nodes_exist(self, data_dynotears_p2):
     """Nodes left without any edge by a high threshold must still be part of the structure"""
     sm = from_numpy_dynamic(
         data_dynotears_p2["X"],
         data_dynotears_p2["Y"],
         w_threshold=1,
     )
     edge_count = len(sm.edges)
     node_count = len(sm.nodes)

     assert edge_count == 2
     assert node_count == 15
Ejemplo n.º 8
0
    def test_expected_structure_learned_p2(self, data_dynotears_p2):
        """
        On a small data set with p=2, no learned intra-slice edge may be absent from W, more
        than half of the reference inter-slice edges must be learned, and more than half of the
        learned inter-slice edges must be reference edges.
        """
        sm = from_numpy_dynamic(data_dynotears_p2["X"],
                                data_dynotears_p2["Y"],
                                w_threshold=0.25)
        w_true = data_dynotears_p2["W"]
        a_true = data_dynotears_p2["A"]

        # Reference intra-slice edges: non-zero entries of W.
        w_edges = []
        for i in range(5):
            for j in range(5):
                if w_true[i, j] != 0:
                    w_edges.append((f"{i}_lag0", f"{j}_lag0"))

        # Reference inter-slice edges: non-zero entries of A.
        # NOTE(review): only rows 0-4 of A are scanned, so every edge built here is a
        # lag-1 edge; confirm whether rows for lag 2 were meant to be included for p=2.
        a_edges = []
        for i in range(5):
            for j in range(5):
                if a_true[i, j] != 0:
                    a_edges.append((f"{i % 5}_lag{1 + i // 5}", f"{j}_lag0"))

        learned = list(sm.edges)
        inter_hits = [edge for edge in learned if edge in a_edges]
        inter_learned = [edge for edge in learned if "lag0" not in edge[0]]
        intra_learned = [edge for edge in learned if "lag0" in edge[0]]

        spurious_intra = [edge for edge in intra_learned if edge not in w_edges]
        missed_intra = [edge for edge in w_edges if edge not in intra_learned]

        assert len(spurious_intra) == 0
        # NOTE(review): missed_intra is a subset of w_edges, so this ratio can never
        # exceed 1.0 and the check cannot fail; a tighter bound may have been intended.
        assert len(missed_intra) / len(w_edges) <= 1.0
        assert len(inter_hits) / len(a_edges) > 0.5
        assert len(inter_hits) / len(inter_learned) > 0.5
Ejemplo n.º 9
0
    def test_tabu_children(self, data_dynotears_p2):
        """
        A node passed in tabu_child_nodes must never be the target of a learned edge
        """
        for banned_child in (4, 1):
            sm = from_numpy_dynamic(
                data_dynotears_p2["X"],
                data_dynotears_p2["Y"],
                tabu_child_nodes=[banned_child],
            )
            name_prefix = f"{banned_child}_lag"
            assert not any(name_prefix in target for _, target in sm.edges)
Ejemplo n.º 10
0
    def test_no_cycles(self, data_dynotears_p2):
        """
        The structure returned by from_numpy_dynamic must be a directed acyclic graph
        """
        x_data = data_dynotears_p2["X"]
        xlags_data = data_dynotears_p2["Y"]

        sm = from_numpy_dynamic(x_data, xlags_data, w_threshold=0.05)

        is_dag = nx.algorithms.is_directed_acyclic_graph(sm)
        assert is_dag
Ejemplo n.º 11
0
 def test_all_columns_in_structure(self, data_dynotears_p2):
     """Each of the 5 variables must appear as a node at every lag from 0 to 2"""
     expected_nodes = []
     for var in range(5):
         for l_val in range(3):
             expected_nodes.append(f"{var}_lag{l_val}")

     sm = from_numpy_dynamic(data_dynotears_p2["X"], data_dynotears_p2["Y"])

     assert sorted(sm.nodes) == expected_nodes
Ejemplo n.º 12
0
    def test_inter_edges(self, data_dynotears_p3):
        """
        Any edge starting at {var}_lag{l} with l > 0 must end at a lag-0 node
        """
        sm = from_numpy_dynamic(data_dynotears_p3["X"], data_dynotears_p3["Y"])

        # The lag is read from the final character of each node name.
        assert all(
            int(target[-1]) == 0
            for source, target in sm.edges
            if int(source[-1]) > 0
        )
Ejemplo n.º 13
0
    def test_naming_nodes(self, data_dynotears_p3):
        """
        Every node name must consist entirely of the {var}_lag{l} format
        """
        node_format = re.compile(r"[0-5]_lag[0-3]")
        sm = from_numpy_dynamic(data_dynotears_p3["X"], data_dynotears_p3["Y"])

        for node in sm.nodes:
            # fullmatch only succeeds when the whole name fits the pattern.
            assert node_format.fullmatch(node)
Ejemplo n.º 14
0
    def test_inverse_relationships_get_negative_weight(self):
        """If a == -b always, there should be an edge a->b or b->a with coefficient close to minus one"""

        np.random.seed(17)
        samples = np.random.choice(100, size=500)
        rows = [[value, -value] for value in samples]
        values = pd.DataFrame(rows, columns=["a", "b"]).values

        # Rows 1..n are passed as X and rows 0..n-1 as the lagged data.
        sm = from_numpy_dynamic(values[1:], values[:-1], w_threshold=0.1)

        # The edge may have been learned in either direction.
        edge_data = sm.get_edge_data("1_lag0", "0_lag0")
        if not edge_data:
            edge_data = sm.get_edge_data("0_lag0", "1_lag0")
        weight = edge_data["weight"]

        assert -1.01 < weight <= -0.99
Ejemplo n.º 15
0
    def test_tabu_edges(self, data_dynotears_p2):
        """
        None of the edges passed as tabu_edges may appear among the learned edges
        """
        # Each entry is (lag, parent, child); the learned-edge name checked below
        # is ("{parent}_lag{lag}", "{child}_lag0").
        banned = ((0, 2, 4), (0, 0, 3), (1, 1, 4), (1, 3, 4))

        sm = from_numpy_dynamic(
            data_dynotears_p2["X"],
            data_dynotears_p2["Y"],
            tabu_edges=list(banned),
        )

        for lag, parent, child in banned:
            assert (f"{parent}_lag{lag}", f"{child}_lag0") not in sm.edges
Ejemplo n.º 16
0
    def test_certain_relationships_get_near_certain_weight(self):
        """If a == b always, there should be an edge a->b or b->a with coefficient close to one"""

        np.random.seed(17)
        samples = np.random.choice(100, size=500)
        rows = [[np.sqrt(value), np.sqrt(value)] for value in samples]
        values = pd.DataFrame(rows, columns=["a", "b"]).values

        # Rows 1..n are passed as X and rows 0..n-1 as the lagged data.
        sm = from_numpy_dynamic(values[1:], values[:-1], w_threshold=0.1)

        # The edge may have been learned in either direction.
        edge_data = sm.get_edge_data("1_lag0", "0_lag0")
        if not edge_data:
            edge_data = sm.get_edge_data("0_lag0", "1_lag0")
        weight = edge_data["weight"]

        assert 0.99 < weight <= 1.01
Ejemplo n.º 17
0
    def test_multiple_tabu(self, data_dynotears_p2):
        """
        With tabu edges, tabu child nodes and tabu parent nodes all set at once, none of the
        corresponding edges may be learned
        """
        # Each entry is (lag, parent, child); the learned-edge name checked below
        # is ("{parent}_lag{lag}", "{child}_lag0").
        banned_edges = ((0, 1, 4), (0, 0, 3), (1, 1, 4), (1, 3, 4))
        banned_children = (0, 1)
        banned_parents = (3,)

        sm = from_numpy_dynamic(
            data_dynotears_p2["X"],
            data_dynotears_p2["Y"],
            tabu_edges=list(banned_edges),
            tabu_child_nodes=list(banned_children),
            tabu_parent_nodes=list(banned_parents),
        )

        for lag, parent, child in banned_edges:
            assert (f"{parent}_lag{lag}", f"{child}_lag0") not in sm.edges
        for node in banned_children:
            assert not any(f"{node}_lag" in target for _, target in sm.edges)
        for node in banned_parents:
            assert not any(f"{node}_lag" in source for source, _ in sm.edges)
Ejemplo n.º 18
0
    def test_expected_structure_learned_p1(self, data_dynotears_p1):
        """
        On a small data set with p=1, the learned intra-slice edges must equal the non-zero
        entries of W, more than 60% of the reference inter-slice edges must be learned, and
        more than 90% of the learned inter-slice edges must be reference edges.
        """
        sm = from_numpy_dynamic(data_dynotears_p1["X"],
                                data_dynotears_p1["Y"],
                                w_threshold=0.2)
        w_true = data_dynotears_p1["W"]
        a_true = data_dynotears_p1["A"]

        # Reference edges are the non-zero entries of W (intra-slice) and A (inter-slice).
        w_edges = []
        a_edges = []
        for i in range(5):
            for j in range(5):
                if w_true[i, j] != 0:
                    w_edges.append((f"{i}_lag0", f"{j}_lag0"))
                if a_true[i, j] != 0:
                    a_edges.append((f"{i % 5}_lag{1 + i // 5}", f"{j}_lag0"))

        learned = list(sm.edges)
        intra_learned = [edge for edge in learned if "lag0" in edge[0]]
        inter_learned = [edge for edge in learned if "lag0" not in edge[0]]
        inter_hits = [edge for edge in learned if edge in a_edges]

        assert sorted(intra_learned) == sorted(w_edges)
        assert len(inter_hits) / len(a_edges) > 0.6
        assert len(inter_hits) / len(inter_learned) > 0.9