Example #1
0
    def test_independence(self, graph_gen, seed, num_nodes):
        """
        Check that the sampled binary dataframe reflects the graph relations;
        this implicitly tests the sequence of the nodes as well.

        For every node other than "aa", the joint and factored probabilities
        returned by `calculate_proba` for the pair ("aa", node) are compared
        with an absolute tolerance: they must differ for "ab" and agree for
        every other node.
        """
        graph = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
        node_names = graph.nodes()

        frame = generate_binary_dataframe(
            graph,
            n_samples=100000,
            distribution="normal",
            seed=seed,
            noise_scale=0.5,
            intercept=False,
        )

        # Absolute tolerance on the probability comparison (rtol is disabled).
        tolerance = 0.05

        for name in node_names:
            # "aa" is the reference node; it is never compared with itself.
            if name == "aa":
                continue

            joint, factored = calculate_proba(frame, "aa", name)
            matches = np.isclose(joint, factored, atol=tolerance, rtol=0)

            if name == "ab":
                # "ab" is the only node linked to "aa", so the probabilities
                # must NOT factorise; show the column means on failure.
                assert not matches, frame.mean()
                continue

            # Every other node is expected to be independent of "aa".
            assert matches
Example #2
0
    def test_independence(self, graph_gen, seed, num_nodes, n_categories,
                          distribution):
        """
        Check that the sampled categorical dataframe reflects the graph
        relations; this implicitly tests the sequence of the nodes as well.

        For every node other than "aa", the joint and factored probabilities
        returned by `calculate_proba` for the columns "aa_0" and "<node>_0"
        are compared with a relative tolerance: they must differ for "ab" and
        agree for every other node.
        """
        graph = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
        node_names = graph.nodes()

        frame = generate_categorical_dataframe(
            graph,
            n_samples=100000,
            distribution=distribution,
            n_categories=n_categories,
            seed=seed,
            noise_scale=1,
            intercept=False,
        )

        # Relative tolerance on the probability comparison (atol is disabled).
        tolerance = 0.05

        for name in node_names:
            # "aa" is the reference node; it is never compared with itself.
            if name == "aa":
                continue

            # Columns are addressed by the "_0" suffixed name of each node.
            other_column = name + "_0"
            joint, factored = calculate_proba(frame, "aa_0", other_column)
            matches = np.isclose(joint, factored, rtol=tolerance, atol=0)

            if name == "ab":
                # The one node expected to depend on "aa".
                assert not matches
                continue

            # Independent of "aa": joint and factored probabilities agree.
            assert matches
Example #3
0
    def test_order_is_correct(self, graph_gen, num_nodes, seed):
        """
        Check that the columns of the generated data follow the order of
        `sm.nodes()` (presumably also the order of the adjacency matrix).

        Each node is mapped to its position in `sm.nodes()` and that position
        is used as the column index into the generated array. The test then
        verifies that column "aa" has a larger std than column "ab", and that
        the joint and factored probabilities of ("aa", node) differ for "ab"
        and agree, within a relative tolerance, for every other node.
        """
        graph = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
        node_names = graph.nodes()
        column_of = {name: idx for idx, name in enumerate(graph.nodes())}

        samples = generate_binary_data(
            graph,
            n_samples=10000,
            distribution="normal",
            seed=seed,
            noise_scale=0.1,
            intercept=False,
        )

        # Relative tolerance on the probability comparison (atol is disabled).
        tolerance = 0.15

        # Without an intercept the parent's mean probability is expected to be
        # 0.5, which maximises the variance p(1-p) of a binary feature (and
        # hence its std), so the child "ab" should show a smaller std.
        parent_std = samples[:, column_of["aa"]].std()
        child_std = samples[:, column_of["ab"]].std()
        assert parent_std > child_std

        for name in node_names:
            # "aa" is the reference node; it is never compared with itself.
            if name == "aa":
                continue

            joint, factored = calculate_proba(
                samples, column_of["aa"], column_of[name])
            matches = np.isclose(joint, factored, rtol=tolerance, atol=0)

            if name == "ab":
                # "ab" is the only node linked to "aa", so the probabilities
                # must NOT factorise.
                assert not matches
                continue

            # Every other node is expected to be independent of "aa".
            assert matches