Ejemplo n.º 1
0
    def test_independence(self, graph_gen, seed, num_nodes):
        """
        Check the (in)dependence between node "aa" and every other node.

        A structure is built with ``graph_gen`` and a large binary sample is
        drawn from it. For each node other than "aa", the joint probability
        returned by ``calculate_proba`` is compared against the factored
        probability: for "ab" the two must differ by more than the tolerance,
        for every other node they must agree within it. Because the nodes are
        addressed by name, this also implicitly checks the column sequence.
        """
        tolerance = 0.05

        structure = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
        all_nodes = structure.nodes()

        samples = generate_binary_dataframe(
            structure,
            n_samples=100000,
            distribution="normal",
            seed=seed,
            noise_scale=0.5,
            intercept=False,
        )

        for other in all_nodes:
            if other == "aa":
                # Nothing to compare "aa" against itself.
                continue

            joint, factored = calculate_proba(samples, "aa", other)
            probs_match = np.isclose(joint, factored, atol=tolerance, rtol=0)

            if other == "ab":
                # "aa" -> "ab" is the only link, so the pair must be dependent;
                # the column means are reported on failure to aid debugging.
                assert not probs_match, samples.mean()
            else:
                assert probs_match
Ejemplo n.º 2
0
    def test_f1score_generated_binary(self):
        """
        Binary structure learned should have a good F1 score.

        A random structure is generated, binary data is sampled from it, and a
        structure is learned back with ``from_pandas``. The learned edges are
        scored against the generating edges and the F1 score must exceed 0.8.
        """
        np.random.seed(10)
        sm = generate_structure(5, 2.0)
        df = generate_binary_dataframe(
            sm,
            1000,
            intercept=False,
            noise_scale=0.1,
            seed=10,
        )

        # Every column is declared as binary for the learner.
        dist_type_schema = {i: "bin" for i in range(df.shape[1])}
        sm_fitted = from_pandas(
            df,
            dist_type_schema=dist_type_schema,
            lasso_beta=0.1,
            ridge_beta=0.0,
            w_threshold=0.1,
            use_bias=False,
        )

        right_edges = sm.edges
        n_predictions_made = len(sm_fitted.edges)
        n_correct_predictions = len(set(sm_fitted.edges) & set(right_edges))
        n_relevant_predictions = len(right_edges)

        # Guard the divisions below: without these checks a degenerate fit
        # surfaces as an opaque ZeroDivisionError instead of a test failure
        # that says what went wrong.
        assert n_predictions_made > 0, "no edges were learned"
        assert n_correct_predictions > 0, (
            "none of the learned edges matches a true edge"
        )

        precision = n_correct_predictions / n_predictions_made
        recall = n_correct_predictions / n_relevant_predictions
        f1_score = 2 * (precision * recall) / (precision + recall)

        assert f1_score > 0.8
Ejemplo n.º 3
0
    def test_dataframe(self, graph, distribution, noise_std, intercept, seed,
                       kernel):
        """
        Check that the dataframe wrapper matches the raw data generator.

        Both generators are called with identical arguments; the dataframe,
        with its columns selected in ``graph.nodes()`` order, must hold exactly
        the same values as the array returned by ``generate_binary_data``.
        """
        # Shared arguments, so both generators are provably called alike.
        positional = (graph, 100, distribution)
        options = {
            "noise_scale": noise_std,
            "seed": seed,
            "intercept": intercept,
            "kernel": kernel,
        }

        raw = generate_binary_data(*positional, **options)
        frame = generate_binary_dataframe(*positional, **options)

        node_order = list(graph.nodes())
        assert np.array_equal(raw, frame[node_order].values)