Example #1
0
    def test_f1score_generated(self, adjacency_mat_num_stability):
        """Structure learnt from regularisation should have very high f1 score relative to the ground truth"""
        labels = ["a", "b", "c", "d", "e"]
        df = pd.DataFrame(adjacency_mat_num_stability,
                          columns=labels,
                          index=labels)
        train_model = StructureModel(df.values)
        X = generate_continuous_dataframe(StructureModel(df),
                                          50,
                                          noise_scale=1,
                                          seed=1)
        g = from_numpy(X[labels].values,
                       lasso_beta=0.1,
                       w_threshold=0.25)

        # Score the recovered graph against the ground-truth edge set.
        right_edges = train_model.edges
        predicted = set(g.edges)
        correct = predicted & set(right_edges)

        precision = len(correct) / len(g.edges)
        recall = len(correct) / len(right_edges)
        f1_score = 2 * (precision * recall) / (precision + recall)

        assert f1_score > 0.85
Example #2
0
    def test_dataframe(self, graph, distribution, noise_std, intercept, seed,
                       kernel):
        """
        Tests equivalence of dataframe wrapper
        """
        # Both generators are invoked with the exact same configuration so
        # their outputs must be element-wise identical.
        shared_kwargs = dict(
            noise_scale=noise_std,
            seed=seed,
            intercept=intercept,
            kernel=kernel,
        )
        data = generate_continuous_data(graph, 1000, distribution,
                                        **shared_kwargs)
        df = generate_continuous_dataframe(graph, 1000, distribution,
                                           **shared_kwargs)

        # Column order in the dataframe follows the graph's node order.
        assert np.array_equal(data, df[list(graph.nodes())].values)
Example #3
0
    def test_nonlinear_performance(self, standardize):
        """Larger hidden layers should score better on nonlinear data."""
        np.random.seed(42)
        sm = dg.generate_structure(num_nodes=10, degree=3)
        sm.threshold_till_dag()
        data = dg.generate_continuous_dataframe(sm,
                                                n_samples=1000,
                                                intercept=True,
                                                seed=42,
                                                noise_scale=0.1,
                                                kernel=RBF(1))
        node = 1
        y = data.iloc[:, node]
        X = data.drop(node, axis=1)

        def cv_mean(estimator):
            # Fixed-seed shuffled CV so the three scores are comparable.
            return cross_val_score(estimator,
                                   X,
                                   y,
                                   cv=KFold(shuffle=True,
                                            random_state=42)).mean()

        linear_score = cv_mean(DAGRegressor(
            alpha=0.0,
            l1_ratio=0.0,
            fit_intercept=True,
            dependent_target=True,
            enforce_dag=False,
            hidden_layer_units=[0],
            standardize=standardize,
        ))

        small_nl_score = cv_mean(DAGRegressor(
            alpha=0.1,
            l1_ratio=1.0,
            fit_intercept=True,
            enforce_dag=False,
            hidden_layer_units=[2],
            standardize=standardize,
        ))

        medium_nl_score = cv_mean(DAGRegressor(
            alpha=0.1,
            l1_ratio=1.0,
            fit_intercept=True,
            enforce_dag=False,
            hidden_layer_units=[4],
            standardize=standardize,
        ))

        assert small_nl_score > linear_score
        assert medium_nl_score > small_nl_score
Example #4
0
    def test_nonlinear_performance(self, standardize):
        """A small nonlinear model should match or beat the linear baseline."""
        torch.manual_seed(42)
        np.random.seed(42)
        sm = dg.generate_structure(num_nodes=5, degree=3)
        sm.threshold_till_dag()
        data = dg.generate_continuous_dataframe(
            sm, n_samples=200, intercept=True, seed=42, noise_scale=0.1, kernel=RBF(1)
        )
        node = 1
        y = data.iloc[:, node]
        X = data.drop(node, axis=1)

        # KFold splitting is deterministic given random_state, so one
        # splitter can be shared across both evaluations.
        splitter = KFold(n_splits=3, shuffle=True, random_state=42)

        linear_reg = DAGRegressor(
            alpha=0.0,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=[0],
            standardize=standardize,
        )
        linear_score = cross_val_score(linear_reg, X, y, cv=splitter).mean()

        nonlinear_reg = DAGRegressor(
            alpha=0.1,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=[2],
            standardize=standardize,
        )
        small_nl_score = cross_val_score(nonlinear_reg, X, y, cv=splitter).mean()

        # Tolerate tiny numerical differences when the scores are a tie.
        assert small_nl_score > linear_score or np.isclose(
            small_nl_score, linear_score, atol=1e-5
        )