Example #1
 def test_min_max_weights_exception(self):
     """ Check that w_range is valid """
     with pytest.raises(
             ValueError,
             match=
             "Absolute minimum weight must be less than or equal to maximum weight",
     ):
         generate_structure(4, 1, w_min=0.5, w_max=0)
Example #2
    def test_f1score_generated_binary(self):
        """ Binary strucutre learned should have good f1 score """
        np.random.seed(10)
        sm = generate_structure(5, 2.0)
        df = generate_binary_data(sm,
                                  1000,
                                  intercept=False,
                                  noise_scale=0.1,
                                  seed=10)

        dist_type_schema = {i: "bin" for i in range(df.shape[1])}
        sm_fitted = from_numpy(
            df,
            dist_type_schema=dist_type_schema,
            lasso_beta=0.1,
            ridge_beta=0.0,
            w_threshold=0.1,
            use_bias=False,
        )

        right_edges = sm.edges
        n_predictions_made = len(sm_fitted.edges)
        n_correct_predictions = len(
            set(sm_fitted.edges).intersection(set(right_edges)))
        n_relevant_predictions = len(right_edges)

        precision = n_correct_predictions / n_predictions_made
        recall = n_correct_predictions / n_relevant_predictions
        f1_score = 2 * (precision * recall) / (precision + recall)

        assert f1_score > 0.8
Example #3
    def test_f1score_generated_poisson(self):
        """ Poisson strucutre learned should have good f1 score """
        np.random.seed(10)
        sm = generate_structure(5, 3.0)
        df = generate_count_dataframe(
            sm, 1000, intercept=False, zero_inflation_factor=0.0, seed=10
        )
        df = np.asarray(df)

        dist_type_schema = {i: "poiss" for i in range(df.shape[1])}
        sm_fitted = from_numpy(
            df,
            dist_type_schema=dist_type_schema,
            lasso_beta=0.1,
            ridge_beta=0.0,
            w_threshold=0.1,
            use_bias=False,
        )

        right_edges = sm.edges
        n_predictions_made = len(sm_fitted.edges)
        n_correct_predictions = len(set(sm_fitted.edges).intersection(set(right_edges)))
        n_relevant_predictions = len(right_edges)

        precision = n_correct_predictions / n_predictions_made
        recall = n_correct_predictions / n_relevant_predictions
        f1_score = 2 * (precision * recall) / (precision + recall)

        assert f1_score > 0.7
Example #4
 def test_weight_range(self, num_nodes, degree, w_range):
     """ Test that w_range is respected in output """
     w_min = w_range[0]
     w_max = w_range[1]
     sm = generate_structure(num_nodes, degree, w_min=w_min, w_max=w_max)
     assert all(abs(sm[u][v]["weight"]) >= w_min for u, v in sm.edges)
     assert all(abs(sm[u][v]["weight"]) <= w_max for u, v in sm.edges)
Example #5
    def test_nonlinear_performance(self, standardize):
        np.random.seed(42)
        sm = dg.generate_structure(num_nodes=10, degree=3)
        sm.threshold_till_dag()
        data = dg.generate_continuous_dataframe(sm,
                                                n_samples=1000,
                                                intercept=True,
                                                seed=42,
                                                noise_scale=0.1,
                                                kernel=RBF(1))
        node = 1
        y = data.iloc[:, node]
        X = data.drop(node, axis=1)

        reg = DAGRegressor(
            alpha=0.0,
            l1_ratio=0.0,
            fit_intercept=True,
            dependent_target=True,
            enforce_dag=False,
            hidden_layer_units=[0],
            standardize=standardize,
        )
        linear_score = cross_val_score(reg,
                                       X,
                                       y,
                                       cv=KFold(shuffle=True,
                                                random_state=42)).mean()

        reg = DAGRegressor(
            alpha=0.1,
            l1_ratio=1.0,
            fit_intercept=True,
            enforce_dag=False,
            hidden_layer_units=[2],
            standardize=standardize,
        )
        small_nl_score = cross_val_score(reg,
                                         X,
                                         y,
                                         cv=KFold(shuffle=True,
                                                  random_state=42)).mean()

        reg = DAGRegressor(
            alpha=0.1,
            l1_ratio=1.0,
            fit_intercept=True,
            enforce_dag=False,
            hidden_layer_units=[4],
            standardize=standardize,
        )
        medium_nl_score = cross_val_score(reg,
                                          X,
                                          y,
                                          cv=KFold(shuffle=True,
                                                   random_state=42)).mean()

        assert small_nl_score > linear_score
        assert medium_nl_score > small_nl_score
Example #6
 def test_returns_ndarray(self, distribution):
     """Return value is an ndarray - test over all sem_types"""
     graph_type, degree, d_nodes = "erdos-renyi", 4, 10
     sm = generate_structure(d_nodes, degree, graph_type)
     ndarray = generate_continuous_data(sm,
                                        distribution=distribution,
                                        n_samples=10)
     assert isinstance(ndarray, np.ndarray)
Example #7
 def test_returns_dataframe(self, distribution):
     """Return value is a DataFrame - test over all sem_types"""
     graph_type, degree, d_nodes = "erdos-renyi", 4, 10
     sm = generate_structure(d_nodes, degree, graph_type)
     df = generate_categorical_dataframe(sm,
                                         distribution=distribution,
                                         n_samples=10)
     assert isinstance(df, pd.DataFrame)
Example #8
 def test_bad_distribution_type(self):
     """Test that invalid sem-type other than "probit", "normal", "logit" is not accepted"""
     graph_type, degree, d_nodes = "erdos-renyi", 4, 10
     sm = generate_structure(d_nodes, degree, graph_type)
     with pytest.raises(ValueError, match="Unknown binary distribution"):
         generate_binary_data(sm,
                              distribution="invalid",
                              n_samples=10,
                              seed=10)
Example #9
    def test_erdos_renyi_degree_increases_edges(self):
        """ Erdos-Renyi degree increases edges """
        edge_counts = [
            max([
                len(generate_structure(100, degree, "erdos-renyi").edges)
                for _ in range(10)
            ]) for degree in [10, 90]
        ]

        assert edge_counts == sorted(edge_counts)
Example #10
    def test_barabasi_albert_degree_increases_edges(self):
        """ Barabasi-Albert degree increases edges """
        edge_counts = [
            max([
                len(generate_structure(100, degree, "barabasi-albert").edges)
                for _ in range(10)
            ]) for degree in [10, 90]
        ]

        assert edge_counts == sorted(edge_counts)
Example #11
 def test_bad_distribution_type(self):
     """Test that invalid sem-type other than "gaussian", "normal", "student-t",
     "exponential", "gumbel" is not accepted"""
     graph_type, degree, d_nodes = "erdos-renyi", 4, 10
     sm = generate_structure(d_nodes, degree, graph_type)
     with pytest.raises(ValueError,
                        match="Unknown continuous distribution"):
         generate_continuous_data(sm,
                                  distribution="invalid",
                                  n_samples=10,
                                  seed=10)
Example #12
    def test_nonlinear_performance(self, standardize):
        torch.manual_seed(42)
        np.random.seed(42)
        sm = dg.generate_structure(num_nodes=5, degree=3)
        sm.threshold_till_dag()
        data = dg.generate_continuous_dataframe(
            sm, n_samples=200, intercept=True, seed=42, noise_scale=0.1, kernel=RBF(1)
        )
        node = 1
        y = data.iloc[:, node]
        X = data.drop(node, axis=1)

        reg = DAGRegressor(
            alpha=0.0,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=[0],
            standardize=standardize,
        )
        linear_score = cross_val_score(
            reg, X, y, cv=KFold(n_splits=3, shuffle=True, random_state=42)
        ).mean()

        reg = DAGRegressor(
            alpha=0.1,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=[2],
            standardize=standardize,
        )
        small_nl_score = cross_val_score(
            reg, X, y, cv=KFold(n_splits=3, shuffle=True, random_state=42)
        ).mean()

        assert small_nl_score > linear_score or np.isclose(
            small_nl_score, linear_score, atol=1e-5
        )
Example #13
def generate_structure_dynamic(  # pylint: disable=too-many-arguments
    num_nodes: int,
    p: int,
    degree_intra: float,
    degree_inter: float,
    graph_type_intra: str = "erdos-renyi",
    graph_type_inter: str = "erdos-renyi",
    w_min_intra: float = 0.5,
    w_max_intra: float = 0.5,
    w_min_inter: float = 0.5,
    w_max_inter: float = 0.5,
    w_decay: float = 1.0,
) -> StructureModel:
    """
    Generates a dynamic DAG at random.

    Args:
        num_nodes: Number of nodes
        p: maximum lag to be considered in the structure
        degree_intra: expected degree on nodes from the current state
        degree_inter: expected degree on nodes from the lagged nodes
        graph_type_intra:
            - erdos-renyi: constructs a graph such that the probability of any given edge is degree / (num_nodes - 1)
            - barabasi-albert: constructs a scale-free graph from an initial connected graph of (degree / 2) nodes
            - full: constructs a fully-connected graph - degree has no effect
        graph_type_inter:
            - erdos-renyi: constructs a graph such that the probability of any given edge is degree / (num_nodes - 1)
            - full: connect all past nodes to all present nodes
        w_min_intra: minimum weight for intra-slice nodes
        w_max_intra: maximum weight for intra-slice nodes
        w_min_inter: minimum weight for inter-slice nodes
        w_max_inter: maximum weight for inter-slice nodes
        w_decay: exponent of weights decay for slices that are farther apart. Default is 1.0, which implies no decay

    Raises:
        ValueError: if graph type unknown or `num_nodes < 2`

    Returns:
        StructureModel containing all simulated nodes and edges (intra- and inter-slice)
    """
    sm_intra = generate_structure(
        num_nodes=num_nodes,
        degree=degree_intra,
        graph_type=graph_type_intra,
        w_min=w_min_intra,
        w_max=w_max_intra,
    )
    sm_inter = _generate_inter_structure(
        num_nodes=num_nodes,
        p=p,
        degree=degree_inter,
        graph_type=graph_type_inter,
        w_min=w_min_inter,
        w_max=w_max_inter,
        w_decay=w_decay,
    )
    res = StructureModel()
    res.add_nodes_from(sm_inter.nodes)
    res.add_nodes_from([f"{u}_lag0" for u in sm_intra.nodes])
    res.add_weighted_edges_from(sm_inter.edges.data("weight"))
    res.add_weighted_edges_from([(f"{u}_lag0", f"{v}_lag0", w)
                                 for u, v, w in sm_intra.edges.data("weight")])
    return res
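
A minimal usage sketch for generate_structure_dynamic, for orientation only. It is not taken from the library's tests; the parameter values are illustrative, and it assumes the function and networkx are importable in the current namespace:

# Illustrative sketch: build a small dynamic DAG with 4 nodes per slice and 2 lags.
import networkx as nx

g = generate_structure_dynamic(
    num_nodes=4,
    p=2,                # consider lags t-1 and t-2
    degree_intra=2,     # expected within-slice degree
    degree_inter=1,     # expected degree from lagged nodes onto current nodes
    w_min_intra=0.3,
    w_max_intra=0.6,
    w_min_inter=0.3,
    w_max_inter=0.6,
    w_decay=1.5,        # weights of older slices decay with lag (1.0 = no decay)
)

# Current-slice nodes are named "<node>_lag0" (see the f-string above); the names of
# lagged nodes come from _generate_inter_structure, which is not shown in this listing.
assert nx.is_directed_acyclic_graph(g)
print(len(g.nodes), len(g.edges))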
Example #14
 def test_is_dag_graph_type(self, graph_type):
     """ Tests that the generated graph is a dag for all graph types. """
     degree, d_nodes = 4, 10
     sm = generate_structure(d_nodes, degree, graph_type)
     assert is_directed_acyclic_graph(sm)
Example #15
 def test_expected_num_nodes(self, num_nodes, degree):
     """ Test that generated structure has expected number of nodes = num_nodes """
     sm = generate_structure(num_nodes, degree)
     assert len(sm.nodes) == num_nodes
Example #16
 def test_min_max_weights_equal(self):
     """ If w_range (w, w) has w=w, check abs value of all weights respect this """
     w = 1
     sm = generate_structure(4, 1, w_min=w, w_max=w)
     w_mat = nx.to_numpy_array(sm)
     assert np.all((w_mat == 0) | (w_mat == w) | (w_mat == -w))
Example #17
 def test_is_dag_nodes_degrees(self, num_nodes, degree):
     """ Tests that generated graph is dag for different numbers of nodes and degrees
     """
     sm = generate_structure(num_nodes, degree)
     assert nx.is_directed_acyclic_graph(sm)
Example #18
 def test_num_nodes_exception(self, num_nodes):
     """ Check a single node graph can't be generated """
     with pytest.raises(ValueError, match="DAG must have at least 2 nodes"):
         generate_structure(num_nodes, 1)
Example #19
    def test_full_network(self):
        """ Fully connected network has expected edge counts """
        sm = generate_structure(40, degree=0, graph_type="full")

        assert len(sm.edges) == (40 * 39) / 2
Example #20
 def test_bad_graph_type(self):
     """ Test that a value other than "erdos-renyi", "barabasi-albert", "full" throws ValueError """
     graph_type = "invalid"
     degree, d_nodes = 4, 10
     with pytest.raises(ValueError, match="unknown graph type"):
         generate_structure(d_nodes, degree, graph_type)