def test_min_max_weights_exception(self):
    """
    A w_min greater than w_max must raise a ValueError
    """
    expected_message = (
        "Absolute minimum weight must be less than or equal to maximum weight"
    )
    with pytest.raises(ValueError, match=expected_message):
        generate_structure(4, 1, w_min=0.5, w_max=0)
def test_f1score_generated_binary(self):
    """
    Structure learned from generated binary data should score F1 > 0.8
    against the generating structure's edges
    """
    np.random.seed(10)
    true_sm = generate_structure(5, 2.0)
    data = generate_binary_data(
        true_sm, 1000, intercept=False, noise_scale=0.1, seed=10
    )

    # every column is declared binary
    schema = dict.fromkeys(range(data.shape[1]), "bin")
    learned_sm = from_numpy(
        data,
        dist_type_schema=schema,
        lasso_beta=0.1,
        ridge_beta=0.0,
        w_threshold=0.1,
        use_bias=False,
    )

    true_edges = true_sm.edges
    learned_edges = learned_sm.edges
    n_learned = len(learned_edges)
    n_hits = len(set(learned_edges) & set(true_edges))
    n_true = len(true_edges)

    precision = n_hits / n_learned
    recall = n_hits / n_true
    f1_score = 2 * (precision * recall) / (precision + recall)
    assert f1_score > 0.8
def test_f1score_generated_poisson(self):
    """
    Structure learned from generated count data should score F1 > 0.7
    against the generating structure's edges
    """
    np.random.seed(10)
    true_sm = generate_structure(5, 3.0)
    counts = generate_count_dataframe(
        true_sm, 1000, intercept=False, zero_inflation_factor=0.0, seed=10
    )
    data = np.asarray(counts)

    # every column is declared Poisson
    schema = dict.fromkeys(range(data.shape[1]), "poiss")
    learned_sm = from_numpy(
        data,
        dist_type_schema=schema,
        lasso_beta=0.1,
        ridge_beta=0.0,
        w_threshold=0.1,
        use_bias=False,
    )

    true_edges = true_sm.edges
    learned_edges = learned_sm.edges
    n_learned = len(learned_edges)
    n_hits = len(set(learned_edges) & set(true_edges))
    n_true = len(true_edges)

    precision = n_hits / n_learned
    recall = n_hits / n_true
    f1_score = 2 * (precision * recall) / (precision + recall)
    assert f1_score > 0.7
def test_weight_range(self, num_nodes, degree, w_range):
    """
    Every edge weight magnitude must lie within [w_range[0], w_range[1]]
    """
    w_min, w_max = w_range[0], w_range[1]
    sm = generate_structure(num_nodes, degree, w_min=w_min, w_max=w_max)

    magnitudes = [abs(sm[u][v]["weight"]) for u, v in sm.edges]
    assert all(magnitude >= w_min for magnitude in magnitudes)
    assert all(magnitude <= w_max for magnitude in magnitudes)
def test_nonlinear_performance(self, standardize):
    """
    On data generated with an RBF kernel, the mean cross-validated score
    should increase from hidden_layer_units=[0] to [2] to [4]
    """
    np.random.seed(42)
    sm = dg.generate_structure(num_nodes=10, degree=3)
    sm.threshold_till_dag()
    data = dg.generate_continuous_dataframe(
        sm,
        n_samples=1000,
        intercept=True,
        seed=42,
        noise_scale=0.1,
        kernel=RBF(1),
    )

    # column 1 is the regression target, the remaining columns are features
    node = 1
    y = data.iloc[:, node]
    X = data.drop(node, axis=1)

    def mean_cv_score(regressor):
        # a fresh, identically seeded splitter per model gives the same folds
        folds = KFold(shuffle=True, random_state=42)
        return cross_val_score(regressor, X, y, cv=folds).mean()

    linear_score = mean_cv_score(
        DAGRegressor(
            alpha=0.0,
            l1_ratio=0.0,
            fit_intercept=True,
            dependent_target=True,
            enforce_dag=False,
            hidden_layer_units=[0],
            standardize=standardize,
        )
    )

    # settings shared by the two models that use hidden units
    shared_settings = {
        "alpha": 0.1,
        "l1_ratio": 1.0,
        "fit_intercept": True,
        "enforce_dag": False,
        "standardize": standardize,
    }
    small_nl_score = mean_cv_score(
        DAGRegressor(hidden_layer_units=[2], **shared_settings)
    )
    medium_nl_score = mean_cv_score(
        DAGRegressor(hidden_layer_units=[4], **shared_settings)
    )

    assert small_nl_score > linear_score
    assert medium_nl_score > small_nl_score
def test_returns_ndarray(self, distribution):
    """generate_continuous_data returns an ndarray for the supplied distribution"""
    sm = generate_structure(10, 4, "erdos-renyi")
    generated = generate_continuous_data(
        sm, distribution=distribution, n_samples=10
    )
    assert isinstance(generated, np.ndarray)
def test_returns_dataframe(self, distribution):
    """generate_categorical_dataframe returns a DataFrame for the supplied distribution"""
    sm = generate_structure(10, 4, "erdos-renyi")
    generated = generate_categorical_dataframe(
        sm, distribution=distribution, n_samples=10
    )
    assert isinstance(generated, pd.DataFrame)
def test_bad_distribution_type(self):
    """An unrecognised distribution name makes generate_binary_data raise ValueError"""
    sm = generate_structure(10, 4, "erdos-renyi")
    expected_message = "Unknown binary distribution"
    with pytest.raises(ValueError, match=expected_message):
        generate_binary_data(sm, distribution="invalid", n_samples=10, seed=10)
def test_erdos_renyi_degree_increases_edges(self):
    """
    The largest edge count over 10 Erdos-Renyi graphs must not decrease
    when degree goes from 10 to 90
    """

    def largest_edge_count(degree):
        # best of 10 independently generated 100-node graphs
        return max(
            len(generate_structure(100, degree, "erdos-renyi").edges)
            for _ in range(10)
        )

    edge_counts = [largest_edge_count(degree) for degree in [10, 90]]
    assert edge_counts == sorted(edge_counts)
def test_barabasi_albert_degree_increases_edges(self):
    """
    The largest edge count over 10 Barabasi-Albert graphs must not decrease
    when degree goes from 10 to 90
    """

    def largest_edge_count(degree):
        # best of 10 independently generated 100-node graphs
        return max(
            len(generate_structure(100, degree, "barabasi-albert").edges)
            for _ in range(10)
        )

    edge_counts = [largest_edge_count(degree) for degree in [10, 90]]
    assert edge_counts == sorted(edge_counts)
def test_bad_distribution_type(self):
    """An unrecognised distribution name makes generate_continuous_data raise ValueError"""
    sm = generate_structure(10, 4, "erdos-renyi")
    expected_message = "Unknown continuous distribution"
    with pytest.raises(ValueError, match=expected_message):
        generate_continuous_data(sm, distribution="invalid", n_samples=10, seed=10)
def test_nonlinear_performance(self, standardize):
    """
    On data generated with an RBF kernel, hidden_layer_units=[2] should score
    at least as well (within 1e-5) as hidden_layer_units=[0]
    """
    torch.manual_seed(42)
    np.random.seed(42)
    sm = dg.generate_structure(num_nodes=5, degree=3)
    sm.threshold_till_dag()
    data = dg.generate_continuous_dataframe(
        sm,
        n_samples=200,
        intercept=True,
        seed=42,
        noise_scale=0.1,
        kernel=RBF(1),
    )

    # column 1 is the regression target, the remaining columns are features
    node = 1
    y = data.iloc[:, node]
    X = data.drop(node, axis=1)

    def mean_cv_score(regressor):
        # a fresh, identically seeded 3-fold splitter per model
        folds = KFold(n_splits=3, shuffle=True, random_state=42)
        return cross_val_score(regressor, X, y, cv=folds).mean()

    linear_score = mean_cv_score(
        DAGRegressor(
            alpha=0.0,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=[0],
            standardize=standardize,
        )
    )
    small_nl_score = mean_cv_score(
        DAGRegressor(
            alpha=0.1,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=[2],
            standardize=standardize,
        )
    )

    assert small_nl_score > linear_score or np.isclose(
        small_nl_score, linear_score, atol=1e-5
    )
def generate_structure_dynamic(  # pylint: disable=too-many-arguments
    num_nodes: int,
    p: int,
    degree_intra: float,
    degree_inter: float,
    graph_type_intra: str = "erdos-renyi",
    graph_type_inter: str = "erdos-renyi",
    w_min_intra: float = 0.5,
    w_max_intra: float = 0.5,
    w_min_inter: float = 0.5,
    w_max_inter: float = 0.5,
    w_decay: float = 1.0,
) -> StructureModel:
    """
    Randomly generate a dynamic DAG made of an intra-slice structure and an
    inter-slice structure.

    Nodes of the intra-slice structure are renamed to ``"<node>_lag0"``; nodes
    and edges of the inter-slice structure are copied unchanged.

    Args:
        num_nodes: Number of nodes
        p: maximum lag to be considered in the structure
        degree_intra: expected degree on nodes from the current state
        degree_inter: expected degree on nodes from the lagged nodes
        graph_type_intra:
            - erdos-renyi: constructs a graph such that the probability of any given edge
              is degree / (num_nodes - 1)
            - barabasi-albert: constructs a scale-free graph from an initial connected
              graph of (degree / 2) nodes
            - full: constructs a fully-connected graph - degree has no effect
        graph_type_inter:
            - erdos-renyi: constructs a graph such that the probability of any given edge
              is degree / (num_nodes - 1)
            - full: connect all past nodes to all present nodes
        w_min_intra: minimum weight for intra-slice nodes
        w_max_intra: maximum weight for intra-slice nodes
        w_min_inter: minimum weight for inter-slice nodes
        w_max_inter: maximum weight for inter-slice nodes
        w_decay: exponent of weights decay for slices that are farther apart.
            Default is 1.0, which implies no decay

    Raises:
        ValueError: if graph type unknown or `num_nodes < 2`

    Returns:
        StructureModel containing all simulated nodes and edges (intra- and inter-slice)
    """
    intra_slice = generate_structure(
        num_nodes=num_nodes,
        degree=degree_intra,
        graph_type=graph_type_intra,
        w_min=w_min_intra,
        w_max=w_max_intra,
    )
    inter_slice = _generate_inter_structure(
        num_nodes=num_nodes,
        p=p,
        degree=degree_inter,
        graph_type=graph_type_inter,
        w_min=w_min_inter,
        w_max=w_max_inter,
        w_decay=w_decay,
    )

    def as_current(node):
        # intra-slice nodes are tagged as lag 0
        return f"{node}_lag0"

    current_nodes = [as_current(node) for node in intra_slice.nodes]
    current_edges = [
        (as_current(source), as_current(target), weight)
        for source, target, weight in intra_slice.edges.data("weight")
    ]

    # inter-slice content is inserted first, then the lag-0 content
    combined = StructureModel()
    combined.add_nodes_from(inter_slice.nodes)
    combined.add_nodes_from(current_nodes)
    combined.add_weighted_edges_from(inter_slice.edges.data("weight"))
    combined.add_weighted_edges_from(current_edges)
    return combined
def test_is_dag_graph_type(self, graph_type):
    """
    The structure generated for the given graph type must be acyclic
    """
    n_nodes = 10
    degree = 4
    generated = generate_structure(n_nodes, degree, graph_type)
    assert is_directed_acyclic_graph(generated)
def test_expected_num_nodes(self, num_nodes, degree):
    """
    The generated structure must contain exactly num_nodes nodes
    """
    generated = generate_structure(num_nodes, degree)
    node_count = len(generated.nodes)
    assert node_count == num_nodes
def test_min_max_weights_equal(self):
    """
    With w_min == w_max == w, every adjacency entry must be 0, w or -w
    """
    w = 1
    sm = generate_structure(4, 1, w_min=w, w_max=w)
    adjacency = nx.to_numpy_array(sm)

    is_zero = adjacency == 0
    is_positive_w = adjacency == w
    is_negative_w = adjacency == -w
    assert np.all(is_zero | is_positive_w | is_negative_w)
def test_is_dag_nodes_degrees(self, num_nodes, degree):
    """
    The generated structure must be acyclic for the given node count and degree
    """
    generated = generate_structure(num_nodes, degree)
    acyclic = nx.is_directed_acyclic_graph(generated)
    assert acyclic
def test_num_nodes_exception(self, num_nodes):
    """
    The supplied node count is too small and must raise a ValueError
    """
    expected_message = "DAG must have at least 2 nodes"
    with pytest.raises(ValueError, match=expected_message):
        generate_structure(num_nodes, 1)
def test_full_network(self):
    """
    A "full" graph on n nodes must have n * (n - 1) / 2 edges
    """
    n_nodes = 40
    sm = generate_structure(n_nodes, degree=0, graph_type="full")
    expected_edge_count = (n_nodes * (n_nodes - 1)) / 2
    assert len(sm.edges) == expected_edge_count
def test_bad_graph_type(self):
    """
    A graph type outside "erdos-renyi", "barabasi-albert", "full" must raise ValueError
    """
    n_nodes = 10
    degree = 4
    expected_message = "unknown graph type"
    with pytest.raises(ValueError, match=expected_message):
        generate_structure(n_nodes, degree, "invalid")