def test_f1score_generated(self, adjacency_mat_num_stability):
    """The structure recovered under lasso regularisation should score a very
    high F1 against the ground-truth graph it was generated from."""
    labels = ["a", "b", "c", "d", "e"]
    df = pd.DataFrame(adjacency_mat_num_stability, columns=labels, index=labels)

    # Ground-truth structure built from the raw adjacency matrix.
    train_model = StructureModel(df.values)

    # Sample data from the ground truth, then re-learn the structure from it.
    X = generate_continuous_dataframe(StructureModel(df), 50, noise_scale=1, seed=1)
    g = from_numpy(X[labels].values, lasso_beta=0.1, w_threshold=0.25)

    ground_truth = set(train_model.edges)
    learnt = set(g.edges)
    true_positives = len(learnt & ground_truth)

    precision = true_positives / len(g.edges)
    recall = true_positives / len(ground_truth)
    f1_score = 2 * (precision * recall) / (precision + recall)

    assert f1_score > 0.85
def test_dataframe(self, graph, distribution, noise_std, intercept, seed, kernel):
    """The dataframe wrapper must yield exactly the same samples as the
    underlying array-returning generator."""
    # Identical keyword configuration for both API entry points.
    common = {
        "noise_scale": noise_std,
        "seed": seed,
        "intercept": intercept,
        "kernel": kernel,
    }

    as_array = generate_continuous_data(graph, 1000, distribution, **common)
    as_frame = generate_continuous_dataframe(graph, 1000, distribution, **common)

    # Column order of the frame follows the graph's node order.
    assert np.array_equal(as_array, as_frame[list(graph.nodes())].values)
def test_nonlinear_performance(self, standardize):
    """On data generated with an RBF kernel, adding hidden units should
    improve cross-validated performance monotonically over a linear model."""
    np.random.seed(42)

    # Nonlinear ground-truth data from a random DAG.
    sm = dg.generate_structure(num_nodes=10, degree=3)
    sm.threshold_till_dag()
    data = dg.generate_continuous_dataframe(
        sm, n_samples=1000, intercept=True, seed=42, noise_scale=0.1, kernel=RBF(1)
    )

    target_col = 1
    y = data.iloc[:, target_col]
    X = data.drop(target_col, axis=1)

    def mean_cv_score(**reg_kwargs):
        # Fresh splitter per call, seeded identically so folds match.
        model = DAGRegressor(standardize=standardize, **reg_kwargs)
        folds = KFold(shuffle=True, random_state=42)
        return cross_val_score(model, X, y, cv=folds).mean()

    linear_score = mean_cv_score(
        alpha=0.0,
        l1_ratio=0.0,
        fit_intercept=True,
        dependent_target=True,
        enforce_dag=False,
        hidden_layer_units=[0],
    )
    small_nl_score = mean_cv_score(
        alpha=0.1,
        l1_ratio=1.0,
        fit_intercept=True,
        enforce_dag=False,
        hidden_layer_units=[2],
    )
    medium_nl_score = mean_cv_score(
        alpha=0.1,
        l1_ratio=1.0,
        fit_intercept=True,
        enforce_dag=False,
        hidden_layer_units=[4],
    )

    assert small_nl_score > linear_score
    assert medium_nl_score > small_nl_score
def test_nonlinear_performance(self, standardize):
    """A small nonlinear DAGRegressor should not underperform its linear
    counterpart on RBF-generated data (ties within tolerance allowed)."""
    torch.manual_seed(42)
    np.random.seed(42)

    # Nonlinear ground-truth data from a small random DAG.
    sm = dg.generate_structure(num_nodes=5, degree=3)
    sm.threshold_till_dag()
    frame = dg.generate_continuous_dataframe(
        sm, n_samples=200, intercept=True, seed=42, noise_scale=0.1, kernel=RBF(1)
    )

    target_col = 1
    y = frame.iloc[:, target_col]
    X = frame.drop(target_col, axis=1)

    def mean_cv_score(alpha, units):
        # Fresh, identically-seeded splitter so both models see the same folds.
        model = DAGRegressor(
            alpha=alpha,
            fit_intercept=True,
            dependent_target=True,
            hidden_layer_units=units,
            standardize=standardize,
        )
        folds = KFold(n_splits=3, shuffle=True, random_state=42)
        return cross_val_score(model, X, y, cv=folds).mean()

    linear_score = mean_cv_score(alpha=0.0, units=[0])
    small_nl_score = mean_cv_score(alpha=0.1, units=[2])

    # Nonlinear must beat linear, or at worst tie within numerical tolerance.
    assert small_nl_score > linear_score or np.isclose(
        small_nl_score, linear_score, atol=1e-5
    )