def evaluate_single_graph(df_samples, graph, bn_truth, nb_repeat=3):
    """Fit a network on the structure of ``graph`` and score it against ``bn_truth``.

    A ``BayesianModel`` is created with the nodes of
    ``bn_truth.causal_graph``.  Every edge returned by
    ``remove_bidirected_edges(graph.edges())`` is added to it; when
    ``add_edge`` raises, the reversed orientation is tried, and when that
    raises too the error is printed and the edge is left out.  The model is
    then fitted on ``df_samples`` with ``BayesianEstimator``, checked, and
    wrapped in a ``BayesianNetwork``.

    Note that ``set_observe`` and ``set_state_names`` are applied to the
    caller's ``bn_truth`` as well as to the freshly built network.

    Args:
        df_samples: Data handed to ``BayesianModel.fit``.
        graph: Object exposing ``edges()`` that describes the structure
            under evaluation.
        bn_truth: Reference network; must expose ``causal_graph``, ``bn``
            and ``set_state_names()``.
        nb_repeat: How many ``ODist`` / ``IDist`` evaluations are averaged.

    Returns:
        dict with the keys ``'SID'``, ``'SHD'``, ``'OD'`` and ``'ID'``.
    """
    candidate = BayesianModel()
    candidate.add_nodes_from(bn_truth.causal_graph.nodes())

    for edge in remove_bidirected_edges(graph.edges()):
        try:
            candidate.add_edge(edge[0], edge[1])
        except Exception:
            # The given orientation was rejected: fall back to the reverse.
            try:
                candidate.add_edge(edge[1], edge[0])
            except Exception as reverse_error:
                # Neither orientation is accepted: report it, skip the edge.
                print(reverse_error)

    candidate.fit(df_samples, estimator=BayesianEstimator)
    candidate.check_model()
    bn_test = BayesianNetwork(candidate)

    set_observe(bn_test.bn)
    set_observe(bn_truth.bn)
    bn_truth.set_state_names()
    bn_test.set_state_names()

    # Structural scores first, then the repeated distances, OD before ID.
    sid_score = SID(bn_truth.causal_graph, bn_test.causal_graph)
    shd_score = SHD(bn_truth.causal_graph, bn_test.causal_graph)
    # 1000 is forwarded as the third positional argument of ODist / IDist
    # (presumably a sample count -- TODO confirm against their signatures).
    od_runs = [
        ODist(bn_truth, bn_test, 1000, discrete=True)
        for _ in range(nb_repeat)
    ]
    id_runs = [
        IDist(bn_truth, bn_test, 1000, discrete=True)
        for _ in range(nb_repeat)
    ]

    return {
        'SID': sid_score,
        'SHD': shd_score,
        'OD': np.mean(od_runs),
        'ID': np.mean(id_runs),
    }
from cdt.causality.graph import CCDr
from cdt.causality.graph import CGNN
from cdt.causality.graph import GES
from cdt.causality.graph import LiNGAM
from cdt.causality.graph import PC
from cdt.causality.graph import CAM
from cdt.utils.graph import clr
import networkx as nx

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Read the data
data = pd.read_csv('combined.csv', header=None)
# Ground truth, loaded as a plain numpy array for the metric calls below.
true_CM = np.array(pd.read_csv('true_CM.csv', header=None))

# Time the CDVAE run.
start_time = time.time()
aupr_matrix, dir_adj_matrix = CDVAE()
end_time = time.time() - start_time  # elapsed seconds, despite the name
# "%.2f" rather than "%4.4s": the latter cuts str(float) to four characters,
# which misreports long runs (12345.6 -> "1234") and tiny ones
# (5e-05 -> "5e-0").
print("--- Execution time : %.2f seconds ---" % end_time)

# Retrieve SHD and AUPR
shd = SHD(true_CM, dir_adj_matrix, double_for_anticausal=False)
print('SHD:', shd)
aupr, curve = precision_recall(true_CM, aupr_matrix)
print('AUPR for aupr_matrix:', aupr)
def evaluate_single_graph(df_samples, graph, bn_truth, nb_repeat=1):
    """Score the structure in ``graph`` against the reference ``bn_truth``.

    Steps:
      1. Create a ``BayesianModel`` with the nodes of
         ``bn_truth.causal_graph``.
      2. Add every edge from ``remove_bidirected_edges(graph.edges())``.
         If ``add_edge`` raises, retry with the edge reversed; if that
         raises as well, print the error and drop the edge.
      3. Fit on ``df_samples`` with ``BayesianEstimator``, run
         ``check_model`` and wrap the result in a ``BayesianNetwork``.
      4. Call ``set_observe`` / ``set_state_names`` on both networks
         (this touches the caller's ``bn_truth`` too).
      5. Compute the metrics.

    Args:
        df_samples: Data handed to ``BayesianModel.fit``.
        graph: Object exposing ``edges()``.
        bn_truth: Reference network exposing ``causal_graph``, ``bn`` and
            ``set_state_names()``.
        nb_repeat: Number of ``ODist`` / ``IDist`` evaluations to average.

    Returns:
        dict with the keys ``'SID'``, ``'SHD'``, ``'Tau'``, ``'OD'`` and
        ``'ID'``.
    """
    learned_model = BayesianModel()
    learned_model.add_nodes_from(bn_truth.causal_graph.nodes())

    for edge in remove_bidirected_edges(graph.edges()):
        try:
            learned_model.add_edge(edge[0], edge[1])
        except Exception:
            # Given direction refused; try the opposite one.
            try:
                learned_model.add_edge(edge[1], edge[0])
            except Exception as reversed_error:
                # Both directions refused: log and move on to the next edge.
                print(reversed_error)

    learned_model.fit(df_samples, estimator=BayesianEstimator)
    learned_model.check_model()
    bn_test = BayesianNetwork(learned_model)

    set_observe(bn_test.bn)
    set_observe(bn_truth.bn)
    bn_truth.set_state_names()
    bn_test.set_state_names()

    scores = {}
    scores['SID'] = SID(bn_truth.causal_graph, bn_test.causal_graph)
    scores['SHD'] = SHD(bn_truth.causal_graph, bn_test.causal_graph)
    scores['Tau'] = Tau(bn_truth.causal_graph, bn_test.causal_graph)

    # All ODist repetitions run before any IDist repetition.
    # 1000 is the third positional argument of both calls (presumably a
    # sample count -- TODO confirm against their signatures).
    od_values = []
    for _ in range(nb_repeat):
        od_values.append(ODist(bn_truth, bn_test, 1000, discrete=True))
    scores['OD'] = np.mean(od_values)

    id_values = []
    for _ in range(nb_repeat):
        id_values.append(IDist(bn_truth, bn_test, 1000, discrete=True))
    scores['ID'] = np.mean(id_values)

    return scores
def test_SHD():
    """Check ``SHD`` on the ``init()`` fixture with and without doubling.

    The default call is expected to give 2; passing
    ``double_for_anticausal=False`` is expected to give 1.
    """
    default_distance = SHD(*init())
    assert default_distance == 2

    # A fresh fixture is built for the second call, as for the first.
    undoubled_distance = SHD(*init(), double_for_anticausal=False)
    assert undoubled_distance == 1
def graph_score(self, test_graph: nx.DiGraph) -> float:
    """Return a single score for ``test_graph``; higher is better.

    The score is the first value returned by ``precision_recall`` minus the
    value returned by ``SHD``, both computed against
    ``self._ground_truth_graph``.

    Args:
        test_graph: Graph to compare with the stored ground truth.

    Returns:
        The difference described above, converted to ``float``.
    """
    # precision_recall returns a pair; only its first element is used.
    area, _curve = precision_recall(self._ground_truth_graph, test_graph)
    hamming = SHD(self._ground_truth_graph, test_graph)
    combined = area - hamming
    return float(combined)
def _run_baseline(name, model_factory, data, true_matrix):
    """Run one baseline model on ``data`` and score it against ``true_matrix``.

    Args:
        name: Label stored as the first element of the result row.
        model_factory: Zero-argument callable returning an object with a
            ``predict(data)`` method.
        data: Input forwarded unchanged to ``predict``.
        true_matrix: Ground-truth array forwarded to ``SHD`` and
            ``precision_recall``.

    Returns:
        ``[name, aupr, shd, elapsed_seconds]``.
    """
    # The timer covers model construction, prediction and scoring.
    start_time = time.time()
    model = model_factory()
    predicted_graph = model.predict(data)
    adj_mat = nx.adjacency_matrix(predicted_graph).todense()

    scores = clr(adj_mat)
    # Sanitize the clr output: NaN becomes 0 and +inf becomes 1.
    scores[np.isnan(scores)] = 0
    scores[np.isposinf(scores)] = 1
    predicted = retrieve_adjacency_matrix(scores)

    shd = SHD(true_matrix, predicted, double_for_anticausal=False)
    aupr, _curve = precision_recall(true_matrix, scores)
    elapsed = time.time() - start_time

    # "%.2f" rather than "%4.4s": the latter cuts str(float) to four
    # characters, which misreports long runs (12345.6 -> "1234").
    print("--- Execution time : %.2f seconds ---" % elapsed)
    result = [name, aupr, shd, elapsed]
    print(result)
    return result


def baselines(data):
    """Evaluate the PC, GES, LiNGAM and CCDr baselines on ``data``.

    The ground truth is read once from ``true_CM.csv`` (no header row) and
    shared by all four evaluations.  Each evaluation prints its execution
    time and its result row.

    Args:
        data: Input forwarded to each model's ``predict``.

    Returns:
        Tuple ``(results_pc, results_ges, results_lingam, results_ccdr)``;
        every element is a list ``[name, aupr, shd, elapsed_seconds]``.
    """
    true_matrix = np.array(pd.read_csv('true_CM.csv', header=None))

    results_pc = _run_baseline('PC', PC, data, true_matrix)
    results_ges = _run_baseline('GES', GES, data, true_matrix)
    results_lingam = _run_baseline('LiNGAM', LiNGAM, data, true_matrix)
    results_ccdr = _run_baseline('CCDR', CCDr, data, true_matrix)

    return results_pc, results_ges, results_lingam, results_ccdr
# NOTE(review): this span opens partway through a function body; the `def`
# line it belongs to is not part of the span.
results_ccdr = ['CCDR', aupr, shd, end_time]
print(results_ccdr)
return results_pc, results_ges, results_lingam, results_ccdr
# NOTE(review): everything below follows the `return` above, so it appears to
# be unreachable as written -- confirm against the full function before
# relying on a CGNN result.
# Tests
start_time = time.time()
obj = CGNN(nruns=1, train_epochs=500, test_epochs=500)
output = obj.predict(data)
adj_mat = nx.adjacency_matrix(output).todense()
output = clr(adj_mat)
# Sanitize the clr output: NaN becomes 0 and +inf becomes 1.
output[np.isnan(output)] = 0
output[np.isposinf(output)] = 1
predicted = retrieve_adjacency_matrix(output)
# Ground truth is read from true_CM.csv (no header row).
true_matrix = pd.read_csv('true_CM.csv', header=None)
true_matrix = np.array(true_matrix)
shd = SHD(np.array(true_matrix), predicted, double_for_anticausal=False)
aupr, curve = precision_recall(np.array(true_matrix), output)
# Elapsed seconds since start_time, despite the variable name.
end_time = (time.time() - start_time)
print("--- Execution time : %4.4s seconds ---" % end_time)
results_cgnn = ['CGNN', aupr, shd, end_time]
print(results_cgnn)