def explain(self,
            num_samples=10,
            percentage=50,
            top_node=None,
            p_threshold=0.05,
            pred_threshold=0.1):
    """Explain a graph prediction by selecting the nodes whose perturbation
    is most dependent (by chi-square test) on the change in the prediction."""
    num_nodes = self.X_feat.shape[0]
    if top_node is None:
        top_node = int(num_nodes / 20)

    # Round 1: perturb every node with half the sample budget to shortlist
    # candidate nodes.
    Samples = self.batch_perturb_features_on_node(int(num_samples / 2),
                                                  range(num_nodes), percentage,
                                                  p_threshold, pred_threshold)
    data = pd.DataFrame(Samples)
    p_values = []
    target = num_nodes  # The entry for the graph prediction sits at column "num_nodes".
    for node in range(num_nodes):
        chi2, p = chi_square(node, target, [], data)
        p_values.append(p)

    # Keep the 4 * top_node nodes with the smallest p-values as candidates.
    number_candidates = int(top_node * 4)
    candidate_nodes = np.argpartition(p_values,
                                      number_candidates)[0:number_candidates]

    # Round 2: spend the full sample budget on the candidate nodes only.
    Samples = self.batch_perturb_features_on_node(num_samples, candidate_nodes,
                                                  percentage, p_threshold,
                                                  pred_threshold)
    data = pd.DataFrame(Samples)
    p_values = []
    dependent_nodes = []
    target = num_nodes
    for node in range(num_nodes):
        chi2, p = chi_square(node, target, [], data)
        p_values.append(p)
        if p < p_threshold:
            dependent_nodes.append(node)

    # Return the top_node nodes with the smallest p-values.
    top_p = np.min((top_node, num_nodes - 1))
    ind_top_p = np.argpartition(p_values, top_p)[0:top_p]
    pgm_nodes = list(ind_top_p)

    return pgm_nodes, p_values, candidate_nodes
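# Standalone sketch of the top-k selection used in explain() above:
# np.argpartition places the k smallest p-values in the first k slots without
# a full sort. The p-values here are invented purely for illustration.
import numpy as np

p_values = np.array([0.90, 0.01, 0.47, 0.002, 0.33, 0.08])
k = 2
top_k = np.argpartition(p_values, k)[0:k]  # indices of the 2 smallest values
print(sorted(top_k))  # -> [1, 3]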
def test_conditional_independence(self,
                                  X,
                                  Y,
                                  Zs=None,
                                  method="chi_square",
                                  tol=0.01,
                                  **kwargs):
    """Test whether X and Y are independent given the conditioning set Zs."""
    # Use a None sentinel instead of a mutable default argument: a shared []
    # default would persist across calls.
    if Zs is None:
        Zs = []
    if method == "chi_square":
        # Independent when the chi-square p-value exceeds the tolerance.
        param, p_value = chi_square(X=X,
                                    Y=Y,
                                    Z=Zs,
                                    data=self.data,
                                    state_names=self.state_names)
        return p_value >= tol
    elif method == "pearsonr":
        # Independent when the partial correlation is close to zero.
        param, p_value = pearsonr(X=X, Y=Y, Z=Zs, data=self.data, **kwargs)
        return abs(param) <= tol
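# A rough standalone illustration of the "chi_square" branch above, using
# scipy.stats.chi2_contingency as a stand-in for pgmpy's chi_square (an
# assumed substitution; both compute a chi-square statistic over the
# contingency table). The toy data is invented for illustration only.
import pandas as pd
from scipy.stats import chi2_contingency

toy = pd.DataFrame({"X": [0, 0, 1, 1, 0, 1, 0, 1],
                    "Y": [0, 0, 1, 1, 0, 1, 1, 0]})
chi2, p_value, dof, expected = chi2_contingency(pd.crosstab(toy["X"], toy["Y"]))
tol = 0.01
print(p_value >= tol)  # True -> treat X and Y as independent at this tolerance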
def assoc(X, Y, Zs):
    """Measure of (conditional) association between variables.

    Uses one minus the p-value of the chi-square independence test, so
    stronger evidence of dependence yields a score closer to 1.
    """
    return 1 - chi_square(X, Y, Zs, self.data)[1]
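# Tiny check of the assoc() scoring idea on invented p-values: a smaller
# p-value (stronger evidence of dependence) maps to a score closer to 1,
# which is what a greedy structure search can maximize.
for p_value in (0.001, 0.30, 0.95):
    print(f"p = {p_value:.3f} -> assoc = {1 - p_value:.3f}")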
def explain(self,
            node_idx,
            num_samples=100,
            top_node=None,
            p_threshold=0.05,
            pred_threshold=0.1):
    """Explain a node prediction by testing which neighbors' perturbations
    are dependent (by chi-square test) on changes in the node's prediction."""
    print("Explaining node: " + str(node_idx))
    nA = self.n_hops_A(self.num_layers)
    node_idx_new, sub_A, sub_X, neighbors = self.extract_n_hops_neighbors(
        nA, node_idx)
    if node_idx not in neighbors:
        neighbors = np.append(neighbors, node_idx)

    X_torch = torch.tensor([self.X], dtype=torch.float)
    A_torch = torch.tensor([self.A], dtype=torch.float)
    pred_torch, _ = self.model.forward(X_torch, A_torch)
    soft_pred = np.asarray([
        softmax(np.asarray(pred_torch[0][node_].data))
        for node_ in range(self.X.shape[0])
    ])

    pred_node = np.asarray(pred_torch[0][node_idx].data)
    label_node = np.argmax(pred_node)
    soft_pred_node = softmax(pred_node)

    Samples = []
    Pred_Samples = []
    for iteration in range(num_samples):
        # Perturb the features of each neighbor with probability 0.5 and
        # record which neighbors were perturbed.
        X_perturb = self.X.copy()
        sample = []
        for node in neighbors:
            seed = np.random.randint(2)
            if seed == 1:
                latent = 1
                X_perturb = self.perturb_features_on_node(X_perturb,
                                                          node,
                                                          random=seed)
            else:
                latent = 0
            sample.append(latent)

        X_perturb_torch = torch.tensor([X_perturb], dtype=torch.float)
        pred_perturb_torch, _ = self.model.forward(X_perturb_torch, A_torch)
        soft_pred_perturb = np.asarray([
            softmax(np.asarray(pred_perturb_torch[0][node_].data))
            for node_ in range(self.X.shape[0])
        ])

        # Record whether each neighbor's predicted probability dropped by
        # more than pred_threshold under the perturbation.
        sample_bool = []
        for node in neighbors:
            if (soft_pred_perturb[node, np.argmax(soft_pred[node])] +
                    pred_threshold) < np.max(soft_pred[node]):
                sample_bool.append(1)
            else:
                sample_bool.append(0)

        Samples.append(sample)
        Pred_Samples.append(sample_bool)

    Samples = np.asarray(Samples)
    Pred_Samples = np.asarray(Pred_Samples)

    # Encode each neighbor's (perturbed, prediction-changed) pair as a single
    # categorical value: 10 * perturbed + changed + 1.
    Combine_Samples = np.zeros_like(Samples)
    for s in range(Samples.shape[0]):
        Combine_Samples[s] = np.asarray([
            Samples[s, i] * 10 + Pred_Samples[s, i] + 1
            for i in range(Samples.shape[1])
        ])

    data = pd.DataFrame(Combine_Samples)
    ind_sub_to_ori = dict(zip(list(data.columns), neighbors))
    # Trick to let the chi_square test address the first two data columns.
    data = data.rename(columns={0: "A", 1: "B"})
    ind_ori_to_sub = dict(zip(neighbors, list(data.columns)))

    p_values = []
    dependent_neighbors = []
    dependent_neighbors_p_values = []
    for node in neighbors:
        chi2, p = chi_square(ind_ori_to_sub[node], ind_ori_to_sub[node_idx],
                             [], data)
        p_values.append(p)
        if p < p_threshold:
            dependent_neighbors.append(node)
            dependent_neighbors_p_values.append(p)

    pgm_stats = dict(zip(neighbors, p_values))
    if top_node is None:
        pgm_nodes = dependent_neighbors
    else:
        # Keep the top_node neighbors with the smallest p-values.
        top_p = np.min((top_node, len(neighbors) - 1))
        ind_top_p = np.argpartition(p_values, top_p)[0:top_p]
        pgm_nodes = [ind_sub_to_ori[node] for node in ind_top_p]

    data = data.rename(columns={"A": 0, "B": 1})
    data = data.rename(columns=ind_sub_to_ori)

    return pgm_nodes, data, pgm_stats
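# Standalone sketch of the sample encoding used in explain() above: each
# neighbor's (perturbed?, prediction dropped?) pair becomes one categorical
# value in {1, 2, 11, 12}, so a single chi-square test sees both signals at
# once. The two toy samples below are invented for illustration.
import numpy as np

Samples = np.array([[1, 0], [0, 1]])       # was the neighbor perturbed?
Pred_Samples = np.array([[1, 0], [0, 0]])  # did its prediction drop?
Combine_Samples = Samples * 10 + Pred_Samples + 1
print(Combine_Samples)  # [[12  1] [ 1 11]]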
def explain_range(self,
                  node_list,
                  num_samples=1000,
                  top_node=None,
                  p_threshold=0.05,
                  pred_threshold=0.1):
    """Explain several nodes at once, sharing one set of perturbation samples
    over the union of their n-hop neighborhoods."""
    nA = self.n_hops_A(self.num_layers)

    neighbors_list = {}
    all_neighbors = []
    for node in node_list:
        _, _, _, neighbors = self.extract_n_hops_neighbors(nA, node)
        if node not in neighbors:
            neighbors = np.append(neighbors, node)
        neighbors_list[node] = neighbors
        all_neighbors = list(set(all_neighbors) | set(neighbors))

    X_torch = torch.tensor([self.X], dtype=torch.float)
    A_torch = torch.tensor([self.A], dtype=torch.float)
    pred_torch, _ = self.model.forward(X_torch, A_torch)
    soft_pred = np.asarray([
        softmax(np.asarray(pred_torch[0][node_].data))
        for node_ in range(self.X.shape[0])
    ])

    Samples = []
    Pred_Samples = []
    for iteration in range(num_samples):
        # Perturb the features of each neighbor with probability 0.5 and
        # record which neighbors were perturbed.
        X_perturb = self.X.copy()
        sample = []
        for node in all_neighbors:
            seed = np.random.randint(2)
            if seed == 1:
                latent = 1
                X_perturb = self.perturb_features_on_node(X_perturb,
                                                          node,
                                                          random=seed,
                                                          mode=self.mode)
            else:
                latent = 0
            sample.append(latent)

        X_perturb_torch = torch.tensor([X_perturb], dtype=torch.float)
        pred_perturb_torch, _ = self.model.forward(X_perturb_torch, A_torch)
        soft_pred_perturb = np.asarray([
            softmax(np.asarray(pred_perturb_torch[0][node_].data))
            for node_ in range(self.X.shape[0])
        ])

        # Record whether each neighbor's predicted probability dropped by
        # more than pred_threshold under the perturbation.
        sample_bool = []
        for node in all_neighbors:
            if (soft_pred_perturb[node, np.argmax(soft_pred[node])] +
                    pred_threshold) < np.max(soft_pred[node]):
                sample_bool.append(1)
            else:
                sample_bool.append(0)

        Samples.append(sample)
        Pred_Samples.append(sample_bool)

    Samples = np.asarray(Samples)
    Pred_Samples = np.asarray(Pred_Samples)

    # Encode each neighbor's (perturbed, prediction-changed) pair as a single
    # categorical value: 10 * perturbed + changed + 1.
    Combine_Samples = np.zeros_like(Samples)
    for s in range(Samples.shape[0]):
        Combine_Samples[s] = np.asarray([
            Samples[s, i] * 10 + Pred_Samples[s, i] + 1
            for i in range(Samples.shape[1])
        ])

    data = pd.DataFrame(Combine_Samples)
    # Trick to let the chi_square test address the first two data columns.
    data = data.rename(columns={0: "A", 1: "B"})
    ind_sub_to_ori = dict(zip(list(data.columns), all_neighbors))
    ind_ori_to_sub = dict(zip(all_neighbors, list(data.columns)))

    explanations = {}
    for target in node_list:
        print("Generating explanation for node: ", target)

        p_values = []
        dependent_neighbors = []
        dependent_neighbors_p_values = []
        for node in neighbors_list[target]:
            if node == target:
                p = 0  # The target is trivially dependent on itself.
            else:
                chi2, p = chi_square(ind_ori_to_sub[node],
                                     ind_ori_to_sub[target], [], data)
            p_values.append(p)
            if p < p_threshold:
                dependent_neighbors.append(node)
                dependent_neighbors_p_values.append(p)

        if top_node is None:
            pgm_nodes = dependent_neighbors
        else:
            # Keep the top_node neighbors with the smallest p-values.
            ind_subnei_to_ori = dict(
                zip(range(len(neighbors_list[target])),
                    neighbors_list[target]))
            if top_node < len(neighbors_list[target]):
                ind_top = np.argpartition(p_values, top_node)[0:top_node]
                pgm_nodes = [ind_subnei_to_ori[node] for node in ind_top]
            else:
                pgm_nodes = neighbors_list[target]

        explanations[target] = pgm_nodes
        if self.print_result == 1:
            print(pgm_nodes)

    return explanations
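# Hypothetical batch usage (the explainer construction and node indices are
# assumptions, not from the source): explain several nodes in one pass so the
# perturbation samples are shared across their neighborhoods.
#
# explainer = Node_Explainer(model, A, X, num_layers=3)   # hypothetical setup
# explanations = explainer.explain_range([10, 20, 30],
#                                        num_samples=1000,
#                                        top_node=5)
# for node, pgm_nodes in explanations.items():
#     print(node, "->", pgm_nodes)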
def explain(self,
            node_idx,
            target,
            num_samples=100,
            top_node=None,
            p_threshold=0.05,
            pred_threshold=0.1):
    """Explain a node prediction on a PyTorch Geometric model by testing
    which neighbors' perturbations affect the target-class probability."""
    # Gather the k-hop neighborhood of the node being explained.
    neighbors, _, _, _ = k_hop_subgraph(node_idx, self.num_layers,
                                        self.edge_index)
    neighbors = neighbors.cpu().detach().numpy()
    if node_idx not in neighbors:
        neighbors = np.append(neighbors, node_idx)

    pred_torch = self.model(self.X, self.edge_index).cpu()
    soft_pred = np.asarray([
        softmax(np.asarray(pred_torch[node_].data))
        for node_ in range(self.X.shape[0])
    ])
    pred_node = np.asarray(pred_torch[node_idx].data)
    label_node = np.argmax(pred_node)
    soft_pred_node = softmax(pred_node)

    Samples = []
    Pred_Samples = []
    for iteration in range(num_samples):
        # Perturb the features of each neighbor with probability 0.5 and
        # record which neighbors were perturbed.
        X_perturb = self.X.cpu().detach().numpy()
        sample = []
        for node in neighbors:
            seed = np.random.randint(2)
            if seed == 1:
                latent = 1
                X_perturb = self.perturb_features_on_node(X_perturb,
                                                          node,
                                                          random=seed)
            else:
                latent = 0
            sample.append(latent)

        X_perturb_torch = torch.tensor(X_perturb, dtype=torch.float).to(device)
        pred_perturb_torch = self.model(X_perturb_torch, self.edge_index).cpu()
        soft_pred_perturb = np.asarray([
            softmax(np.asarray(pred_perturb_torch[node_].data))
            for node_ in range(self.X.shape[0])
        ])

        # Record whether the probability of the target class dropped by more
        # than pred_threshold for each neighbor.
        sample_bool = []
        for node in neighbors:
            if (soft_pred_perturb[node, target] +
                    pred_threshold) < soft_pred[node, target]:
                sample_bool.append(1)
            else:
                sample_bool.append(0)

        Samples.append(sample)
        Pred_Samples.append(sample_bool)

    Samples = np.asarray(Samples)
    Pred_Samples = np.asarray(Pred_Samples)

    # Encode each neighbor's (perturbed, prediction-changed) pair as a single
    # categorical value: 10 * perturbed + changed + 1.
    Combine_Samples = np.zeros_like(Samples)
    for s in range(Samples.shape[0]):
        Combine_Samples[s] = np.asarray([
            Samples[s, i] * 10 + Pred_Samples[s, i] + 1
            for i in range(Samples.shape[1])
        ])

    data = pd.DataFrame(Combine_Samples)
    # Trick to let the chi_square test address the first two data columns.
    data = data.rename(columns={0: "A", 1: "B"})
    ind_ori_to_sub = dict(zip(neighbors, list(data.columns)))

    p_values = []
    for node in neighbors:
        chi2, p = chi_square(ind_ori_to_sub[node], ind_ori_to_sub[node_idx],
                             [], data)
        p_values.append(p)

    pgm_stats = dict(zip(neighbors, p_values))
    return pgm_stats
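# Standalone sketch of the prediction-drop test used above, with invented
# logits: a neighbor counts as "affected" when the perturbed probability of
# the target class falls more than pred_threshold below the original one.
import numpy as np
from scipy.special import softmax

pred_threshold = 0.1
orig = softmax(np.array([2.0, 0.5, 0.1]))       # original logits for one node
perturbed = softmax(np.array([0.8, 0.9, 0.6]))  # logits after perturbation
target = 0
affected = (perturbed[target] + pred_threshold) < orig[target]
print(affected)  # True: the target-class probability dropped noticeably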