Esempio n. 1
0
    def explain(self,
                num_samples=10,
                percentage=50,
                top_node=None,
                p_threshold=0.05,
                pred_threshold=0.1):
        """Rank graph nodes by their dependence on the graph-level prediction.

        The search runs in two rounds.  Round 1 perturbs every node using
        ``num_samples / 2`` samples and keeps the ``4 * top_node`` nodes with
        the smallest chi-square p-values as candidates.  Round 2 perturbs only
        those candidates using ``num_samples`` samples and re-tests every
        node against the prediction column.

        Sampling is delegated to ``self.batch_perturb_features_on_node``
        (defined elsewhere); its result is loaded into a DataFrame in which
        column ``num_nodes`` is used as the prediction variable.

        Args:
            num_samples: Number of samples requested in round 2 (round 1 uses
                half of that, truncated to an int).
            percentage: Forwarded unchanged to the sampling helper.
            top_node: Number of nodes to report.  Defaults to
                ``int(num_nodes / 20)``.
            p_threshold: Forwarded unchanged to the sampling helper.
            pred_threshold: Forwarded unchanged to the sampling helper.

        Returns:
            A tuple ``(pgm_nodes, p_values, candidate_nodes)``:
            ``pgm_nodes`` is a list with the indices of the (at most
            ``top_node``) nodes having the smallest round-2 p-values, in no
            particular order; ``p_values`` is the list of round-2 p-values
            for all nodes; ``candidate_nodes`` is the array of node indices
            selected in round 1.
        """
        num_nodes = self.X_feat.shape[0]
        if top_node is None:
            top_node = int(num_nodes / 20)

        # The entry for the graph classification data is at "num_nodes".
        target = num_nodes

        # Round 1: perturb every node and pick a candidate pool.
        samples = self.batch_perturb_features_on_node(int(num_samples / 2),
                                                      range(num_nodes),
                                                      percentage, p_threshold,
                                                      pred_threshold)
        data = pd.DataFrame(samples)

        p_values = []
        for node in range(num_nodes):
            _, p = chi_square(node, target, [], data)
            p_values.append(p)

        # np.argpartition requires kth < len(p_values); clamp the pool size
        # the same way the final selection below is clamped, so a large
        # user-supplied top_node does not raise.
        number_candidates = max(0, min(int(top_node * 4), num_nodes - 1))
        candidate_nodes = np.argpartition(
            p_values, number_candidates)[0:number_candidates]

        # Round 2: perturb only the candidates, then re-test all nodes.
        samples = self.batch_perturb_features_on_node(num_samples,
                                                      candidate_nodes,
                                                      percentage, p_threshold,
                                                      pred_threshold)
        data = pd.DataFrame(samples)

        p_values = []
        for node in range(num_nodes):
            _, p = chi_square(node, target, [], data)
            p_values.append(p)

        top_p = np.min((top_node, num_nodes - 1))
        ind_top_p = np.argpartition(p_values, top_p)[0:top_p]
        pgm_nodes = list(ind_top_p)

        return pgm_nodes, p_values, candidate_nodes
Esempio n. 2
0
    def test_conditional_independence(self,
                                      X,
                                      Y,
                                      Zs=[],
                                      method="chi_square",
                                      tol=0.01,
                                      **kwargs):
        """Decide whether X and Y are independent given Zs on ``self.data``.

        Args:
            X: First variable, forwarded to the underlying test.
            Y: Second variable, forwarded to the underlying test.
            Zs: Conditioning variables, forwarded as ``Z``.
            method: ``"chi_square"`` or ``"pearsonr"``.
            tol: For ``"chi_square"`` the p-value must be at least ``tol``;
                for ``"pearsonr"`` the absolute value of the first returned
                element must be at most ``tol``.
            **kwargs: Extra arguments passed only to ``pearsonr``.

        Returns:
            ``True`` if the selected criterion holds, ``False`` otherwise,
            and ``None`` when ``method`` is neither of the two names above.
        """
        if method == "chi_square":
            _, p_value = chi_square(X=X,
                                    Y=Y,
                                    Z=Zs,
                                    data=self.data,
                                    state_names=self.state_names)
            independent = p_value >= tol
        elif method == "pearsonr":
            coefficient, _ = pearsonr(X=X,
                                      Y=Y,
                                      Z=Zs,
                                      data=self.data,
                                      **kwargs)
            independent = abs(coefficient) <= tol
        else:
            # Unknown method names fall through without a verdict.
            return None

        return True if independent else False
Esempio n. 3
0
 def assoc(X, Y, Zs):
     """Score the (conditional) association between X and Y given Zs.

     The score is ``1 - p``, where ``p`` is the second element returned by
     ``chi_square`` on ``self.data``; a smaller p-value therefore yields a
     larger score.  ``self`` is taken from the enclosing scope.
     """
     test_result = chi_square(X, Y, Zs, self.data)
     p_value = test_result[1]
     return 1 - p_value
Esempio n. 4
0
    def explain(self,
                node_idx,
                num_samples=100,
                top_node=None,
                p_threshold=0.05,
                pred_threshold=0.1):
        """Find the neighbors whose perturbation is tied to ``node_idx``'s prediction.

        For each of ``num_samples`` rounds every neighbor (including
        ``node_idx`` itself) is perturbed with probability 1/2, the model is
        re-run, and for each neighbor it is recorded whether the probability
        of its originally predicted class dropped by more than
        ``pred_threshold``.  Both bits are folded into one categorical code
        per neighbor (``perturbed * 10 + dropped + 1``) and each neighbor's
        column is chi-square tested against the column of ``node_idx``.

        Args:
            node_idx: Index of the node to explain.
            num_samples: Number of perturbation rounds.
            top_node: If ``None``, report every neighbor with
                ``p < p_threshold``; otherwise report the ``top_node``
                neighbors with the smallest p-values (capped at
                ``len(neighbors) - 1``).
            p_threshold: Significance level used when ``top_node`` is ``None``.
            pred_threshold: Minimum probability drop counted as a change.

        Returns:
            A tuple ``(pgm_nodes, data, pgm_stats)``: the selected node ids,
            the DataFrame of combined codes with columns labelled by node id,
            and a dict mapping each neighbor to its p-value.
        """
        print("Explaining node: " + str(node_idx))
        nA = self.n_hops_A(self.num_layers)
        _, _, _, neighbors = self.extract_n_hops_neighbors(nA, node_idx)

        if node_idx not in neighbors:
            neighbors = np.append(neighbors, node_idx)

        num_nodes = self.X.shape[0]
        X_torch = torch.tensor([self.X], dtype=torch.float)
        A_torch = torch.tensor([self.A], dtype=torch.float)
        pred_torch, _ = self.model.forward(X_torch, A_torch)
        soft_pred = np.asarray([
            softmax(np.asarray(pred_torch[0][node_].data))
            for node_ in range(num_nodes)
        ])

        # The unperturbed prediction never changes, so compute each node's
        # predicted class and its probability once instead of per sample.
        top_class = [np.argmax(soft_pred[node_]) for node_ in range(num_nodes)]
        top_prob = [np.max(soft_pred[node_]) for node_ in range(num_nodes)]

        Samples = []
        Pred_Samples = []

        for _ in range(num_samples):
            # Work on a copy so the stored features stay untouched.
            X_perturb = self.X.copy()
            sample = []
            for node in neighbors:
                seed = np.random.randint(2)
                if seed == 1:
                    X_perturb = self.perturb_features_on_node(X_perturb,
                                                              node,
                                                              random=seed)
                sample.append(1 if seed == 1 else 0)

            X_perturb_torch = torch.tensor([X_perturb], dtype=torch.float)
            pred_perturb_torch, _ = self.model.forward(X_perturb_torch,
                                                       A_torch)
            soft_pred_perturb = np.asarray([
                softmax(np.asarray(pred_perturb_torch[0][node_].data))
                for node_ in range(num_nodes)
            ])

            sample_bool = [
                1 if (soft_pred_perturb[node, top_class[node]] +
                      pred_threshold) < top_prob[node] else 0
                for node in neighbors
            ]

            Samples.append(sample)
            Pred_Samples.append(sample_bool)

        Samples = np.asarray(Samples)
        Pred_Samples = np.asarray(Pred_Samples)
        # One categorical code per (sample, neighbor): 1, 2, 11 or 12.
        Combine_Samples = Samples * 10 + Pred_Samples + 1

        data = pd.DataFrame(Combine_Samples)
        ind_sub_to_ori = dict(zip(list(data.columns), neighbors))
        data = data.rename(columns={
            0: "A",
            1: "B"
        })  # Trick to use chi_square test on first two data columns
        ind_ori_to_sub = dict(zip(neighbors, list(data.columns)))

        target_column = ind_ori_to_sub[node_idx]
        p_values = []
        dependent_neighbors = []
        for node in neighbors:
            _, p = chi_square(ind_ori_to_sub[node], target_column, [], data)
            p_values.append(p)
            if p < p_threshold:
                dependent_neighbors.append(node)

        pgm_stats = dict(zip(neighbors, p_values))

        if top_node is None:
            pgm_nodes = dependent_neighbors
        else:
            top_p = np.min((top_node, len(neighbors) - 1))
            ind_top_p = np.argpartition(p_values, top_p)[0:top_p]
            pgm_nodes = [ind_sub_to_ori[node] for node in ind_top_p]

        # Undo the "A"/"B" trick, then label columns with original node ids.
        data = data.rename(columns={"A": 0, "B": 1})
        data = data.rename(columns=ind_sub_to_ori)

        return pgm_nodes, data, pgm_stats
Esempio n. 5
0
    def explain_range(self,
                      node_list,
                      num_samples=1000,
                      top_node=None,
                      p_threshold=0.05,
                      pred_threshold=0.1):
        """Explain several nodes from one shared set of perturbation samples.

        The neighborhoods of all nodes in ``node_list`` are merged, every
        node in the union is perturbed with probability 1/2 in each of
        ``num_samples`` rounds, and the per-node combined codes
        (``perturbed * 10 + dropped + 1``) are reused to chi-square test each
        target against its own neighbors.

        Args:
            node_list: Node indices to explain.
            num_samples: Number of perturbation rounds.
            top_node: If ``None``, report every neighbor with
                ``p < p_threshold``; otherwise report the ``top_node``
                neighbors with the smallest p-values, or all neighbors when
                there are no more than ``top_node`` of them.
            p_threshold: Significance level used when ``top_node`` is ``None``.
            pred_threshold: Minimum probability drop counted as a change.

        Returns:
            A dict mapping each target node to its selected neighbors.  The
            target itself is assigned a p-value of 0.
        """
        nA = self.n_hops_A(self.num_layers)

        neighbors_list = {}
        all_neighbors = []
        for node in node_list:
            _, _, _, neighbors = self.extract_n_hops_neighbors(nA, node)
            if node not in neighbors:
                neighbors = np.append(neighbors, node)
            neighbors_list[node] = neighbors
            # `node` is guaranteed to be in `neighbors` at this point.
            all_neighbors = list(set(all_neighbors) | set(neighbors))

        num_nodes = self.X.shape[0]
        X_torch = torch.tensor([self.X], dtype=torch.float)
        A_torch = torch.tensor([self.A], dtype=torch.float)
        pred_torch, _ = self.model.forward(X_torch, A_torch)
        soft_pred = np.asarray([
            softmax(np.asarray(pred_torch[0][node_].data))
            for node_ in range(num_nodes)
        ])

        # The unperturbed prediction never changes, so compute each node's
        # predicted class and its probability once instead of per sample.
        top_class = [np.argmax(soft_pred[node_]) for node_ in range(num_nodes)]
        top_prob = [np.max(soft_pred[node_]) for node_ in range(num_nodes)]

        Samples = []
        Pred_Samples = []

        for _ in range(num_samples):
            # Work on a copy so the stored features stay untouched.
            X_perturb = self.X.copy()
            sample = []
            for node in all_neighbors:
                seed = np.random.randint(2)
                if seed == 1:
                    X_perturb = self.perturb_features_on_node(X_perturb,
                                                              node,
                                                              random=seed,
                                                              mode=self.mode)
                sample.append(1 if seed == 1 else 0)

            X_perturb_torch = torch.tensor([X_perturb], dtype=torch.float)
            pred_perturb_torch, _ = self.model.forward(X_perturb_torch,
                                                       A_torch)
            soft_pred_perturb = np.asarray([
                softmax(np.asarray(pred_perturb_torch[0][node_].data))
                for node_ in range(num_nodes)
            ])

            sample_bool = [
                1 if (soft_pred_perturb[node, top_class[node]] +
                      pred_threshold) < top_prob[node] else 0
                for node in all_neighbors
            ]

            Samples.append(sample)
            Pred_Samples.append(sample_bool)

        Samples = np.asarray(Samples)
        Pred_Samples = np.asarray(Pred_Samples)
        # One categorical code per (sample, node): 1, 2, 11 or 12.
        Combine_Samples = Samples * 10 + Pred_Samples + 1

        data = pd.DataFrame(Combine_Samples)
        data = data.rename(columns={
            0: "A",
            1: "B"
        })  # Trick to use chi_square test on first two data columns
        ind_ori_to_sub = dict(zip(all_neighbors, list(data.columns)))

        explanations = {}
        for target in node_list:
            print("Generating explanation for node: ", target)

            target_neighbors = neighbors_list[target]
            p_values = []
            dependent_neighbors = []
            for node in target_neighbors:
                if node == target:
                    # The target is not tested against itself.
                    p = 0
                else:
                    _, p = chi_square(ind_ori_to_sub[node],
                                      ind_ori_to_sub[target], [], data)
                p_values.append(p)
                # Honour the caller's significance level (was a literal 0.05).
                if p < p_threshold:
                    dependent_neighbors.append(node)

            if top_node is None:
                pgm_nodes = dependent_neighbors
            elif top_node < len(target_neighbors):
                ind_top = np.argpartition(p_values, top_node)[0:top_node]
                pgm_nodes = [target_neighbors[i] for i in ind_top]
            else:
                pgm_nodes = target_neighbors

            explanations[target] = pgm_nodes
            if self.print_result == 1:
                print(pgm_nodes)

        return explanations
    def explain(self, node_idx, target, num_samples=100, top_node=None, p_threshold=0.05, pred_threshold=0.1):
        """Compute chi-square p-values linking each neighbor to ``node_idx``.

        For each of ``num_samples`` rounds every node in the
        ``self.num_layers``-hop subgraph of ``node_idx`` is perturbed with
        probability 1/2, the model is re-run, and for each neighbor it is
        recorded whether its probability for class ``target`` dropped by
        more than ``pred_threshold``.  Both bits are folded into one
        categorical code (``perturbed * 10 + dropped + 1``) and each
        neighbor's column is tested against the column of ``node_idx``.

        Args:
            node_idx: Index of the node to explain.
            target: Class index whose probability is monitored.
            num_samples: Number of perturbation rounds.
            top_node: Unused; accepted for interface compatibility.
            p_threshold: Unused; accepted for interface compatibility.
            pred_threshold: Minimum probability drop counted as a change.

        Returns:
            A dict mapping each neighbor (including ``node_idx``) to its
            p-value.
        """
        neighbors, _, _, _ = k_hop_subgraph(node_idx, self.num_layers, self.edge_index)
        neighbors = neighbors.cpu().detach().numpy()

        if node_idx not in neighbors:
            neighbors = np.append(neighbors, node_idx)

        num_nodes = self.X.shape[0]
        pred_torch = self.model(self.X, self.edge_index).cpu()
        soft_pred = np.asarray([softmax(np.asarray(pred_torch[node_].data)) for node_ in range(num_nodes)])

        Samples = []
        Pred_Samples = []

        for _ in range(num_samples):
            # Tensor.numpy() shares memory with a CPU tensor, so take an
            # explicit copy: perturbing the array must not modify self.X.
            X_perturb = self.X.cpu().detach().numpy().copy()
            sample = []
            for node in neighbors:
                seed = np.random.randint(2)
                if seed == 1:
                    X_perturb = self.perturb_features_on_node(X_perturb, node, random=seed)
                sample.append(1 if seed == 1 else 0)

            # Place the perturbed features on the same device as self.X,
            # which the model already accepted above.
            X_perturb_torch = torch.tensor(X_perturb, dtype=torch.float).to(self.X.device)
            pred_perturb_torch = self.model(X_perturb_torch, self.edge_index).cpu()
            soft_pred_perturb = np.asarray(
                [softmax(np.asarray(pred_perturb_torch[node_].data)) for node_ in range(num_nodes)])

            sample_bool = [
                1 if (soft_pred_perturb[node, target] + pred_threshold) < soft_pred[node, target] else 0
                for node in neighbors
            ]

            Samples.append(sample)
            Pred_Samples.append(sample_bool)

        Samples = np.asarray(Samples)
        Pred_Samples = np.asarray(Pred_Samples)
        # One categorical code per (sample, neighbor): 1, 2, 11 or 12.
        Combine_Samples = Samples * 10 + Pred_Samples + 1

        data = pd.DataFrame(Combine_Samples)
        data = data.rename(columns={0: "A", 1: "B"})  # Trick to use chi_square test on first two data columns
        ind_ori_to_sub = dict(zip(neighbors, list(data.columns)))

        target_column = ind_ori_to_sub[node_idx]
        p_values = []
        for node in neighbors:
            _, p = chi_square(ind_ori_to_sub[node], target_column, [], data)
            p_values.append(p)

        pgm_stats = dict(zip(neighbors, p_values))

        return pgm_stats