Example #1
    def save_features(self, root=r'/tmp/', name='mod_features'):
        """Save attacked node feature matrix.

        Parameters
        ----------
        root : str
            root directory where the file will be saved
        name : str
            name of the saved file ('.npz' is appended automatically)

        Returns
        -------
        None.

        """

        assert self.modified_features is not None, \
                'modified_features is None! Please perturb the graph first.'
        name = name + '.npz'
        modified_features = self.modified_features

        if type(modified_features) is torch.Tensor:
            sparse_features = utils.to_scipy(modified_features)
            sp.save_npz(osp.join(root, name), sparse_features)
        else:
            sp.save_npz(osp.join(root, name), modified_features)
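A minimal usage sketch for this helper, assuming `attacker` is an attack instance (for example one of the `attack` implementations shown in the later examples) whose `attack` method has already been called:

import scipy.sparse as sp

# assumption: attacker.attack(...) has already populated attacker.modified_features
attacker.save_features(root='/tmp/', name='mod_features')  # writes /tmp/mod_features.npz

# the perturbed feature matrix can be reloaded as a scipy sparse matrix
modified_features = sp.load_npz('/tmp/mod_features.npz')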
Example #2
    def save_features(self, root=r'/tmp/', name='mod_features'):
        assert self.modified_features is not None, \
                'modified_features is None! Please perturb the graph first.'
        name = name + '.npz'
        modified_features = self.modified_features

        if type(modified_features) is torch.Tensor:
            sparse_features = utils.to_scipy(modified_features)
            sp.save_npz(osp.join(root, name), sparse_features)
        else:
            sp.save_npz(osp.join(root, name), modified_features)
Example #3
def normalize_adj_tensor(adj, sparse=False):
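    """Symmetrically normalize an adjacency matrix given as a torch tensor.

    The dense branch adds self-loops and computes D^-1/2 (A + I) D^-1/2,
    where D is the degree matrix of A + I. With sparse=True the work is
    delegated to the scipy-based utils.normalize_adj and the result is
    converted back to a sparse tensor on the original device.
    """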
    device = adj.device
    if sparse:
        # TODO if this is too slow, uncomment the following code,
        # but you need to install torch_scatter
        # return normalize_sparse_tensor(adj)
        adj = utils.to_scipy(adj)
        mx = utils.normalize_adj(adj)
        return utils.sparse_mx_to_torch_sparse_tensor(mx).to(device)
    else:
        mx = adj + torch.eye(adj.shape[0]).to(device)
        rowsum = mx.sum(1)
        r_inv = rowsum.pow(-1 / 2).flatten()
        r_inv[torch.isinf(r_inv)] = 0.
        r_mat_inv = torch.diag(r_inv)
        mx = r_mat_inv @ mx
        mx = mx @ r_mat_inv
    return mx
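For reference, a small sketch of the dense branch on a 3-node path graph, assuming the function above is importable; the expected value in the comment follows from the D^-1/2 (A + I) D^-1/2 normalization computed above:

import torch

adj = torch.tensor([[0., 1., 0.],
                    [1., 0., 1.],
                    [0., 1., 0.]])
adj_norm = normalize_adj_tensor(adj, sparse=False)
# entry (i, j) equals (A + I)[i, j] / sqrt((d_i + 1) * (d_j + 1)),
# e.g. adj_norm[0, 1] = 1 / sqrt(2 * 3) ≈ 0.408
print(adj_norm)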
Example #4
    def attack(self,
               features,
               adj,
               labels,
               target_node,
               n_perturbations,
               direct=True,
               n_influencers=0,
               ll_cutoff=0.004,
               verbose=True,
               **kwargs):
        """Generate perturbations on the input graph.

        Parameters
        ----------
        features : torch.Tensor or scipy.sparse.csr_matrix
            Original (unperturbed) node feature matrix. Note that
            torch.Tensor will be automatically transformed into
            scipy.sparse.csr_matrix
        adj : torch.Tensor or scipy.sparse.csr_matrix
            Original (unperturbed) adjacency matrix. Note that
            torch.Tensor will be automatically transformed into
            scipy.sparse.csr_matrix
        labels :
            node labels
        target_node : int
            index of the target node to be attacked
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        direct : bool
            whether to conduct a direct attack
        n_influencers : int
            number of influencer nodes used when performing an indirect
            attack (i.e., when `direct` is False). Ignored when `direct` is True.
        ll_cutoff : float
            The critical value for the likelihood ratio test of the power law
            distributions. See the Chi-square distribution with one degree of
            freedom. The default value 0.004 corresponds to a p-value of
            roughly 0.95.
        verbose : bool
            whether to show verbose logs
        """

        if self.nnodes is None:
            self.nnodes = adj.shape[0]

        self.target_node = target_node

        if type(adj) is torch.Tensor:
            self.ori_adj = utils.to_scipy(adj).tolil()
            self.modified_adj = utils.to_scipy(adj).tolil()
            self.ori_features = utils.to_scipy(features).tolil()
            self.modified_features = utils.to_scipy(features).tolil()
        else:
            self.ori_adj = adj.tolil()
            self.modified_adj = adj.tolil()
            self.ori_features = features.tolil()
            self.modified_features = features.tolil()

        self.cooc_matrix = self.modified_features.T.dot(
            self.modified_features).tolil()

        attack_features = self.attack_features
        attack_structure = self.attack_structure
        assert not (direct == False and n_influencers == 0
                    ), "indirect mode requires at least one influencer node"
        assert n_perturbations > 0, "need at least one perturbation"
        assert attack_features or attack_structure, "either attack_features or attack_structure must be true"

        # adj_norm = utils.normalize_adj_tensor(modified_adj, sparse=True)
        self.adj_norm = utils.normalize_adj(self.modified_adj)
        self.W = self.get_linearized_weight()

        logits = (self.adj_norm @ self.adj_norm @ self.modified_features
                  @ self.W)[target_node]

        self.label_u = labels[target_node]
        label_target_onehot = np.eye(int(self.nclass))[labels[target_node]]
        best_wrong_class = (logits - 1000 * label_target_onehot).argmax()
        surrogate_losses = [
            logits[labels[target_node]] - logits[best_wrong_class]
        ]

        if verbose:
            print("##### Starting attack #####")
            if attack_structure and attack_features:
                print(
                    "##### Attack node with ID {} using structure and feature perturbations #####"
                    .format(target_node))
            elif attack_features:
                print("##### Attack only using feature perturbations #####")
            elif attack_structure:
                print("##### Attack only using structure perturbations #####")
            if direct:
                print("##### Attacking the node directly #####")
            else:
                print(
                    "##### Attacking the node indirectly via {} influencer nodes #####"
                    .format(n_influencers))
            print("##### Performing {} perturbations #####".format(
                n_perturbations))

        if attack_structure:
            # Setup starting values of the likelihood ratio test.
            degree_sequence_start = self.ori_adj.sum(0).A1
            current_degree_sequence = self.modified_adj.sum(0).A1
            d_min = 2

            S_d_start = np.sum(
                np.log(degree_sequence_start[degree_sequence_start >= d_min]))
            current_S_d = np.sum(
                np.log(
                    current_degree_sequence[current_degree_sequence >= d_min]))
            n_start = np.sum(degree_sequence_start >= d_min)
            current_n = np.sum(current_degree_sequence >= d_min)
            alpha_start = compute_alpha(n_start, S_d_start, d_min)

            log_likelihood_orig = compute_log_likelihood(
                n_start, alpha_start, S_d_start, d_min)

        if len(self.influencer_nodes) == 0:
            if not direct:
                # Choose influencer nodes
                infls, add_infls = self.get_attacker_nodes(
                    n_influencers, add_additional_nodes=True)
                self.influencer_nodes = np.concatenate(
                    (infls, add_infls)).astype("int")
                # Potential edges are all edges from any attacker to any other node, except the respective
                # attacker itself or the node being attacked.
                self.potential_edges = np.row_stack([
                    np.column_stack(
                        (np.tile(infl, self.nnodes - 2),
                         np.setdiff1d(np.arange(self.nnodes),
                                      np.array([target_node, infl]))))
                    for infl in self.influencer_nodes
                ])
                if verbose:
                    print("Influencer nodes: {}".format(self.influencer_nodes))
            else:
                # direct attack
                influencers = [target_node]
                self.potential_edges = np.column_stack(
                    (np.tile(target_node, self.nnodes - 1),
                     np.setdiff1d(np.arange(self.nnodes), target_node)))
                self.influencer_nodes = np.array(influencers)

        self.potential_edges = self.potential_edges.astype("int32")

        for _ in range(n_perturbations):
            if verbose:
                print("##### ...{}/{} perturbations ... #####".format(
                    _ + 1, n_perturbations))
            if attack_structure:

                # Do not consider edges that, if removed, result in singleton edges in the graph.
                singleton_filter = filter_singletons(self.potential_edges,
                                                     self.modified_adj)
                filtered_edges = self.potential_edges[singleton_filter]

                # Update the values for the power law likelihood ratio test.

                deltas = 2 * (1 - self.modified_adj[tuple(
                    filtered_edges.T)].toarray()[0]) - 1
                d_edges_old = current_degree_sequence[filtered_edges]
                d_edges_new = current_degree_sequence[
                    filtered_edges] + deltas[:, None]
                new_S_d, new_n = update_Sx(current_S_d, current_n, d_edges_old,
                                           d_edges_new, d_min)
                new_alphas = compute_alpha(new_n, new_S_d, d_min)
                new_ll = compute_log_likelihood(new_n, new_alphas, new_S_d,
                                                d_min)
                alphas_combined = compute_alpha(new_n + n_start,
                                                new_S_d + S_d_start, d_min)
                new_ll_combined = compute_log_likelihood(
                    new_n + n_start, alphas_combined, new_S_d + S_d_start,
                    d_min)
                new_ratios = -2 * new_ll_combined + 2 * (new_ll +
                                                         log_likelihood_orig)

                # Do not consider edges that, if added/removed, would lead to a violation of the
                # likelihood ratio Chi-square cutoff value.
                powerlaw_filter = filter_chisquare(new_ratios, ll_cutoff)
                filtered_edges_final = filtered_edges[powerlaw_filter]

                # Compute new entries in A_hat_square_uv
                a_hat_uv_new = self.compute_new_a_hat_uv(
                    filtered_edges_final, target_node)
                # Compute the struct scores for each potential edge
                struct_scores = self.struct_score(
                    a_hat_uv_new, self.modified_features @ self.W)
                best_edge_ix = struct_scores.argmin()
                best_edge_score = struct_scores.min()
                best_edge = filtered_edges_final[best_edge_ix]

            if attack_features:
                # Compute the feature scores for each potential feature perturbation
                feature_ixs, feature_scores = self.feature_scores()
                best_feature_ix = feature_ixs[0]
                best_feature_score = feature_scores[0]

            if attack_structure and attack_features:
                # decide whether to choose an edge or feature to change
                if best_edge_score < best_feature_score:
                    if verbose:
                        print("Edge perturbation: {}".format(best_edge))
                    change_structure = True
                else:
                    if verbose:
                        print(
                            "Feature perturbation: {}".format(best_feature_ix))
                    change_structure = False

            elif attack_structure:
                change_structure = True
            elif attack_features:
                change_structure = False

            if change_structure:
                # perform edge perturbation
                self.modified_adj[tuple(best_edge)] = self.modified_adj[tuple(
                    best_edge[::-1])] = 1 - self.modified_adj[tuple(best_edge)]
                self.adj_norm = utils.normalize_adj(self.modified_adj)

                self.structure_perturbations.append(tuple(best_edge))
                self.feature_perturbations.append(())
                surrogate_losses.append(best_edge_score)

                # Update likelihood ratio test values
                current_S_d = new_S_d[powerlaw_filter][best_edge_ix]
                current_n = new_n[powerlaw_filter][best_edge_ix]
                current_degree_sequence[best_edge] += deltas[powerlaw_filter][
                    best_edge_ix]

            else:
                self.modified_features[tuple(
                    best_feature_ix
                )] = 1 - self.modified_features[tuple(best_feature_ix)]
                self.feature_perturbations.append(tuple(best_feature_ix))
                self.structure_perturbations.append(())
                surrogate_losses.append(best_feature_score)
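A hedged end-to-end sketch of how an `attack` method with this signature is typically driven. The `Nettack` class, `GCN` surrogate, and `Dataset` loader below are assumptions based on the DeepRobust library this example appears to come from, not part of the code above:

from deeprobust.graph.data import Dataset
from deeprobust.graph.defense import GCN
from deeprobust.graph.targeted_attack import Nettack

data = Dataset(root='/tmp/', name='cora')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val = data.idx_train, data.idx_val

# train a linear (no-ReLU) GCN surrogate whose weights the attack linearizes
surrogate = GCN(nfeat=features.shape[1], nclass=labels.max() + 1,
                nhid=16, with_relu=False, device='cpu')
surrogate.fit(features, adj, labels, idx_train, idx_val)

target_node = 0
model = Nettack(surrogate, nnodes=adj.shape[0],
                attack_structure=True, attack_features=True, device='cpu')
model.attack(features, adj, labels, target_node, n_perturbations=5, direct=True)

modified_adj = model.modified_adj            # perturbed adjacency (lil matrix)
modified_features = model.modified_features  # perturbed features (lil matrix)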
Example #5
    def attack(self,
               features,
               adj,
               labels,
               target_node,
               n_perturbations,
               direct=True,
               n_influencers=3,
               **kwargs):
        """Generate perturbations on the input graph.

        Parameters
        ----------
        features :
            Original (unperturbed) node feature matrix
        adj :
            Original (unperturbed) adjacency matrix
        labels :
            node labels
        target_node : int
            index of the target node to be attacked
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        direct : bool
            whether to conduct a direct attack
        n_influencers : int
            number of top influencer nodes to choose. For a direct attack,
            it is set to `n_perturbations`.
        """
        if sp.issparse(features):
            # to dense numpy matrix
            features = features.A

        if not torch.is_tensor(features):
            features = torch.tensor(features, device=self.device)

        if torch.is_tensor(adj):
            adj = utils.to_scipy(adj).tocsr()

        self.modified_features = features.requires_grad_(
            bool(self.attack_features))

        target_label = torch.LongTensor([labels[target_node]])
        best_wrong_label = torch.LongTensor([
            (self.logits[target_node].cpu() -
             1000 * torch.eye(self.logits.size(1))[target_label]).argmax()
        ])

        self.selfloop_degree = torch.tensor(adj.sum(1).A1 + 1,
                                            device=self.device)
        self.target_label = target_label.to(self.device)
        self.best_wrong_label = best_wrong_label.to(self.device)
        self.n_perturbations = n_perturbations
        self.ori_adj = adj
        self.target_node = target_node
        self.direct = direct

        attacker_nodes = torch.where(
            torch.as_tensor(labels) == best_wrong_label)[0]
        subgraph = self.get_subgraph(attacker_nodes, n_influencers)

        if not direct:
            # for an indirect attack, edges adjacent to the target node should not be considered
            mask = torch.logical_or(subgraph.edge_index[0] == target_node,
                                    subgraph.edge_index[1] == target_node).to(
                                        self.device)

        structure_perturbations = []
        feature_perturbations = []
        num_features = features.shape[-1]
        for _ in range(n_perturbations):
            edge_grad, non_edge_grad, features_grad = self.compute_gradient(
                subgraph)
            max_structure_score = max_feature_score = 0.

            if self.attack_structure:
                edge_grad *= (-2 * subgraph.edge_weight + 1)
                non_edge_grad *= -2 * subgraph.non_edge_weight + 1
                min_grad = min(edge_grad.min().item(),
                               non_edge_grad.min().item())
                edge_grad -= min_grad
                non_edge_grad -= min_grad
                if not direct:
                    edge_grad[mask] = 0.
                max_edge_grad, max_edge_idx = torch.max(edge_grad, dim=0)
                max_non_edge_grad, max_non_edge_idx = torch.max(non_edge_grad,
                                                                dim=0)
                max_structure_score = max(max_edge_grad.item(),
                                          max_non_edge_grad.item())

            if self.attack_features:
                features_grad *= -2 * self.modified_features + 1
                features_grad -= features_grad.min()
                if not direct:
                    features_grad[target_node] = 0.
                max_feature_grad, max_feature_idx = torch.max(
                    features_grad.view(-1), dim=0)
                max_feature_score = max_feature_grad.item()

            if max_structure_score >= max_feature_score:
                if max_edge_grad > max_non_edge_grad:
                    # remove one edge
                    best_edge = subgraph.edge_index[:, max_edge_idx]
                    subgraph.edge_weight.data[max_edge_idx] = 0.0
                    self.selfloop_degree[best_edge] -= 1.0
                else:
                    # add one edge
                    best_edge = subgraph.non_edge_index[:, max_non_edge_idx]
                    subgraph.non_edge_weight.data[max_non_edge_idx] = 1.0
                    self.selfloop_degree[best_edge] += 1.0

                u, v = best_edge.tolist()
                structure_perturbations.append((u, v))
            else:
                u, v = divmod(max_feature_idx.item(), num_features)
                feature_perturbations.append((u, v))
                self.modified_features[u, v].data.fill_(
                    1. - self.modified_features[u, v].data)

        if structure_perturbations:
            modified_adj = adj.tolil(copy=True)
            row, col = list(zip(*structure_perturbations))
            modified_adj[row,
                         col] = modified_adj[col,
                                             row] = 1 - modified_adj[row,
                                                                     col].A
            modified_adj = modified_adj.tocsr(copy=False)
            modified_adj.eliminate_zeros()
        else:
            modified_adj = adj.copy()

        self.modified_adj = modified_adj
        self.modified_features = self.modified_features.detach().cpu().numpy()
        self.structure_perturbations = structure_perturbations
        self.feature_perturbations = feature_perturbations
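A short, hedged sketch of how the outputs of this gradient-based variant are typically consumed once `attack` has run; `attacker` stands for an instance of the class above, and the victim-model call in the final comment is illustrative:

# assumption: attacker.attack(features, adj, labels, target_node, n_perturbations=5)
# has already been called on an instance of the class above
modified_adj = attacker.modified_adj            # scipy CSR matrix
modified_features = attacker.modified_features  # dense numpy array

print("edge perturbations:   ", attacker.structure_perturbations)
print("feature perturbations:", attacker.feature_perturbations)

# the perturbed graph can then be passed to a victim model for evaluation,
# e.g. victim.fit(modified_features, modified_adj, labels, idx_train, idx_val)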