def fit(self,
        features,
        adj,
        labels,
        idx_train,
        idx_val=None,
        threshold=0.01,
        train_iters=200,
        initialize=True,
        verbose=True):
    """Filter the graph with ``drop_dissimilar_edges`` and train on the result.

    Parameters
    ----------
    features :
        node features, given to ``drop_dissimilar_edges`` and ``utils.to_tensor``
    adj :
        adjacency matrix that is filtered before training
    labels :
        node labels
    idx_train :
        node training indices, forwarded to the parent ``fit``
    idx_val :
        node validation indices, forwarded to the parent ``fit``
    threshold : float
        stored on ``self.threshold`` before the edges are filtered
    train_iters : int
        forwarded to the parent ``fit``
    initialize : bool
        forwarded to the parent ``fit``
    verbose : bool
        forwarded to the parent ``fit``
    """
    # Assigned before filtering; presumably drop_dissimilar_edges reads
    # self.threshold -- verify against its implementation.
    self.threshold = threshold
    filtered_adj = self.drop_dissimilar_edges(features, adj)

    converted = utils.to_tensor(features,
                                filtered_adj,
                                labels,
                                device=self.device)
    feature_t, adj_t, label_t = converted

    # Keep the tensors that were actually used for training on the instance.
    self.modified_adj = adj_t
    self.features = feature_t
    self.labels = label_t

    super().fit(feature_t, adj_t, label_t, idx_train, idx_val,
                train_iters=train_iters,
                initialize=initialize,
                verbose=verbose)
Beispiel #2
0
    def attack(self, ori_features, ori_adj, labels, idx_train, perturbations):
        """Unfinished attack routine (work in progress).

        Converts the inputs to tensors, puts the surrogate in eval mode,
        computes ``calc_importance_edge`` scores and then stops in an
        interactive debugger.

        NOTE(review): this method cannot run to completion as written. The
        ``ipdb.set_trace()`` breakpoint halts execution, the loop body is a
        bare expression with no effect, and ``best_s`` is never assigned in
        this method, so the ``copy_`` call raises ``NameError`` unless a global
        with that name exists.
        """
        victim_model = self.surrogate
        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj,
                                                        ori_features,
                                                        labels,
                                                        device=self.device)
        # NOTE(review): modified_adj is only referenced by the no-op loop below.
        modified_adj = ori_adj

        victim_model.eval()

        # NOTE(review): s_e is computed but never used afterwards.
        s_e = self.calc_importance_edge(ori_features,
                                        ori_adj,
                                        labels,
                                        idx_train,
                                        steps=10)

        # NOTE(review): leftover debugger breakpoint; remove before shipping.
        import ipdb
        ipdb.set_trace()

        for t in tqdm(range(perturbations)):
            # NOTE(review): bare expression statement, does nothing.
            modified_adj

        # NOTE(review): best_s is not defined anywhere in this method.
        self.adj_changes.data.copy_(torch.tensor(best_s))
        self.modified_adj = self.get_modified_adj(ori_adj).detach()
Beispiel #3
0
    def attack(self, ori_features, ori_adj, labels, idx_train, perturbations):
        """Optimise ``self.adj_changes`` by gradient steps, then sample a result.

        Runs a fixed 200 iterations. Each iteration evaluates the surrogate on
        the adjacency returned by ``get_modified_adj``, takes the gradient of
        ``self._loss`` on the training nodes with respect to
        ``self.adj_changes``, adds a scaled step and calls ``self.projection``.
        Afterwards ``self.random_sample`` is called and the detached modified
        adjacency is stored on ``self.modified_adj``.
        """
        surrogate = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(
            ori_adj, ori_features, labels, device=self.device)

        surrogate.eval()
        n_steps = 200
        for step in tqdm(range(n_steps)):
            perturbed_adj = self.get_modified_adj(ori_adj)
            normalized_adj = utils.normalize_adj_tensor(perturbed_adj)
            logits = surrogate(ori_features, normalized_adj)
            attack_loss = self._loss(logits[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(attack_loss, self.adj_changes)[0]

            # Step size decays with 1/sqrt(step + 1); the base rate depends on
            # the loss type, and any other loss type takes no step at all.
            decay = np.sqrt(step + 1)
            if self.loss_type == 'CE':
                self.adj_changes.data.add_(200 / decay * adj_grad)
            if self.loss_type == 'CW':
                self.adj_changes.data.add_(0.1 / decay * adj_grad)

            self.projection(perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train,
                           perturbations)
        final_adj = self.get_modified_adj(ori_adj)
        self.modified_adj = final_adj.detach()
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            k=50,
            train_iters=200,
            initialize=True,
            verbose=True):
        """Replace ``adj`` by ``self.truncatedSVD(adj, k=k)`` and train on it.

        The processed adjacency, features and labels are converted with
        ``utils.to_tensor``, stored on the instance and passed to the parent
        ``fit`` together with the remaining training options.
        """
        low_rank_adj = self.truncatedSVD(adj, k=k)

        feature_t, adj_t, label_t = utils.to_tensor(features,
                                                    low_rank_adj,
                                                    labels,
                                                    device=self.device)

        # Remember exactly what the model was trained on.
        self.modified_adj = adj_t
        self.features = feature_t
        self.labels = label_t

        super().fit(feature_t, adj_t, label_t, idx_train, idx_val,
                    train_iters=train_iters,
                    initialize=initialize,
                    verbose=verbose)
Beispiel #5
0
    def fit(self, features, adj, labels, idx_train, idx_val=None, train_iters=200, initialize=True, verbose=False, normalize=True):
        """Train the model; validate during training when ``idx_val`` is given.

        The device is taken from ``self.gc1.weight``. Inputs that are not
        already ``torch.Tensor`` (judged by the exact type of ``adj``) are
        converted with ``utils.to_tensor``; tensors are moved to the device.
        With ``normalize`` set, the adjacency goes through
        ``utils.normalize_adj_tensor``; otherwise it is used unchanged. The
        resulting tensors are stored on ``self.adj_norm``, ``self.features``
        and ``self.labels`` before training starts.
        """
        self.device = self.gc1.weight.device
        if initialize:
            self.initialize()

        if type(adj) is torch.Tensor:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)
        else:
            features, adj, labels = utils.to_tensor(features, adj, labels, device=self.device)

        if not normalize:
            adj_norm = adj
        elif utils.is_sparse_tensor(adj):
            adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            adj_norm = utils.normalize_adj_tensor(adj)

        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        # Without validation indices there is no model selection step.
        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
            return
        self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)
Beispiel #6
0
    def predict(self, features=None, adj=None):
        """Run the model in eval mode; inputs are expected to be unnormalized.

        Parameters
        ----------
        features :
            node features. When both `features` and `adj` are None, the
            `self.features` and `self.adj_norm` stored earlier are reused.
        adj :
            adjacency matrix. When given, it is normalized with
            ``utils.normalize_adj_tensor`` and stored on `self.adj_norm`.

        Returns
        -------
        The value returned by ``self.forward(self.features, self.adj_norm)``
        (documented upstream as the log probabilities of the GCN).
        """
        self.eval()

        reuse_stored = features is None and adj is None
        if not reuse_stored:
            if type(adj) is not torch.Tensor:
                features, adj = utils.to_tensor(features,
                                                adj,
                                                device=self.device)

            # The new inputs replace whatever was cached on the instance.
            self.features = features
            if utils.is_sparse_tensor(adj):
                self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                self.adj_norm = utils.normalize_adj_tensor(adj)

        return self.forward(self.features, self.adj_norm)
Beispiel #7
0
    def fit(self, features, adj, labels, idx_train, idx_val=None, train_iters=200, initialize=True, verbose=False, normalize=True, patience=500, **kwargs):
        """Train the model, choosing the training routine from the arguments.

        Inputs are converted with ``utils.to_tensor`` unless ``adj`` is exactly
        a ``torch.Tensor``, in which case they are moved to ``self.device``.
        The adjacency is normalized when ``normalize`` is true. Training uses
        ``_train_without_val`` when ``idx_val`` is None,
        ``_train_with_early_stopping`` when ``patience < train_iters``, and
        ``_train_with_val`` otherwise. Extra keyword arguments are accepted
        and ignored.
        """
        if initialize:
            self.initialize()

        if type(adj) is torch.Tensor:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)
        else:
            features, adj, labels = utils.to_tensor(features, adj, labels, device=self.device)

        if not normalize:
            adj_norm = adj
        elif utils.is_sparse_tensor(adj):
            adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            adj_norm = utils.normalize_adj_tensor(adj)

        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            # Early stopping only makes sense if it can trigger before the end.
            self._train_with_early_stopping(labels, idx_train, idx_val, train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)
Beispiel #8
0
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            idx_test=None,
            train_iters=200,
            verbose=True,
            attention=None):
        """Densify the inputs, build both normalized adjacencies and train.

        ``adj`` and ``features`` must provide ``todense()``. ``idx_test`` and
        ``attention`` are accepted but not used in this method.
        """
        dense_adj = adj.todense()
        dense_features = features.todense()
        adj, features, labels = utils.to_tensor(dense_adj,
                                                dense_features,
                                                labels,
                                                device=self.device)

        self.features = features
        self.labels = labels
        # Two normalizations of the same adjacency, with powers -1/2 and -1.
        self.adj_norm1 = self._normalize_adj(adj, power=-1 / 2)
        self.adj_norm2 = self._normalize_adj(adj, power=-1)

        print('=== training rgcn model ===')
        self._initialize()

        if idx_val is not None:
            self._train_with_val(labels, idx_train, idx_val, train_iters,
                                 verbose)
        else:
            self._train_without_val(labels, idx_train, train_iters, verbose)
    def attack(self,
               ori_features,
               ori_adj,
               labels,
               idx_train,
               n_perturbations,
               epochs=200,
               **kwargs):
        """Generate perturbations on the input graph.

        Parameters
        ----------
        ori_features :
            Original (unperturbed) node feature matrix
        ori_adj :
            Original (unperturbed) adjacency matrix
        labels :
            node labels
        idx_train :
            node training indices
        n_perturbations : int
            Number of perturbations on the input graph; passed to
            ``self.projection`` and ``self.random_sample``.
        epochs:
            number of gradient iterations on ``self.adj_changes``

        The result is stored on ``self.modified_adj`` and checked with
        ``self.check_adj_tensor``. Extra keyword arguments are ignored.
        """
        surrogate = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(
            ori_adj, ori_features, labels, device=self.device)

        surrogate.eval()
        for epoch in tqdm(range(epochs)):
            perturbed_adj = self.get_modified_adj(ori_adj)
            normalized_adj = utils.normalize_adj_tensor(perturbed_adj)
            logits = surrogate(ori_features, normalized_adj)
            attack_loss = self._loss(logits[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(attack_loss, self.adj_changes)[0]

            # Step size shrinks with 1/sqrt(epoch + 1); the base rate depends
            # on the configured loss type.
            decay = np.sqrt(epoch + 1)
            if self.loss_type == 'CE':
                self.adj_changes.data.add_(200 / decay * adj_grad)
            if self.loss_type == 'CW':
                self.adj_changes.data.add_(0.1 / decay * adj_grad)

            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train,
                           n_perturbations)
        final_adj = self.get_modified_adj(ori_adj)
        self.modified_adj = final_adj.detach()
        self.check_adj_tensor(self.modified_adj)
Beispiel #10
0
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            k=50,
            train_iters=200,
            initialize=True,
            verbose=True,
            **kwargs):
        """Train on a rank-k approximation of the adjacency matrix.

        The adjacency is first processed by ``self.truncatedSVD`` and the
        model is then trained on the processed graph through the parent
        ``fit``. When idx_val is not None, the best model is picked according
        to the validation loss.

        Parameters
        ----------
        features :
            node features
        adj :
            the adjacency matrix. The format could be torch.tensor or scipy matrix
        labels :
            node labels
        idx_train :
            node training indices
        idx_val :
            node validation indices. If not given (None), the GCN training process will not adopt early stopping
        k : int
            number of singular values and vectors to compute; also stored on ``self.k``.
        train_iters : int
            number of training epochs
        initialize : bool
            whether to initialize parameters before training
        verbose : bool
            whether to show verbose logs

        Extra keyword arguments are accepted and ignored.
        """
        low_rank_adj = self.truncatedSVD(adj, k=k)
        self.k = k

        feature_t, adj_t, label_t = utils.to_tensor(features,
                                                    low_rank_adj,
                                                    labels,
                                                    device=self.device)

        # Keep the processed inputs available after training.
        self.modified_adj = adj_t
        self.features = feature_t
        self.labels = label_t

        super().fit(feature_t, adj_t, label_t, idx_train, idx_val,
                    train_iters=train_iters,
                    initialize=initialize,
                    verbose=verbose)
Beispiel #11
0
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            threshold=0.01,
            train_iters=200,
            initialize=True,
            verbose=True,
            **kwargs):
        """Drop dissimilar edges, then train on the processed graph.

        Edges whose similarity is smaller than the given threshold are removed
        by ``self.drop_dissimilar_edges`` and the model is trained through the
        parent ``fit``. When idx_val is not None, the best model is picked
        according to the validation loss.

        Parameters
        ----------
        features :
            node features
        adj :
            the adjacency matrix. The format could be torch.tensor or scipy matrix
        labels :
            node labels
        idx_train :
            node training indices
        idx_val :
            node validation indices. If not given (None), the GCN training process will not adopt early stopping
        threshold : float
            similarity threshold for dropping edges. If two connected nodes have similarity smaller than threshold, the edge between them will be removed.
        train_iters : int
            number of training epochs
        initialize : bool
            whether to initialize parameters before training
        verbose : bool
            whether to show verbose logs

        Extra keyword arguments are accepted and ignored.
        """
        # Stored before filtering, as in the original statement order.
        self.threshold = threshold
        filtered_adj = self.drop_dissimilar_edges(features, adj)

        feature_t, adj_t, label_t = utils.to_tensor(features,
                                                    filtered_adj,
                                                    labels,
                                                    device=self.device)

        self.modified_adj = adj_t
        self.features = feature_t
        self.labels = label_t

        super().fit(feature_t, adj_t, label_t, idx_train, idx_val,
                    train_iters=train_iters,
                    initialize=initialize,
                    verbose=verbose)
Beispiel #12
0
    def attack(self, ori_features, ori_adj, labels, idx_train, target_node, n_perturbations, verbose=False, **kwargs):
        """Generate perturbations on the input graph.

        In every iteration the gradient of the surrogate's loss on
        ``target_node`` with respect to the dense adjacency is computed, and the
        single entry in the target's row with the largest score is flipped
        (symmetrically). The result is stored as a scipy CSR matrix on
        ``self.modified_adj``.

        Parameters
        ----------
        ori_features : scipy.sparse.csr_matrix
            Original (unperturbed) node feature matrix
        ori_adj : scipy.sparse.csr_matrix
            Original (unperturbed) adjacency matrix
        labels :
            node labels
        idx_train:
            training node indices; their true labels override the surrogate's
            predictions when building the pseudo labels
        target_node : int
            target node index to be attacked
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        verbose : bool
            whether to print the number of perturbations

        Extra keyword arguments are accepted and ignored.
        """

        modified_adj = ori_adj.todense()
        modified_features = ori_features.todense()
        modified_adj, modified_features, labels = utils.to_tensor(modified_adj, modified_features, labels, device=self.device)

        self.surrogate.eval()
        if verbose:
            print('number of pertubations: %s' % n_perturbations)

        # Surrogate predictions stand in for unknown labels; known training
        # labels are kept as they are.
        pseudo_labels = self.surrogate.predict().detach().argmax(1)
        pseudo_labels[idx_train] = labels[idx_train]

        modified_adj.requires_grad = True
        for _ in range(n_perturbations):
            adj_norm = utils.normalize_adj_tensor(modified_adj)

            if self.attack_structure:
                output = self.surrogate(modified_features, adj_norm)
                loss = F.nll_loss(output[[target_node]], pseudo_labels[[target_node]])
                grad = torch.autograd.grad(loss, modified_adj)[0]
                # bidirection: combine row and column gradients; the factor is
                # +1 for absent edges and -1 for existing ones (0/1 entries).
                grad = (grad[target_node] + grad[:, target_node]) * (-2*modified_adj[target_node] + 1)
                # never pick the self-loop
                grad[target_node] = -10
                grad_argmax = torch.argmax(grad)

                # The flip lives inside this branch: grad_argmax only exists
                # when the structure gradient was computed. Previously the flip
                # ran unconditionally and raised NameError when
                # attack_structure was disabled.
                value = -2*modified_adj[target_node][grad_argmax] + 1
                modified_adj.data[target_node][grad_argmax] += value
                modified_adj.data[grad_argmax][target_node] += value

            if self.attack_features:
                # feature perturbations are not implemented
                pass

        modified_adj = modified_adj.detach().cpu().numpy()
        modified_adj = sp.csr_matrix(modified_adj)
        self.check_adj(modified_adj)
        self.modified_adj = modified_adj
Beispiel #13
0
    def attack(self, ori_features, ori_adj, labels, idx_train, target_node,
               n_perturbations, **kwargs):
        """Generate perturbations on the input graph.

        Each iteration adds ``self.adj_changes`` to the target node's row,
        takes the gradient of the training loss with respect to
        ``self.adj_changes`` and flips the highest-scoring entry of that row
        (symmetrically). The result is stored as a scipy CSR matrix on
        ``self.modified_adj``.

        Parameters
        ----------
        ori_features : scipy.sparse.csr_matrix
            Original (unperturbed) node feature matrix
        ori_adj : scipy.sparse.csr_matrix
            Original (unperturbed) adjacency matrix
        labels :
            node labels
        idx_train :
            node training indices
        target_node : int
            target node index to be attacked
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        """

        modified_adj = ori_adj.todense()
        modified_features = ori_features.todense()
        modified_adj, modified_features, labels = utils.to_tensor(
            modified_adj, modified_features, labels, device=self.device)

        self.surrogate.eval()
        print('number of pertubations: %s' % n_perturbations)
        for i in range(n_perturbations):
            # Route the target row through adj_changes so the loss has a
            # gradient path to that parameter.
            modified_row = modified_adj[target_node] + self.adj_changes
            modified_adj[target_node] = modified_row
            adj_norm = utils.normalize_adj_tensor(modified_adj)

            if self.attack_structure:
                output = self.surrogate(modified_features, adj_norm)
                loss = F.nll_loss(output[idx_train], labels[idx_train])
                # acc_train = accuracy(output[idx_train], labels[idx_train])
                grad = torch.autograd.grad(loss,
                                           self.adj_changes,
                                           retain_graph=True)[0]
                # Factor is +1 where the row entry is 0 and -1 where it is 1.
                grad = grad * (-2 * modified_row + 1)
                # exclude the self-loop from the argmax
                grad[target_node] = 0
                grad_argmax = torch.argmax(grad)

            # NOTE(review): grad_argmax is only assigned inside the branch
            # above; with attack_structure disabled this raises NameError.
            value = -2 * modified_row[grad_argmax] + 1
            modified_adj.data[target_node][grad_argmax] += value
            modified_adj.data[grad_argmax][target_node] += value

            if self.attack_features:
                # feature perturbations are not implemented
                pass

        modified_adj = modified_adj.detach().cpu().numpy()
        modified_adj = sp.csr_matrix(modified_adj)
        self.check_adj(modified_adj)
        self.modified_adj = modified_adj
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            train_iters=200,
            initialize=True,
            verbose=True,
            normalize=True,
            patience=1000,
            adj2=None):
        """Train the model; when idx_val is not None, validate during training.

        The device is taken from ``self.gc1.weight_x.weight``. After the
        optional ``utils.normalize_adj_tensor`` step the diagonal of the
        adjacency is zeroed, the result is passed through
        ``self.normalize_adj`` and kept both dense (``self.adj_norm_dense``)
        and converted with ``to_sparse`` (``self.adj_norm``).

        Parameters
        ----------
        features, adj, labels :
            training inputs; converted with ``utils.to_tensor`` unless ``adj``
            is exactly a ``torch.Tensor``
        idx_train, idx_val :
            training / validation node indices
        train_iters : int
            number of training epochs
        initialize : bool
            currently unused (the re-initialisation call is commented out)
        verbose : bool
            forwarded to the training routine
        normalize : bool
            whether to apply ``utils.normalize_adj_tensor`` first
        patience : int
            early stopping is used only when ``patience < train_iters``
        adj2 :
            currently unused
        """
        self.device = self.gc1.weight_x.weight.device
        #if initialize:
        #    self.initialize()
        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features,
                                                    adj,
                                                    labels,
                                                    device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        if normalize:
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj

        # Zero the diagonal. The mask is created on the model's own device
        # instead of a hard-coded .cuda(), so this also works on CPU and on
        # non-default GPUs.
        off_diagonal = 1 - torch.eye(adj_norm.size(0), device=self.device)
        adj_norm = adj_norm * off_diagonal
        adj_norm = self.normalize_adj(adj_norm)
        self.adj_norm_dense = adj_norm

        adj_norm = to_sparse(adj_norm)
        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        else:
            if patience < train_iters:
                self._train_with_early_stopping(labels, idx_train, idx_val,
                                                train_iters, patience, verbose)
            else:
                self._train_with_val(labels, idx_train, idx_val, train_iters,
                                     verbose)
Beispiel #15
0
    def attack(self, ori_features, ori_adj, labels, idx_train, n_perturbations, epochs=200, **kwargs):
        """
        Generate perturbations on the input graph

        Runs ``epochs`` gradient iterations on ``self.adj_changes``. When
        ``self.regularization_weight`` is non-zero, a term based on the mean
        squared difference between the eigenvalues of the original and the
        perturbed normalized adjacency is added to the loss. After sampling,
        the eigen-decompositions of both graphs are stored on the instance
        (``ori_e``, ``ori_v``, ``e``, ``v``) together with ``adj`` and
        ``labels``. Extra keyword arguments are ignored.

        NOTE(review): ``torch.symeig`` is deprecated in recent PyTorch releases
        in favour of ``torch.linalg.eigh`` -- confirm the supported version.
        """

        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj, ori_features, labels, device=self.device)

        victim_model.eval()
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj, device=self.device)
            output = victim_model(ori_features, adj_norm)
            self.loss = self._loss(output[idx_train], labels[idx_train])

            # New: add regularization term for spectral distance
            if self.regularization_weight != 0:
                # NOTE(review): ori_adj_norm and its eigenvalues do not change
                # between iterations and could be computed once before the loop.
                ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
                ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)
                e, v = torch.symeig(adj_norm, eigenvectors=True)
                self.regularization = F.mse_loss(ori_e, e)
                self.norm = torch.norm(ori_e)
                # regularizer is scaled by the norm of the original spectrum
                self.loss += self.regularization / self.norm * self.regularization_weight

            adj_grad = torch.autograd.grad(self.loss, self.adj_changes)[0]

            # step size decays with 1/sqrt(t + 1); base rate depends on loss type
            if self.loss_type == 'CE':
                lr = 200 / np.sqrt(t+1)
                self.adj_changes.data.add_(lr * adj_grad)

            if self.loss_type == 'CW':
                lr = 0.1 / np.sqrt(t+1)
                self.adj_changes.data.add_(lr * adj_grad)
            
            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train, n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()
        self.check_adj_tensor(self.modified_adj)

        # for sanity check
        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)
        adj_norm = utils.normalize_adj_tensor(self.modified_adj, device=self.device)
        e, v = torch.symeig(adj_norm, eigenvectors=True)

        # keep inputs and both spectra on the instance for later inspection
        self.adj = ori_adj.detach()
        self.labels = labels.detach()
        self.ori_e = ori_e
        self.ori_v = ori_v
        self.e = e
        self.v = v
Beispiel #16
0
    def attack(self, ori_features, ori_adj, labels, idx_train,
               n_perturbations):
        """Alternate between updating the surrogate and updating the attack.

        For a fixed 200 iterations, the surrogate's parameters take one Adam
        step on ``self._loss`` computed on the currently perturbed graph, and
        then ``self.adj_changes`` takes one gradient step on the same loss
        followed by ``self.projection``. Finally ``self.random_sample`` is
        called and the detached modified adjacency is stored on
        ``self.modified_adj``.

        NOTE(review): the surrogate's parameters are modified in place by the
        optimizer; callers that reuse the surrogate afterwards should be aware.
        """
        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj,
                                                        ori_features,
                                                        labels,
                                                        device=self.device)

        # optimizer
        optimizer = optim.Adam(victim_model.parameters(), lr=0.01)

        epochs = 200
        # NOTE(review): superseded by victim_model.train() at the top of the loop.
        victim_model.eval()
        for t in tqdm(range(epochs)):
            # update victim model
            victim_model.train()
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj)
            output = victim_model(ori_features, adj_norm)
            loss = self._loss(output[idx_train], labels[idx_train])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # generate pgd attack
            # The forward pass is repeated in eval mode with the freshly
            # updated model parameters.
            victim_model.eval()
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj)
            output = victim_model(ori_features, adj_norm)
            loss = self._loss(output[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(loss, self.adj_changes)[0]
            # adj_grad = self.adj_changes.grad

            # step size decays with 1/sqrt(t + 1); base rate depends on loss type
            if self.loss_type == 'CE':
                lr = 200 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            if self.loss_type == 'CW':
                lr = 0.1 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            # self.adj_changes.grad.zero_()
            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train,
                           n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()
Beispiel #17
0
    def predict(self, features=None, adj=None):
        '''Run the model in eval mode; by default, inputs are unnormalized data.

        With no arguments the cached ``self.features`` / ``self.adj_norm`` are
        used. Otherwise the inputs are converted when ``adj`` is not exactly a
        ``torch.Tensor``, the adjacency is normalized, and both replace the
        cached values before the forward pass.
        '''
        self.eval()

        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)

        if type(adj) is not torch.Tensor:
            features, adj = utils.to_tensor(features, adj, device=self.device)

        self.features = features
        if utils.is_sparse_tensor(adj):
            normalized = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            normalized = utils.normalize_adj_tensor(adj)
        self.adj_norm = normalized
        return self.forward(self.features, self.adj_norm)
Beispiel #18
0
    def predict(self, features=None, adj=None):
        '''Run the model in eval mode; by default, inputs are unnormalized data.

        With no arguments ``self.forward()`` runs on the state already stored
        on the instance. Otherwise the features and both normalized
        adjacencies (powers -1/2 and -1) are refreshed first.
        '''
        self.eval()

        refresh = not (features is None and adj is None)
        if refresh:
            if type(adj) is not torch.Tensor:
                # non-tensor inputs must provide todense()
                adj, features = utils.to_tensor(adj.todense(),
                                                features.todense(),
                                                device=self.device)

            self.features = features
            self.adj_norm1 = self._normalize_adj(adj, power=-1 / 2)
            self.adj_norm2 = self._normalize_adj(adj, power=-1)

        return self.forward()
Beispiel #19
0
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            train_iters=200,
            verbose=True,
            **kwargs):
        """Train RGCN.

        Parameters
        ----------
        features :
            node features; must provide ``todense()``
        adj :
            the adjacency matrix; must provide ``todense()``
        labels :
            node labels
        idx_train :
            node training indices
        idx_val :
            node validation indices. If not given (None), training runs without a validation set
        train_iters : int
            number of training epochs
        verbose : bool
            whether to show verbose logs

        Extra keyword arguments are accepted and ignored.
        """
        dense_adj = adj.todense()
        dense_features = features.todense()
        adj, features, labels = utils.to_tensor(dense_adj,
                                                dense_features,
                                                labels,
                                                device=self.device)

        self.features = features
        self.labels = labels
        # The same adjacency normalized with two different powers.
        self.adj_norm1 = self._normalize_adj(adj, power=-1 / 2)
        self.adj_norm2 = self._normalize_adj(adj, power=-1)

        print('=== training rgcn model ===')
        self._initialize()

        if idx_val is not None:
            self._train_with_val(labels, idx_train, idx_val, train_iters,
                                 verbose)
            return
        self._train_without_val(labels, idx_train, train_iters, verbose)
Beispiel #20
0
    def attack(self,
               ori_features,
               ori_adj,
               labels,
               idx_train,
               target_node,
               n_perturbations,
               steps=10):
        """Flip the highest-scoring edges/features of ``target_node``.

        Importance scores come from ``calc_importance_edge`` and
        ``calc_importance_feature`` (all zeros for a disabled attack type).
        In each of the ``n_perturbations`` rounds, the best remaining edge
        score is compared with the best remaining feature score; the winner's
        entry is flipped (ties go to the edge) and its score is reset to 0.
        Results are stored as CSR matrices on ``self.modified_adj`` and
        ``self.modified_features``.
        """
        self.surrogate.eval()
        self.target_node = target_node

        modified_adj = ori_adj.todense()
        modified_features = ori_features.todense()
        adj, features, labels = utils.to_tensor(modified_adj,
                                                modified_features,
                                                labels,
                                                device=self.device)

        adj_norm = utils.normalize_adj_tensor(adj)

        if self.attack_structure:
            edge_scores = self.calc_importance_edge(features, adj_norm, labels,
                                                    idx_train, steps)
        else:
            edge_scores = np.zeros(adj.shape[1])

        if self.attack_features:
            feature_scores = self.calc_importance_feature(
                features, adj_norm, labels, idx_train, steps)
        else:
            feature_scores = np.zeros(features.shape[1])

        for _ in range(n_perturbations):
            best_edge = np.argmax(edge_scores)
            best_feature = np.argmax(feature_scores)

            if edge_scores[best_edge] < feature_scores[best_feature]:
                current = modified_features[target_node, best_feature]
                modified_features[target_node, best_feature] = np.abs(1 - current)
                feature_scores[best_feature] = 0
                continue

            # Flip the edge symmetrically so the matrix stays undirected.
            flipped = np.abs(1 - modified_adj[target_node, best_edge])
            modified_adj[target_node, best_edge] = flipped
            modified_adj[best_edge, target_node] = flipped
            edge_scores[best_edge] = 0

        self.modified_adj = sp.csr_matrix(modified_adj)
        self.modified_features = sp.csr_matrix(modified_features)
        self.check_adj(modified_adj)
Beispiel #21
0
    def attack(self, ori_features, ori_adj, labels, idx_train, perturbations):
        """Optimise ``self.adj_changes`` and pick the best sampled perturbation.

        Phase 1 runs 200 gradient iterations on ``self.adj_changes`` (NLL loss
        on the training nodes, step size ``200 / sqrt(t + 1)``), each followed
        by ``self.projection``. Phase 2 draws 20 Bernoulli samples from the
        optimised values, skips samples with more than ``perturbations``
        flips, and keeps the one with the highest loss. The winning sample is
        written to ``self.adj_changes`` and the resulting adjacency is stored
        on ``self.modified_adj``.

        Raises
        ------
        RuntimeError
            if none of the samples could be selected (every sample exceeded
            the budget, or no loss was comparable).
        """
        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj,
                                                        ori_features,
                                                        labels,
                                                        device=self.device)

        victim_model.eval()
        epochs = 200
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj)
            output = victim_model(ori_features, adj_norm)
            loss = F.nll_loss(output[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(loss, self.adj_changes)[0]

            lr = 200 / np.sqrt(t + 1)
            self.adj_changes.data.add_(lr * adj_grad)
            self.projection(perturbations)

        K = 20
        best_loss = float('-inf')
        best_s = None
        with torch.no_grad():
            # detach(): .numpy() refuses tensors that require grad.
            # copy(): on CPU the array would otherwise share memory with
            # adj_changes, and the copy_() below would overwrite the sampling
            # probabilities after the first accepted sample.
            s = self.adj_changes.detach().cpu().numpy().copy()
            for i in range(K):
                sampled = np.random.binomial(1, s)

                print(sampled.sum())
                if sampled.sum() > perturbations:
                    # over budget, try another sample
                    continue
                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj)
                output = victim_model(ori_features, adj_norm)
                loss = F.nll_loss(output[idx_train], labels[idx_train])
                print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled

            if best_s is None:
                # Previously this surfaced as an UnboundLocalError on best_s.
                raise RuntimeError(
                    'no sampled perturbation could be selected within the '
                    'budget of %s perturbations' % perturbations)
            self.adj_changes.data.copy_(torch.tensor(best_s))
            self.modified_adj = self.get_modified_adj(ori_adj).detach()
Beispiel #22
0
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            idx_test=None,
            k=10,
            train_iters=200,
            initialize=True,
            verbose=True,
            attention=None):
        """Binarise a rank-``k`` reconstruction of ``adj`` and train on it.

        The matrix returned by ``self.truncatedSVD(adj, k=k)`` is thresholded
        at 0.2 (entries below become 0, the rest 1), converted to a scipy LIL
        matrix and then to tensors, stored on ``self`` together with the
        features and labels, and finally handed to the parent class ``fit``.

        Parameters
        ----------
        features, adj, labels :
            Graph data; ``adj`` is the matrix passed to ``truncatedSVD``.
        idx_train, idx_val :
            Training / validation node indices, forwarded to the parent.
        idx_test :
            Accepted but not forwarded: the parent is always called with
            ``idx_test=None``.
        k : int
            Rank passed to ``self.truncatedSVD``.
        train_iters, initialize, verbose :
            Forwarded unchanged to the parent ``fit``.
        attention :
            Unused by this method.
        """
        # try k=10, maybe it works better
        print('runing SVD')
        low_rank_adj = self.truncatedSVD(adj, k=k)

        # Discard the edges below the cut-off and set the remainder to 1.
        cutoff = 0.2
        low_rank_adj[low_rank_adj < cutoff] = 0
        low_rank_adj[low_rank_adj >= cutoff] = 1

        kept_mask = low_rank_adj >= cutoff
        n_kept = kept_mask.sum()
        print('Kept {} edges'.format(n_kept))

        # Dense result -> LIL sparse matrix before the tensor conversion.
        sparse_adj = scipy.sparse.lil_matrix(low_rank_adj)

        features, adj_tensor, labels = utils.to_tensor(features,
                                                       sparse_adj,
                                                       labels,
                                                       device=self.device)

        self.modified_adj = adj_tensor
        self.features = features
        self.labels = labels
        super().fit(features,
                    adj_tensor,
                    labels,
                    idx_train,
                    idx_val,
                    idx_test=None,
                    train_iters=train_iters,
                    initialize=initialize,
                    verbose=verbose)
Beispiel #23
0
    def attack(self, ori_features, ori_adj, labels, idx_train, idx_unlabeled, perturbations, ll_constraint=True, ll_cutoff=0.004):
        """Apply ``perturbations`` greedy meta-gradient flips to the graph.

        Each iteration re-initialises the inner model (``self._initialize``),
        rebuilds the currently perturbed adjacency / features, runs
        ``self.inner_train`` and then flips the single adjacency or feature
        entry with the highest meta score.  The accumulated changes live in
        ``self.adj_changes`` and ``self.feature_changes``.

        Parameters
        ----------
        ori_features :
            Original (unperturbed) node feature matrix.
        ori_adj :
            Original (unperturbed) adjacency matrix.
        labels :
            Node labels.
        idx_train :
            Node training indices.
        idx_unlabeled :
            Unlabeled node indices.
        perturbations : int
            Number of flips to perform.
        ll_constraint : bool
            Forwarded to ``self.get_adj_score``.
        ll_cutoff : float
            Forwarded to ``self.get_adj_score``.

        Results are stored in ``self.modified_adj`` (when
        ``self.attack_structure``) and ``self.modified_features`` (when
        ``self.attack_features``); nothing is returned.
        """
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj, ori_features, labels, device=self.device)
        labels_self_training = self.self_training_label(labels, idx_train)
        # NOTE(review): this is evaluated on the already converted tensor, so
        # it presumably always yields False -- confirm whether the check was
        # meant to run before ``utils.to_tensor``.
        self.sparse_features = sp.issparse(ori_features)
        modified_adj = ori_adj
        modified_features = ori_features

        for _ in tqdm(range(perturbations), desc="Perturbing graph"):
            self._initialize()

            if self.attack_structure:
                modified_adj = self.get_modified_adj(ori_adj)
                self.adj_grad_sum.data.fill_(0)
            if self.attack_features:
                modified_features = ori_features + self.feature_changes
                self.feature_grad_sum.data.fill_(0)

            self.inner_train(modified_features, modified_adj, idx_train, idx_unlabeled, labels, labels_self_training)

            # Scalar zero stands in for the score of a disabled attack type.
            adj_meta_score = torch.tensor(0.0).to(self.device)
            feature_meta_score = torch.tensor(0.0).to(self.device)

            if self.attack_structure:
                adj_meta_score = self.get_adj_score(self.adj_grad_sum, modified_adj, ori_adj, ll_constraint, ll_cutoff)
            if self.attack_features:
                feature_meta_score = self.get_feature_score(self.feature_grad_sum, modified_features)

            if adj_meta_score.max() >= feature_meta_score.max():
                adj_meta_argmax = torch.argmax(adj_meta_score)
                row_idx, col_idx = utils.unravel_index(adj_meta_argmax, ori_adj.shape)
                # (-2 * a + 1) is +1 for a == 0 and -1 for a == 1, i.e. a flip.
                # The same delta is written to both (row, col) and (col, row).
                self.adj_changes.data[row_idx][col_idx] += (-2 * modified_adj[row_idx][col_idx] + 1)
                self.adj_changes.data[col_idx][row_idx] += (-2 * modified_adj[row_idx][col_idx] + 1)
            else:
                feature_meta_argmax = torch.argmax(feature_meta_score)
                row_idx, col_idx = utils.unravel_index(feature_meta_argmax, ori_features.shape)
                # Must be ``feature_changes`` -- the attribute read when
                # building ``modified_features`` above.
                self.feature_changes.data[row_idx][col_idx] += (-2 * modified_features[row_idx][col_idx] + 1)

        if self.attack_structure:
            self.modified_adj = self.get_modified_adj(ori_adj).detach()
        if self.attack_features:
            self.modified_features = self.get_modified_features(ori_features).detach()
    def attack(self, features, adj, labels, idx_train, target_node,
               n_perturbations):
        """Flip edges incident to ``target_node`` one at a time by gradient.

        For each of ``n_perturbations`` rounds the training ``nll_loss`` of
        ``self.surrogate`` is differentiated with respect to
        ``self.adj_changes``; the entry with the largest signed gradient is
        flipped symmetrically in the dense adjacency.  The final adjacency is
        validated with ``self.check_adj`` and stored as a CSR matrix in
        ``self.modified_adj``.

        Parameters
        ----------
        features :
            Node features; must provide ``.todense()``.
        adj :
            Adjacency matrix; must provide ``.todense()`` (the original
            comment names ``sp.csr_matrix``).
        labels :
            Node labels.
        idx_train :
            Node training indices used for the loss.
        target_node : int
            Index of the node whose edges are perturbed.
        n_perturbations : int
            Number of rounds.

        Notes
        -----
        Edge flips only happen when ``self.attack_structure`` is true; the
        feature branch is a placeholder (``pass``).
        """
        # adj: sp.csr_matrix

        modified_adj = adj.todense()
        features = features.todense()
        modified_adj, features, labels = utils.to_tensor(modified_adj,
                                                         features,
                                                         labels,
                                                         device=self.device)

        self.surrogate.eval()
        print('number of pertubations: %s' % n_perturbations)
        for _ in range(n_perturbations):
            # Route the target row through ``adj_changes`` so the loss below
            # is differentiable with respect to it.
            modified_row = modified_adj[target_node] + self.adj_changes
            modified_adj[target_node] = modified_row
            adj_norm = utils.normalize_adj_tensor(modified_adj)

            if self.attack_structure:
                output = self.surrogate(features, adj_norm)
                loss = F.nll_loss(output[idx_train], labels[idx_train])
                # acc_train = accuracy(output[idx_train], labels[idx_train])
                grad = torch.autograd.grad(loss,
                                           self.adj_changes,
                                           retain_graph=True)[0]
                # Factor is +1 where the row entry is 0 and -1 where it is 1,
                # so the score reflects the effect of *flipping* the entry.
                grad = grad * (-2 * modified_row + 1)
                grad[target_node] = 0  # never pick the self-loop
                grad_argmax = torch.argmax(grad)

                # The flip lives inside this branch because ``grad_argmax``
                # only exists when the structure attack ran; outside it the
                # original raised NameError with ``attack_structure`` off.
                value = -2 * modified_row[grad_argmax] + 1
                modified_adj.data[target_node][grad_argmax] += value
                modified_adj.data[grad_argmax][target_node] += value

            if self.attack_features:
                pass

        modified_adj = modified_adj.detach().cpu().numpy()
        modified_adj = sp.csr_matrix(modified_adj)
        self.check_adj(modified_adj)
        self.modified_adj = modified_adj
Beispiel #25
0
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            train_iters=200,
            initialize=True,
            verbose=False,
            normalize=True,
            patience=500,
            **kwargs):
        """Train the model, choosing the training routine from the arguments.

        Without ``idx_val`` the plain training loop is used.  With ``idx_val``
        the early-stopping loop is used when ``patience < train_iters``,
        otherwise the validation-based loop.

        Parameters
        ----------
        features :
            node features
        adj :
            adjacency matrix; anything that is not exactly a ``torch.Tensor``
            is converted (together with ``features`` and ``labels``) through
            ``utils.to_tensor``
        labels :
            node labels
        idx_train :
            node training indices
        idx_val :
            node validation indices, or None
        train_iters : int
            number of training epochs
        initialize : bool
            whether to call ``self.initialize()`` before training
        verbose : bool
            passed through to the training routine
        normalize : bool
            whether to run ``utils.normalize_adj_tensor`` on the adjacency
        patience : int
            early-stopping patience; only consulted when ``idx_val`` is given
        """
        # The model lives wherever its first layer's weights live.
        self.device = self.gc1.weight.device
        if initialize:
            self.initialize()

        if type(adj) is torch.Tensor:
            # Already tensors: only move them to the model's device.
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)
        else:
            features, adj, labels = utils.to_tensor(features,
                                                    adj,
                                                    labels,
                                                    device=self.device)

        if not normalize:
            adj_norm = adj
        elif utils.is_sparse_tensor(adj):
            adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        else:
            adj_norm = utils.normalize_adj_tensor(adj)

        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(labels, idx_train, idx_val,
                                            train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters,
                                 verbose)
Beispiel #26
0
    def fit(
        self,
        features,
        adj,
        labels,
        idx_train,
        idx_val=None,
        idx_test=None,
        train_iters=81,
        att_0=None,
        attention=False,
        model_name=None,
        initialize=True,
        verbose=False,
        normalize=False,
        patience=510,
    ):
        """Train the model with the validation-based training loop.

        A ``Data`` container describing the graph is assembled first, the
        inputs are moved / converted to tensors on ``self.device``, self loops
        are added with ``self.add_loop_sparse`` and
        ``self._train_with_val`` is run.

        Parameters
        ----------
        features :
            node features; must support ``.to_dense()``
        adj :
            adjacency matrix (``torch.Tensor`` or anything accepted by
            ``utils.to_tensor``)
        labels :
            node labels
        idx_train, idx_val, idx_test :
            node index sets; ``idx_test`` is also stored on ``self``
        train_iters : int
            number of training epochs
        attention : bool
            stored on ``self.attention``
        initialize : bool
            whether to call ``self.initialize()`` first
        verbose : bool
            passed to the training loop
        att_0, model_name, normalize, patience :
            accepted for signature compatibility; not used by this method
        """
        # SAINT sampler input.  ``data`` is only needed by the GraphSAINT
        # loader, which is currently disabled, so it is built but not used.
        data = Data(adj=adj,
                    features=features.to_dense(),
                    labels=labels,
                    idx_train=idx_train,
                    idx_val=idx_val,
                    idx_test=idx_test,
                    num_node_features=int(features.shape[-1]),
                    num_classes=int(labels.max() + 1))
        # Derive the node count from the input instead of hard-coding the
        # size of one particular dataset (was the literal 2110).
        data.num_nodes = int(features.shape[0])
        data.num_classes = int(labels.max() + 1)
        data.num_node_features = int(features.shape[-1])

        self.sim = None
        self.idx_test = idx_test
        self.attention = attention

        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features,
                                                    adj,
                                                    labels,
                                                    device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        # No explicit normalisation here: per the original author's note it is
        # performed inside the GCNConv layers.  Only self loops are added.
        adj = self.add_loop_sparse(adj)
        self.adj_norm = adj
        self.features = features
        self.labels = labels

        # NOTE(review): the validation loop is used unconditionally, even when
        # ``idx_val`` is None -- confirm that ``_train_with_val`` copes.
        self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)
Beispiel #27
0
    def attack(self,
               ori_features,
               ori_adj,
               labels,
               idx_train,
               idx_unlabeled,
               n_perturbations,
               ll_constraint=True,
               ll_cutoff=0.004):
        """Generate n_perturbations on the input graph.

        Each iteration re-initialises the inner model, rebuilds the currently
        perturbed adjacency / features, runs ``self.inner_train`` and flips
        the single adjacency or feature entry with the highest meta score.

        Parameters
        ----------
        ori_features :
            Original (unperturbed) node feature matrix
        ori_adj :
            Original (unperturbed) adjacency matrix
        labels :
            node labels
        idx_train :
            node training indices
        idx_unlabeled:
            unlabeled nodes indices
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        ll_constraint: bool
            whether to exert the likelihood ratio test constraint
        ll_cutoff : float
            The critical value for the likelihood ratio test of the power law distributions.
            See the Chi square distribution with one degree of freedom. Default value 0.004
            corresponds to a p-value of roughly 0.95. It would be ignored if `ll_constraint`
            is False.

        Results are stored in ``self.modified_adj`` (when
        ``self.attack_structure``) and ``self.modified_features`` (when
        ``self.attack_features``); nothing is returned.
        """
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj,
                                                        ori_features,
                                                        labels,
                                                        device=self.device)
        labels_self_training = self.self_training_label(labels, idx_train)
        # NOTE(review): this is evaluated on the already converted tensor, so
        # it presumably always yields False -- confirm whether the check was
        # meant to run before ``utils.to_tensor``.
        self.sparse_features = sp.issparse(ori_features)
        modified_adj = ori_adj
        modified_features = ori_features

        for _ in tqdm(range(n_perturbations), desc="Perturbing graph"):
            self._initialize()

            if self.attack_structure:
                modified_adj = self.get_modified_adj(ori_adj)
                self.adj_grad_sum.data.fill_(0)
            if self.attack_features:
                modified_features = ori_features + self.feature_changes
                self.feature_grad_sum.data.fill_(0)

            self.inner_train(modified_features, modified_adj, idx_train,
                             idx_unlabeled, labels, labels_self_training)

            # Scalar zero stands in for the score of a disabled attack type.
            adj_meta_score = torch.tensor(0.0).to(self.device)
            feature_meta_score = torch.tensor(0.0).to(self.device)

            if self.attack_structure:
                adj_meta_score = self.get_adj_score(self.adj_grad_sum,
                                                    modified_adj, ori_adj,
                                                    ll_constraint, ll_cutoff)
            if self.attack_features:
                feature_meta_score = self.get_feature_score(
                    self.feature_grad_sum, modified_features)

            if adj_meta_score.max() >= feature_meta_score.max():
                adj_meta_argmax = torch.argmax(adj_meta_score)
                row_idx, col_idx = utils.unravel_index(adj_meta_argmax,
                                                       ori_adj.shape)
                # (-2 * a + 1) is +1 for a == 0 and -1 for a == 1, i.e. a flip.
                # The same delta is written to both (row, col) and (col, row).
                self.adj_changes.data[row_idx][col_idx] += (
                    -2 * modified_adj[row_idx][col_idx] + 1)
                self.adj_changes.data[col_idx][row_idx] += (
                    -2 * modified_adj[row_idx][col_idx] + 1)
            else:
                feature_meta_argmax = torch.argmax(feature_meta_score)
                row_idx, col_idx = utils.unravel_index(feature_meta_argmax,
                                                       ori_features.shape)
                # Must be ``feature_changes`` -- the attribute read when
                # building ``modified_features`` above.
                self.feature_changes.data[row_idx][col_idx] += (
                    -2 * modified_features[row_idx][col_idx] + 1)

        if self.attack_structure:
            self.modified_adj = self.get_modified_adj(ori_adj).detach()
        if self.attack_features:
            self.modified_features = self.get_modified_features(
                ori_features).detach()
Beispiel #28
0
    def attack(self,
               ori_features,
               ori_adj,
               labels,
               idx_train,
               target_node,
               n_perturbations,
               steps=10,
               **kwargs):
        """Perturb the neighbourhood / features of a single target node.

        Importance scores for the target's edges and features are computed
        once (``calc_importance_edge`` / ``calc_importance_feature``); then,
        ``n_perturbations`` times, the highest-scoring remaining candidate is
        flipped and its score reset to 0.  Ties go to the edge candidate.

        Parameters
        ----------
        ori_features :
            Original (unperturbed) node feature matrix; must provide
            ``.todense()``
        ori_adj :
            Original (unperturbed) adjacency matrix; must provide
            ``.todense()``
        labels :
            node labels
        idx_train :
            node training indices
        target_node : int
            target node index to be attacked
        n_perturbations : int
            Number of flips to apply (edges or features).
        steps : int
            steps for computing integrated gradients

        The perturbed matrices are stored as CSR in ``self.modified_adj`` and
        ``self.modified_features``; nothing is returned.
        """

        self.surrogate.eval()
        self.target_node = target_node

        # Dense working copies that the flips below mutate in place.
        dense_adj = ori_adj.todense()
        dense_feat = ori_features.todense()
        adj_t, feat_t, labels = utils.to_tensor(dense_adj,
                                                dense_feat,
                                                labels,
                                                device=self.device)
        adj_norm = utils.normalize_adj_tensor(adj_t)

        # All-zero scores are the default for a disabled attack type.
        edge_scores = np.zeros(adj_t.shape[1])
        feat_scores = np.zeros(feat_t.shape[1])
        if self.attack_structure:
            edge_scores = self.calc_importance_edge(feat_t, adj_norm, labels,
                                                    idx_train, steps)
        if self.attack_features:
            feat_scores = self.calc_importance_feature(feat_t, adj_norm,
                                                       labels, idx_train,
                                                       steps)

        for _ in range(n_perturbations):
            top_edge = np.argmax(edge_scores)
            top_feat = np.argmax(feat_scores)
            if edge_scores[top_edge] >= feat_scores[top_feat]:
                # Toggle the edge symmetrically, then retire the candidate.
                flipped = np.abs(1 - dense_adj[target_node, top_edge])
                dense_adj[target_node, top_edge] = flipped
                dense_adj[top_edge, target_node] = flipped
                edge_scores[top_edge] = 0
            else:
                # Toggle the feature bit, then retire the candidate.
                current = dense_feat[target_node, top_feat]
                dense_feat[target_node, top_feat] = np.abs(1 - current)
                feat_scores[top_feat] = 0

        self.modified_adj = sp.csr_matrix(dense_adj)
        self.modified_features = sp.csr_matrix(dense_feat)
        self.check_adj(dense_adj)
Beispiel #29
0
    def fit(
        self,
        features,
        adj,
        labels,
        idx_train,
        idx_val=None,
        idx_test=None,
        train_iters=101,
        att_0=None,
        attention=False,
        model_name=None,
        initialize=True,
        verbose=False,
        normalize=False,
        patience=500,
    ):
        """Train the model, optionally on an attention-reweighted adjacency.

        When ``attention`` is truthy the adjacency is replaced by
        ``att_coef(features, adj)`` and that result is also kept in
        ``self.sim``.  The training routine is then chosen as follows: no
        ``idx_val`` -> plain loop; ``patience < train_iters`` -> early
        stopping; otherwise -> validation-based loop.

        Parameters
        ----------
        features, adj, labels :
            graph data; converted with ``utils.to_tensor`` unless ``adj`` is
            exactly a ``torch.Tensor``, in which case all three are only
            moved to ``self.device``
        idx_train, idx_val :
            training / validation node indices
        idx_test :
            stored on ``self.idx_test``
        train_iters : int
            number of training epochs
        attention : bool
            enables the ``att_coef`` reweighting described above
        initialize : bool
            whether to call ``self.initialize()`` first
        verbose : bool
            passed to the training routine
        patience : int
            early-stopping patience, only consulted when ``idx_val`` is given
        att_0, model_name, normalize :
            accepted but ignored; in particular the adjacency is never
            normalised here regardless of ``normalize``
        """
        self.attention = attention
        self.sim = None
        if self.attention:
            # The reweighted adjacency replaces ``adj`` and is cached as sim.
            self.sim = adj = att_coef(features, adj)

        self.idx_test = idx_test

        if initialize:
            self.initialize()

        if type(adj) is torch.Tensor:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)
        else:
            features, adj, labels = utils.to_tensor(features,
                                                    adj,
                                                    labels,
                                                    device=self.device)

        # Normalisation is deliberately skipped; the original author's note
        # says it is conducted inside the GCN layers.
        self.adj_norm = adj
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        elif patience < train_iters:
            self._train_with_early_stopping(labels, idx_train, idx_val,
                                            train_iters, patience, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters,
                                 verbose)
Beispiel #30
0
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            train_iters=200,
            verbose=True,
            **kwargs):
        """Train RGCN.

        The inputs are densified, converted to tensors on ``self.device`` and
        two normalised adjacencies (powers -1/2 and -1) are cached before the
        model is re-initialised and trained.

        Parameters
        ----------
        features :
            node features; must provide ``.todense()``
        adj :
            the adjacency matrix; must provide ``.todense()``
        labels :
            node labels
        idx_train :
            node training indices
        idx_val :
            node validation indices. If None, the training routine without
            validation is used
        train_iters : int
            number of training epochs
        verbose : bool
            whether to show verbose logs

        Examples
        --------
        We can first load dataset and then train RGCN.

        >>> from deeprobust.graph.data import PrePtbDataset, Dataset
        >>> from deeprobust.graph.defense import RGCN
        >>> # load clean graph data
        >>> data = Dataset(root='/tmp/', name='cora', seed=15)
        >>> adj, features, labels = data.adj, data.features, data.labels
        >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
        >>> # load perturbed graph data
        >>> perturbed_data = PrePtbDataset(root='/tmp/', name='cora')
        >>> perturbed_adj = perturbed_data.adj
        >>> # train defense model
        >>> model = RGCN(nnodes=perturbed_adj.shape[0], nfeat=features.shape[1],
                         nclass=labels.max()+1, nhid=32, device='cpu')
        >>> model.fit(features, perturbed_adj, labels, idx_train, idx_val,
                      train_iters=200, verbose=True)
        >>> model.test(idx_test)

        """

        dense_adj = adj.todense()
        dense_features = features.todense()
        adj, features, labels = utils.to_tensor(dense_adj,
                                                dense_features,
                                                labels,
                                                device=self.device)

        self.features = features
        self.labels = labels
        # Two differently normalised adjacencies are kept for later use.
        self.adj_norm1 = self._normalize_adj(adj, power=-1 / 2)
        self.adj_norm2 = self._normalize_adj(adj, power=-1)

        print('=== training rgcn model ===')
        self._initialize()
        if idx_val is not None:
            self._train_with_val(labels, idx_train, idx_val, train_iters,
                                 verbose)
        else:
            self._train_without_val(labels, idx_train, train_iters, verbose)