Example #1
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter


class GCN(nn.Module):

    # Layer initialization: input features, output features, weight, bias
    def __init__(self, in_features, out_features, bias=True):
        super(GCN, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(
            in_features, out_features))  # FloatTensor allocates the tensor
        # Common pattern: self.v = torch.nn.Parameter(torch.FloatTensor(hidden_size)).
        # Parameter turns a plain (non-trainable) Tensor into a trainable one and
        # registers it on the module, so self.v becomes part of the model and is
        # updated during training like any other learnable parameter.
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter("bias", None)
            # Assigning a Parameter attribute and calling register_parameter both add
            # the parameter to parameters(); the latter takes the name as a string.
        # self.reset_parameters()

    # # Initialize the weights
    # def reset_parameters(self):
    #     stdv = 1.0 / math.sqrt(self.weight.size(1))
    #     # size() counts elements along a dimension; size(1) is the second dimension
    #     self.weight.data.uniform_(-stdv, stdv)  # uniform_ fills with samples from [-stdv, stdv]
    #     if self.bias is not None:
    #         self.bias.data.uniform_(-stdv, stdv)
    """
    前馈运算 即计算A~ X W(0)
    input X与权重W相乘,然后adj矩阵与他们的积稀疏乘
    直接输入与权重之间进行torch.mm操作,得到support,即XW
    support与adj进行torch.spmm操作,得到output,即AXW选择是否加bias
    """

    def forward(self, input, adj):
        support = torch.mm(input.cpu(), self.weight.cpu())
        # torch.mm(a, b) is matrix multiplication; torch.mul(a, b) is element-wise
        # multiplication, which requires a and b to have the same shape
        output = torch.spmm(adj.cpu(), support.cpu())
        if self.bias is not None:
            return output + self.bias.cpu()
        else:
            return output

    # Setting a breakpoint shows output rows like [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.94];
    # the values are the probabilities of each label for that node, so this row maps to
    # [0, 0, 0, 0, 0, 0, 1], i.e. the seventh class of the one-hot encoded labels.

    def __repr__(self):
        return (self.__class__.__name__ + " (" + str(self.in_features) +
                " -> " + str(self.out_features) + ")")
Example #2
class CosineGraphAttentionLayer(nn.Module):
    def __init__(self, requires_grad=True):
        super(CosineGraphAttentionLayer, self).__init__()
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        if requires_grad:
            # uniform initialization
            self.beta = Parameter(torch.Tensor(1).uniform_(0, 1),
                                  requires_grad=requires_grad)
        else:
            # torch.autograd.Variable is deprecated; a plain tensor is already non-trainable
            self.beta = torch.zeros(1).to(device)
        self.epoch_count = 0
        self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())

    def forward(self, xi, xj, adj=1.0):
        xi_norm2 = torch.norm(xi, 2, 1).view(-1, 1)
        xj_norm2 = torch.norm(xj, 2, 1).view(-1, 1).t()

        # add a minor constant (1e-7) to denominator to prevent division by
        # zero error
        cos = self.beta * torch.div(torch.mm(xi, xj.t()),
                                    torch.mm(xi_norm2, xj_norm2) + 1e-7)

        # neighborhood masking (inspired by this repo:
        # https://github.com/danielegrattarola/keras-gat)
        if isinstance(adj, (float, int)):
            # use the input's device instead of assuming CUDA
            adj = torch.eye(xi.shape[0], device=xi.device)
        else:
            adj = to_dense(adj)
        mask = (1. - adj) * -1e9  # adj is already dense here
        masked = cos + mask
        # masked = to_sparse(masked)
        # propagation matrix
        # sparsemax = Sparsemax(dim=1)
        # P = sparsemax(masked)
        P = F.softmax(masked, dim=1)
        # attention-guided propagation

        self.epoch_count += 1

        output = torch.mm(P, xj)
        return output

    def save_attention(self, P):
        if self.epoch_count % 25 == 0:
            print('Saving Attention for {}x{}'.format(P.shape[0], P.shape[1]))
            save_file = 'tmp/GCNRx_attention_weight_{}_{}_{}'.format(
                self.timestamp, P.shape[0], P.shape[1]) + '.pkl'
            with open(save_file, 'wb') as f:
                pickle.dump([
                    P.cpu().detach().numpy(),
                    self.beta.cpu().detach().numpy()
                ], f)

    def __repr__(self):
        return self.__class__.__name__ + ' ()'
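
A minimal sketch of the layer in isolation (hypothetical shapes; with the default adj=1.0 the mask reduces to the identity, so each node attends only to itself):

layer = CosineGraphAttentionLayer()
xi = torch.randn(4, 16)   # 4 nodes, 16-dim features
xj = torch.randn(4, 16)
out = layer(xi, xj)       # (4, 16); each row of P sums to 1 after the softmax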
Example #3
File: model.py Project: yogytes/GGN
class Gumbel_Generator(nn.Module):
    def __init__(self, sz=10, temp=10, temp_drop_frac=0.9999):
        super(Gumbel_Generator, self).__init__()
        self.gen_matrix = Parameter(torch.rand(sz, sz, 2))
        # gen_matrix holds the probabilities of the adjacency matrix
        self.temperature = temp
        self.temp_drop_frac = temp_drop_frac

    def drop_temperature(self):
        # temperature annealing
        self.temperature = self.temperature * self.temp_drop_frac

    def sample(self, hard=False):
        # sample an adjacency matrix
        self.logp = self.gen_matrix.view(-1, 2)
        out = gumbel_softmax(self.logp, self.temperature, hard)
        if hard:
            hh = torch.zeros(self.gen_matrix.size()[0]**2, 2)
            for i in range(out.size()[0]):
                hh[i, out[i]] = 1
            out = hh
        if use_cuda:
            out = out.cuda()
        out_matrix = out[:, 0].view(self.gen_matrix.size()[0],
                                    self.gen_matrix.size()[0])
        return out_matrix

    def get_temperature(self):
        return self.temperature

    def get_cross_entropy(self, obj_matrix):
        # distance to the target matrix
        logps = F.softmax(self.gen_matrix, 2)
        logps = torch.log(logps[:, :, 0] + 1e-10) * obj_matrix + torch.log(
            logps[:, :, 1] + 1e-10) * (1 - obj_matrix)
        result = -torch.sum(logps)
        result = result.cpu() if use_cuda else result
        return result.data.numpy()

    def get_entropy(self):
        logps = F.softmax(self.gen_matrix, 2)
        result = torch.mean(torch.sum(logps * torch.log(logps + 1e-10), 1))
        result = result.cpu() if use_cuda else result
        return (-result.data.numpy())

    def randomization(self, fraction):
        # re-randomize gen_matrix; fraction is the proportion of entries to reset
        sz = self.gen_matrix.size()[0]
        numbers = int(fraction * sz * sz)
        original = self.gen_matrix.cpu().data.numpy()

        for i in range(numbers):
            ii = np.random.choice(range(sz), (2, 1))
            z = torch.rand(2).cuda() if use_cuda else torch.rand(2)
            self.gen_matrix.data[ii[0], ii[1], :] = z
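
The class depends on a module-level gumbel_softmax function and a use_cuda flag that are not shown. A rough sketch of how they might be supplied (assumption: torch.nn.functional.gumbel_softmax, which has a compatible (logits, tau, hard) signature; the hard=True branch of sample() expects index output, which this stand-in does not provide):

import torch
import torch.nn.functional as F

use_cuda = torch.cuda.is_available()

def gumbel_softmax(logits, temperature, hard=False):
    # Stand-in for the project's sampler (soft samples only).
    return F.gumbel_softmax(logits, tau=temperature, hard=hard)

gen = Gumbel_Generator(sz=10)
adj_sample = gen.sample()   # (10, 10) soft adjacency sample
gen.drop_temperature()      # anneal the temperature for the next step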
Example #4
class SpectralAttack(BaseAttack):
    """
    Spectral attack for graph data.

    Parameters

    """
    def __init__(self,
                 model=None,
                 nnodes=None,
                 loss_type='CE',
                 feature_shape=None,
                 attack_structure=True,
                 attack_features=False,
                 regularization_weight=0.0,
                 device='cpu'):

        super(SpectralAttack, self).__init__(model, nnodes, attack_structure,
                                             attack_features, device)

        assert attack_structure or attack_features, 'attack_structure and attack_features cannot both be False'

        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None
        self.regularization_weight = regularization_weight

        if attack_features:
            # `assert True, ...` in the original could never fire; raise instead
            raise NotImplementedError(
                'Current Spectral Attack does not support attacking features')

        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(
                torch.FloatTensor(int(nnodes * (nnodes - 1) / 2)))
            self.adj_changes.data.fill_(0)

        self.complementary = None

    def attack(self,
               ori_features,
               ori_adj,
               labels,
               idx_target,
               idx_train,
               idx_test,
               n_perturbations,
               att_lr,
               epochs=200,
               verbose=False,
               reduction='mean',
               **kwargs):
        """
        Generate perturbations on the input graph
        """

        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        # ori_adj, ori_features, labels = utils.to_tensor(ori_adj, ori_features, labels, device=self.device)
        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)

        victim_model.eval()
        for t in tqdm(range(epochs), desc='Perturb Adj'):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj,
                                                  device=self.device)
            output = victim_model(
                ori_features,
                adj_norm)  # the GCN forward expects a normalized adjacency
            self.loss = self._loss(output[idx_target], labels[idx_target])

            # New: add regularization term for spectral distance
            eigen_mse = 0
            eigen_norm = self.norm = torch.norm(ori_e)
            if self.regularization_weight != 0:
                e, v = torch.symeig(adj_norm, eigenvectors=True)
                # eigen_mse = F.mse_loss(ori_e, e, reduction=reduction)
                eigen_mse = torch.norm(ori_e - e)  # torch.norm(ori_e, e) would read e as the norm order
            reg_loss = eigen_mse / eigen_norm * self.regularization_weight

            if verbose and t % 20 == 0:
                loss_target, acc_target = calc_acc(output, labels, idx_target)
                print('-- Epoch {}, '.format(t),
                      'class loss = {:.4f} | '.format(self.loss.item()),
                      'reg loss = {:.8f} | '.format(reg_loss),
                      'eigen_mse = {:.8f} | '.format(eigen_mse),
                      'eigen_norm = {:.4f} | '.format(eigen_norm),
                      'acc = {}'.format(acc_target))

            self.loss += reg_loss
            adj_grad = torch.autograd.grad(self.loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                # lr = 200 / np.sqrt(t+1)
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            if self.loss_type == 'CW':
                # lr = 0.1 / np.sqrt(t+1)
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_target,
                           n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()
        self.check_adj_tensor(self.modified_adj)

        # for sanity check
        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)
        adj_norm = utils.normalize_adj_tensor(self.modified_adj,
                                              device=self.device)
        e, v = torch.symeig(adj_norm, eigenvectors=True)

        self.adj = ori_adj.detach()
        self.labels = labels.detach()
        self.ori_e = ori_e
        self.ori_v = ori_v
        self.e = e
        self.v = v

    def random_sample(self, ori_adj, ori_features, labels, idx_target,
                      n_perturbations):
        K = 10
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)
                # randm = np.random.uniform(size=s.shape[0])
                # sampled = np.where(s > randm, 1, 0)

                # if sampled.sum() > n_perturbations:
                #     continue
                while sampled.sum() > n_perturbations:
                    sampled = np.random.binomial(1, s)

                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj,
                                                      device=self.device)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_target], labels[idx_target])
                # loss = F.nll_loss(output[idx_target], labels[idx_target])
                # print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def get_modified_adj(self, ori_adj):

        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj) - torch.eye(
                self.nnodes).to(self.device) - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        tril_indices = torch.tril_indices(row=self.nnodes,
                                          col=self.nnodes,
                                          offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj

        return modified_adj

    def projection(self, n_perturbations):
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes - 1).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data, min=0, max=1))

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000 * onehot).argmax(1).detach()
            margin = output[np.arange(len(output)), labels] - \
                   output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes - x, 0,
                               1).sum() - n_perturbations

        miu = a
        while ((b - a) >= epsilon):
            miu = (a + b) / 2
            # Check if middle point is root
            if (func(miu) == 0.0):
                break
            # Decide the side to repeat the steps
            if (func(miu) * func(a) < 0):
                b = miu
            else:
                a = miu
        # print("The value of root is : ","%.4f" % miu)
        return miu
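
projection() solves "minimize ||p - s||^2 subject to p in [0, 1]^d and sum(p) <= n_perturbations" by shifting every entry down by a scalar miu found with bisection: func(miu) = clamp(s - miu, 0, 1).sum() - n_perturbations is non-increasing in miu, and the chosen left/right endpoints bracket a root. A standalone numeric sketch of the same idea (hypothetical values):

import torch

def project_onto_budget(s, budget, eps=1e-5):
    # Find miu such that clamp(s - miu, 0, 1) sums to the budget.
    if torch.clamp(s, 0, 1).sum() <= budget:
        return torch.clamp(s, 0, 1)
    left, right = (s - 1).min(), s.max()
    while right - left >= eps:
        miu = (left + right) / 2
        if torch.clamp(s - miu, 0, 1).sum() > budget:
            left = miu   # shift is too small
        else:
            right = miu
    return torch.clamp(s - miu, 0, 1)

s = torch.tensor([0.9, 0.8, 0.3, -0.2])
print(project_onto_budget(s, budget=1.0))   # ~tensor([0.55, 0.45, 0.00, 0.00])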
Example #5
class RBFI(nn.Module):
    def __init__(self,
                 in_features,
                 out_features,
                 andor="*",
                 modinf=False,
                 regular_deriv=False,
                 min_input=0.0,
                 max_input=1.0,
                 min_slope=0.001,
                 max_slope=10.0):
        """
        Implementation of RBF module with logloss.
        :param in_features: Number of input features.
        :param out_features: Number of output features.
        :param andor: '^' for and, 'v' for or, '*' for mixed.
        :param modinf: Whether to aggregate using max (if True) or sum (if False).
        :param regular_deriv: Whether to use regular derivatives or not.
        :param min_input: minimum value for w (and therefore min value for input)
        :param max_input: max, as above.
        :param min_slope: min value for u, defining the slope.
        :param max_slope: max value for u, defining the slope.
        """
        super(RBFI, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.andor = andor
        self.modinf = modinf
        self.regular_deriv = regular_deriv
        self.w = BoundedParameter(torch.Tensor(out_features, in_features),
                                  lower_bound=min_input,
                                  upper_bound=max_input)
        self.u = BoundedParameter(torch.Tensor(out_features, in_features),
                                  lower_bound=min_slope,
                                  upper_bound=max_slope)
        if andor == 'v':
            self.andor01 = Parameter(torch.ones((1, out_features)))
        elif andor == '^':
            self.andor01 = Parameter(torch.zeros((
                1,
                out_features,
            )))
        else:
            self.andor01 = Parameter(torch.Tensor(
                1,
                out_features,
            ))
            self.andor01.data.random_(0, 2)
        self.andor01.requires_grad = False
        self.w.data.uniform_(min_input, max_input)
        # Initialization of u.
        self.u.data.uniform_(0.2, 0.7)  # These could be parameters.
        self.u.data.clamp_(min_slope, max_slope)

    def dumps(self):
        """Writes itself to a string."""
        # Creates a dictionary
        d = dict(
            in_features=self.in_features,
            out_features=self.out_features,
            min_input=self.w.lower_bound,
            max_input=self.w.upper_bound,
            min_slope=self.u.lower_bound,
            max_slope=self.u.upper_bound,
            modinf=self.modinf,
            regular_deriv=self.regular_deriv,
            andor=self.andor,
            andor01=self.andor01.cpu().numpy(),
            u=self.u.data.cpu().numpy(),
            w=self.w.data.cpu().numpy(),
        )
        return Serializable.dumps(d)

    @staticmethod
    def loads(s, device):
        """Reads itself from string s."""
        d = Serializable.loads(s)
        m = RBFI(d['in_features'],
                 d['out_features'],
                 andor=d['andor'],
                 modinf=d['modinf'],
                 regular_deriv=d['regular_deriv'],
                 min_input=d['min_input'],
                 max_input=d['max_input'],
                 min_slope=d['min_slope'],
                 max_slope=d['max_slope'])
        m.u.data = torch.from_numpy(d['u']).to(device)
        m.w.data = torch.from_numpy(d['w']).to(device)
        m.andor01.data = torch.from_numpy(d['andor01']).to(device)
        return m

    def forward(self, x):
        # Let n be the input size, and m the output size.
        # The tensor x is of shape * n. To make room for the output,
        # we view it as of shape * 1 n.
        s = list(x.shape)
        new_s = s[:-1] + [1, s[-1]]
        xx = x.view(*new_s)
        xuw = self.u * (xx - self.w)
        xuwsq = xuw * xuw
        # Aggregates into a modulus.
        if self.modinf:
            # We want to get the largest square, which is the min one as we changed signs.
            if self.regular_deriv:
                z, _ = torch.max(xuwsq, -1)
                y = torch.exp(-z)
            else:
                z = SharedFeedbackMax.apply(xuwsq)
                y = LargeAttractorExp.apply(z)
        else:
            z = torch.sum(xuwsq, -1)
            if self.regular_deriv:
                y = torch.exp(-z)
            else:
                y = LargeAttractorExp.apply(z)
        # Takes into account and-orness.
        if self.andor == '^':
            return y
        elif self.andor == 'v':
            return 1.0 - y
        else:
            return y + self.andor01 * (1.0 - 2.0 * y)

    def overall_sensitivity(self):
        """Returns the sensitivity to adversarial examples of the layer."""
        if self.modinf:
            s = torch.max(torch.max(self.u, -1)[0], -1)[0].item()
        else:
            # torch.max over the whole tensor returns a 0-dim tensor, so no [0] indexing
            s = torch.max(torch.sqrt(torch.sum(self.u * self.u, -1))).item()
        s *= np.sqrt(2. / np.e)
        return s

    def sensitivity(self, previous_layer):
        """Given the sensitivity of the previous layer (a vector of length equal
        to the number of inputs), it computes the sensitivity to adversarial examples
         of the current layer, as a vector of length equal to the output size of the
         layer.  If the input sensitivity of the previous layer is None, then unit
         sensitivity is assumed."""
        if previous_layer is None:
            previous_layer = self.w.new(1, self.in_features)
            previous_layer.fill_(1.)
        else:
            previous_layer = previous_layer.view(1, self.in_features)
        u_prod = previous_layer * self.u
        if self.modinf:
            # s = torch.max(u_prod, -1)[0]
            s = SharedFeedbackMax.apply(u_prod)
        else:
            s = torch.sqrt(torch.sum(u_prod * u_prod, -1))
        s = s * np.sqrt(2. / np.e)
        return s
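
The forward math, stripped of the project-specific BoundedParameter, SharedFeedbackMax, and LargeAttractorExp helpers: each output unit computes y = exp(-z), where z = max_i (u_i * (x_i - w_i))^2 under modinf, or the sum over i otherwise. A standalone sketch with regular derivatives (hypothetical shapes):

import torch

x = torch.rand(3, 4)             # batch of 3, 4 input features
w = torch.rand(2, 4)             # centers for 2 output units
u = torch.full((2, 4), 0.5)      # slopes
xuw = u * (x.unsqueeze(-2) - w)  # broadcast to (3, 2, 4)
z = (xuw * xuw).max(-1).values   # modinf aggregation over inputs
y = torch.exp(-z)                # (3, 2), values in (0, 1]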
Example #6
class BiAAttention(nn.Module):
    '''
    Bi-Affine attention layer.
    '''
    def __init__(self,
                 input_size_decoder,
                 input_size_encoder,
                 num_labels,
                 biaffine=True,
                 **kwargs):
        '''

        Args:
            input_size_encoder: int
                the dimension of the encoder input.
            input_size_decoder: int
                the dimension of the decoder input.
            num_labels: int
                the number of labels of the crf layer
            biaffine: bool
                whether to apply the bi-affine parameter.
            **kwargs:
        '''
        super(BiAAttention, self).__init__()
        self.input_size_encoder = input_size_encoder
        self.input_size_decoder = input_size_decoder
        self.num_labels = num_labels
        self.biaffine = biaffine

        self.W_d = Parameter(
            torch.Tensor(self.num_labels, self.input_size_decoder))
        self.W_e = Parameter(
            torch.Tensor(self.num_labels, self.input_size_encoder))
        self.b = Parameter(torch.Tensor(self.num_labels, 1, 1))
        if self.biaffine:
            self.U = Parameter(
                torch.Tensor(self.num_labels, self.input_size_decoder,
                             self.input_size_encoder))
        else:
            self.register_parameter('U', None)

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.W_d)
        nn.init.xavier_uniform_(self.W_e)
        nn.init.constant_(self.b, 0.)
        if self.biaffine:
            nn.init.xavier_uniform_(self.U)

    def forward(self, input_d, input_e, mask_d=None, mask_e=None):
        '''

        Args:
            input_d: Tensor
                the decoder input tensor with shape = [batch, length_decoder, input_size]
            input_e: Tensor
                the child input tensor with shape = [batch, length_encoder, input_size]
            mask_d: Tensor or None
                the mask tensor for decoder with shape = [batch, length_decoder]
            mask_e: Tensor or None
                the mask tensor for encoder with shape = [batch, length_encoder]

        Returns: Tensor
            the energy tensor with shape = [batch, num_label, length_decoder, length_encoder]

        '''
        assert input_d.size(0) == input_e.size(
            0), 'batch sizes of encoder and decoder are required to be equal.'
        batch, length_decoder, _ = input_d.size()
        _, length_encoder, _ = input_e.size()

        # compute decoder part: [num_label, input_size_decoder] * [batch, input_size_decoder, length_decoder]
        # the output shape is [batch, num_label, length_decoder]
        out_d = torch.matmul(self.W_d, input_d.transpose(1, 2)).unsqueeze(3)
        # compute encoder part: [num_label, input_size_encoder] * [batch, input_size_encoder, length_encoder]
        # the output shape is [batch, num_label, length_encoder]
        out_e = torch.matmul(self.W_e, input_e.transpose(1, 2)).unsqueeze(2)

        # output shape [batch, num_label, length_decoder, length_encoder]
        if self.biaffine:
            # compute bi-affine part
            # [batch, 1, length_decoder, input_size_decoder] * [num_labels, input_size_decoder, input_size_encoder]
            # output shape [batch, num_label, length_decoder, input_size_encoder]
            # output = torch.matmul(input_d.unsqueeze(1), self.U)
            output = torch.matmul(input_d.unsqueeze(1).cpu(),
                                  self.U.cpu()).cuda()
            # [batch, num_label, length_decoder, input_size_encoder] * [batch, 1, input_size_encoder, length_encoder]
            # output shape [batch, num_label, length_decoder, length_encoder]
            # output = torch.matmul(output, input_e.unsqueeze(1).transpose(2, 3))
            output = torch.matmul(output.cpu(),
                                  input_e.unsqueeze(1).transpose(
                                      2, 3).cpu()).cuda()
            # output = output + self.b
            output = output + out_d + out_e + self.b
        else:
            output = out_d + out_e + self.b

        if mask_d is not None:
            output = output * mask_d.unsqueeze(1).unsqueeze(
                3) * mask_e.unsqueeze(1).unsqueeze(2)

        return output
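
A shape-check sketch using biaffine=False (hypothetical sizes; the biaffine branch round-trips tensors through .cpu()/.cuda() and therefore assumes a GPU):

att = BiAAttention(input_size_decoder=8, input_size_encoder=8,
                   num_labels=3, biaffine=False)
d = torch.randn(2, 5, 8)   # [batch, length_decoder, input_size]
e = torch.randn(2, 7, 8)   # [batch, length_encoder, input_size]
energy = att(d, e)         # [2, 3, 5, 7]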
Example #7
class PGDAttack(BaseAttack):
    """PGD attack for graph data.

    Parameters
    ----------
    model :
        model to attack. Default `None`.
    nnodes : int
        number of nodes in the input graph
    loss_type: str
        attack loss type, chosen from ['CE', 'CW']
    feature_shape : tuple
        shape of the input node features
    attack_structure : bool
        whether to attack graph structure
    attack_features : bool
        whether to attack node features
    device: str
        'cpu' or 'cuda'

    Examples
    --------

    >>> from deeprobust.graph.data import Dataset
    >>> from deeprobust.graph.defense import GCN
    >>> from deeprobust.graph.global_attack import PGDAttack
    >>> from deeprobust.graph.utils import preprocess
    >>> data = Dataset(root='/tmp/', name='cora')
    >>> adj, features, labels = data.adj, data.features, data.labels
    >>> adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False) # convert to tensor
    >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    >>> # Setup Victim Model
    >>> victim_model = GCN(nfeat=features.shape[1], nclass=labels.max().item()+1,
                        nhid=16, dropout=0.5, weight_decay=5e-4, device='cpu').to('cpu')
    >>> victim_model.fit(features, adj, labels, idx_train)
    >>> # Setup Attack Model
    >>> model = PGDAttack(model=victim_model, nnodes=adj.shape[0], loss_type='CE', device='cpu').to('cpu')
    >>> model.attack(features, adj, labels, idx_train, n_perturbations=10)
    >>> modified_adj = model.modified_adj

    """

    def __init__(self, model=None, nnodes=None, loss_type='CE', feature_shape=None, attack_structure=True, attack_features=False, device='cpu'):

        super(PGDAttack, self).__init__(model, nnodes, attack_structure, attack_features, device)

        assert attack_features or attack_structure, 'attack_features and attack_structure cannot both be False'

        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None

        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(torch.FloatTensor(int(nnodes*(nnodes-1)/2)))
            self.adj_changes.data.fill_(0)

        if attack_features:
            # `assert True, ...` in the original could never fire; raise instead
            raise NotImplementedError(
                'Topology Attack does not support attacking features')

        self.complementary = None

    def attack(self, ori_features, ori_adj, labels, idx_train, n_perturbations, epochs=200, **kwargs):
        """Generate perturbations on the input graph.

        Parameters
        ----------
        ori_features :
            Original (unperturbed) node feature matrix
        ori_adj :
            Original (unperturbed) adjacency matrix
        labels :
            node labels
        idx_train :
            node training indices
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        epochs:
            number of training epochs

        """

        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj, ori_features, labels, device=self.device)

        victim_model.eval()
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj)
            output = victim_model(ori_features, adj_norm)
            # loss = F.nll_loss(output[idx_train], labels[idx_train])
            loss = self._loss(output[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                lr = 200 / np.sqrt(t+1)
                self.adj_changes.data.add_(lr * adj_grad)

            if self.loss_type == 'CW':
                lr = 0.1 / np.sqrt(t+1)
                self.adj_changes.data.add_(lr * adj_grad)

            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train, n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()

    def random_sample(self, ori_adj, ori_features, labels, idx_train, n_perturbations):
        K = 20
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)

                print(sampled.sum())
                if sampled.sum() > n_perturbations:
                    continue
                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_train], labels[idx_train])
                # loss = F.nll_loss(output[idx_train], labels[idx_train])
                print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000*onehot).argmax(1)
            margin = output[np.arange(len(output)), labels] - \
                   output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def projection(self, n_perturbations):
        # projected = torch.clamp(self.adj_changes, 0, 1)
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes - 1).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            self.adj_changes.data.copy_(torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(torch.clamp(self.adj_changes.data, min=0, max=1))

    def get_modified_adj(self, ori_adj):

        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj) - torch.eye(self.nnodes).to(self.device) - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        # strictly-lower-triangular indices of the full n x n matrix (n*(n-1)/2 of them);
        # the original row=nnodes-1, col=nnodes-1, offset=0 wrongly included diagonal entries
        tril_indices = torch.tril_indices(row=self.nnodes, col=self.nnodes, offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        # m += m.t()
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj

        return modified_adj

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes-x, 0, 1).sum() - n_perturbations

        miu = a
        while ((b-a) >= epsilon):
            miu = (a+b)/2
            # Check if middle point is root
            if (func(miu) == 0.0):
                break
            # Decide the side to repeat the steps
            if (func(miu)*func(a) < 0):
                b = miu
            else:
                a = miu
        # print("The value of root is : ","%.4f" % miu)
        return miu
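
In get_modified_adj, complementary equals 1 - I - 2A off the diagonal, so a unit entry of the symmetric perturbation matrix m adds the edge where A_ij = 0 and removes it where A_ij = 1. A toy check on a hypothetical 3-node graph:

import torch

ori_adj = torch.tensor([[0., 1., 0.],
                        [1., 0., 0.],
                        [0., 0., 0.]])
comp = (torch.ones_like(ori_adj) - torch.eye(3) - ori_adj) - ori_adj
m = torch.zeros(3, 3)
m[0, 1] = m[1, 0] = 1.0    # flip (remove) the existing edge 0-1
m[0, 2] = m[2, 0] = 1.0    # flip (add) the missing edge 0-2
print(comp * m + ori_adj)  # edge 0-1 removed, edge 0-2 added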
Example #8
class PGDAttack(BaseAttack):
    """
    PGD attack with an optional spectral-distance regularization term.
    """
    def __init__(self,
                 model=None,
                 nnodes=None,
                 loss_type='CE',
                 feature_shape=None,
                 attack_structure=True,
                 attack_features=False,
                 loss_weight=1.0,
                 regularization_weight=0.0,
                 device='cpu'):

        super(PGDAttack, self).__init__(model, nnodes, attack_structure,
                                        attack_features, device)

        assert attack_structure or attack_features, 'attack_structure and attack_features cannot both be False'

        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None
        self.loss_weight = loss_weight
        self.regularization_weight = regularization_weight

        if attack_features:
            # `assert True, ...` in the original could never fire; raise instead
            raise NotImplementedError(
                'Current Spectral Attack does not support attacking features')

        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(
                torch.FloatTensor(int(nnodes * (nnodes - 1) / 2)))
            torch.nn.init.uniform_(self.adj_changes, 0.0, 0.001)
            # self.adj_changes.data.fill_(0)

        self.complementary = None

    def set_model(self, model):
        self.surrogate = model

    def attack(self,
               ori_features,
               ori_adj,
               labels,
               idx_target,
               n_perturbations,
               att_lr,
               epochs=200,
               distance_type='l2',
               sample_type='sample',
               opt_type='max',
               verbose=True,
               **kwargs):
        """
        Generate perturbations on the input graph
        """

        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        # ori_adj, ori_features, labels = utils.to_tensor(ori_adj, ori_features, labels, device=self.device)
        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)

        l, r, m = 0, 0, 0
        victim_model.eval()
        # for t in tqdm(range(epochs), desc='Perturb Adj'):
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj,
                                                  device=self.device)
            output = victim_model(
                ori_features,
                adj_norm)  # the GCN forward expects a normalized adjacency
            task_loss = self._loss(output[idx_target], labels[idx_target])

            # spectral-distance regularization term
            eigen_mse = torch.tensor(0)
            eigen_self = torch.tensor(0)
            eigen_gf = torch.tensor(0)
            eigen_norm = self.norm = torch.norm(ori_e)
            if self.regularization_weight != 0:
                # add noise to make the graph asymmetric
                modified_adj_noise = modified_adj
                # modified_adj_noise = self.add_random_noise(modified_adj)
                adj_norm_noise = utils.normalize_adj_tensor(modified_adj_noise,
                                                            device=self.device)
                e, v = torch.symeig(adj_norm_noise, eigenvectors=True)
                eigen_mse = torch.norm(ori_e - e)
                eigen_self = torch.norm(e)

                # low-rank loss in GF-attack
                idx = torch.argsort(e)[:128]
                mask = torch.zeros_like(e).bool()
                mask[idx] = True
                eigen_gf = torch.pow(torch.norm(e * mask, p=2), 2) * torch.pow(
                    torch.norm(torch.matmul(v.detach() * mask, ori_features),
                               p=2), 2)

            reg_loss = 0
            if distance_type == 'l2':
                reg_loss = eigen_mse / eigen_norm
            elif distance_type == 'normDiv':
                reg_loss = eigen_self / eigen_norm
            elif distance_type == 'gf':
                reg_loss = eigen_gf
            else:
                exit(f'unknown distance metric: {distance_type}')

            if verbose and t % 20 == 0:
                loss_target, acc_target = calc_acc(output, labels, idx_target)
                print(
                    '-- Epoch {}, '.format(t),
                    'ptb budget/true = {:.1f}/{:.1f}'.format(
                        n_perturbations,
                        torch.clamp(self.adj_changes, 0, 1).sum()),
                    'l/r/m = {:.4f}/{:.4f}/{:.4f}'.format(l, r, m),
                    'class loss = {:.4f} | '.format(task_loss.item()),
                    'reg loss = {:.4f} | '.format(reg_loss.item()),
                    'mse_norm = {:.4f} | '.format(eigen_norm),
                    'eigen_mse = {:.4f} | '.format(eigen_mse),
                    'eigen_self = {:.4f} | '.format(eigen_self),
                    'acc/mis = {:.4f}/{:.4f}'.format(acc_target,
                                                     1 - acc_target))

            self.loss = self.loss_weight * task_loss + self.regularization_weight * reg_loss

            adj_grad = torch.autograd.grad(self.loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            if self.loss_type == 'CW':
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            # return self.adj_changes.cpu().detach().numpy()

            if verbose and t % 20 == 0:
                print('budget/true={:.1f}/{:.1f}'.format(
                    n_perturbations,
                    torch.clamp(self.adj_changes, 0, 1).sum()))

            if sample_type == 'sample':
                l, r, m = self.projection(n_perturbations)
            elif sample_type == 'greedy':
                self.greedy(n_perturbations)
            elif sample_type == 'greedy2':
                self.greedy2(n_perturbations)
            elif sample_type == 'greedy3':
                self.greedy3(n_perturbations)
            else:
                exit(f"unkown sample type {sample_type}")

            if verbose and t % 20 == 0:
                print('budget/true={:.1f}/{:.1f}'.format(
                    n_perturbations,
                    torch.clamp(self.adj_changes, 0, 1).sum()))

        if sample_type == 'sample':
            self.random_sample(ori_adj, ori_features, labels, idx_target,
                               n_perturbations)
        elif sample_type == 'greedy':
            self.greedy(n_perturbations)
        elif sample_type == 'greedy2':
            self.greedy2(n_perturbations)
        elif sample_type == 'greedy3':
            self.greedy3(n_perturbations)
        else:
            exit(f"unkown sample type {sample_type}")

        print("final ptb budget/true= {:.1f}/{:.1f}".format(
            n_perturbations, self.adj_changes.sum()))
        self.modified_adj = self.get_modified_adj(ori_adj).detach()
        self.check_adj_tensor(self.modified_adj)

        # for sanity check
        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)
        adj_norm = utils.normalize_adj_tensor(self.modified_adj,
                                              device=self.device)
        e, v = torch.symeig(adj_norm, eigenvectors=True)

        self.adj = ori_adj.detach()
        self.labels = labels.detach()
        self.ori_e = ori_e
        self.ori_v = ori_v
        self.e = e
        self.v = v

    def greedy(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        # l = min(s)
        # r = max(s)
        # noise = np.random.normal((l+r)/2, 0.1*(r-l), s.shape)
        # s += noise

        s_vec = np.squeeze(np.reshape(s, (1, -1)))
        # max_index = (-np.absolute(s_vec)).argsort()[:n_perturbations]
        max_index = (-s_vec).argsort()[:n_perturbations]

        mask = np.zeros_like(s_vec)
        mask[max_index] = 1.0

        best_s = np.reshape(mask, s.shape)

        self.adj_changes.data.copy_(
            torch.clamp(torch.tensor(best_s), min=0, max=1))

    def greedy3(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        s_vec = np.squeeze(np.reshape(s, (1, -1)))
        # max_index = (-np.absolute(s_vec)).argsort()[:n_perturbations]
        max_index = (s_vec).argsort()[:n_perturbations]

        mask = np.zeros_like(s_vec)
        mask[max_index] = 1.0

        best_s = np.reshape(mask, s.shape)

        self.adj_changes.data.copy_(
            torch.clamp(torch.tensor(best_s), min=0, max=1))

    def greedy2(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        l = min(s)
        r = max(s)
        noise = np.random.normal((l + r) / 2, 0.4 * (r - l), s.shape)
        s += noise

        s_vec = np.squeeze(np.reshape(s, (1, -1)))
        max_index = (-np.absolute(s_vec)).argsort()[:n_perturbations]

        mask = np.zeros_like(s_vec)
        mask[max_index] = 1.0

        best_s = np.reshape(mask, s.shape)

        self.adj_changes.data.copy_(
            torch.clamp(torch.tensor(best_s), min=0, max=1))

    def random_sample(self, ori_adj, ori_features, labels, idx_target,
                      n_perturbations):
        K = 10
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)
                # randm = np.random.uniform(size=s.shape[0])
                # sampled = np.where(s > randm, 1, 0)

                # if sampled.sum() > n_perturbations:
                #     continue
                while sampled.sum() > n_perturbations:
                    sampled = np.random.binomial(1, s)
                # if sampled.sum() > n_perturbations:
                #     indices = np.transpose(np.nonzero(sampled))
                #     candidate_idx = [m for m in range(indices.shape[0])]
                #     chosen_idx = np.random.choice(candidate_idx, n_perturbations, replace=False)
                #     chosen_indices = indices[chosen_idx, :]
                #     sampled = np.zeros_like(sampled)
                #     for idx in chosen_indices:
                #         sampled[idx] = 1

                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj,
                                                      device=self.device)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_target], labels[idx_target])
                # loss = F.nll_loss(output[idx_target], labels[idx_target])
                # print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def get_modified_adj(self, ori_adj):

        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj) - torch.eye(
                self.nnodes).to(self.device) - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        tril_indices = torch.tril_indices(row=self.nnodes,
                                          col=self.nnodes,
                                          offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj

        return modified_adj

    def add_random_noise(self, ori_adj):
        noise = 1e-4 * torch.rand(self.nnodes, self.nnodes).to(self.device)
        return (noise + torch.transpose(noise, 0, 1)) / 2.0 + ori_adj

    def projection2(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        n = np.squeeze(np.reshape(s, (1, -1))).shape[0]
        self.adj_changes.data.copy_(
            torch.clamp(self.adj_changes.data, min=0, max=n_perturbations / n))
        return 0, 0, 0

    def projection(self, n_perturbations):
        l, r, m = 0, 0, 0
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            l = left.cpu().detach()
            r = right.cpu().detach()
            m = miu.cpu().detach()
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data, min=0, max=1))

        return l, r, m

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000 * onehot).argmax(1).detach()
            margin = output[np.arange(len(output)), labels] - \
                   output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes - x, 0,
                               1).sum() - n_perturbations

        miu = a
        while ((b - a) >= epsilon):
            miu = (a + b) / 2
            # Check if middle point is root
            if (func(miu) == 0.0):
                b = miu
                break
            # Decide the side to repeat the steps
            if (func(miu) * func(a) < 0):
                b = miu
            else:
                a = miu
        # print("The value of root is : ","%.4f" % miu)
        return miu
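
torch.symeig(A, eigenvectors=True), used throughout these attacks, was deprecated in PyTorch 1.9 and removed in later releases; the equivalent on current versions is torch.linalg.eigh, which likewise returns eigenvalues in ascending order. A sketch of the spectral-distance term with the modern API (toy matrices):

import torch

A = torch.randn(4, 4)
A = (A + A.t()) / 2                 # symmetric stand-in for a normalized adjacency
ori_e, ori_v = torch.linalg.eigh(A) # was: torch.symeig(A, eigenvectors=True)
B = A + 0.01 * torch.eye(4)         # stand-in for the perturbed adjacency
e, v = torch.linalg.eigh(B)
eigen_mse = torch.norm(ori_e - e)   # the eigen_mse term used above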
Example #9
class GPRegressor(nn.Module):
    def __init__(self, kernel, sn=0.1, lr=1e-1, scheduler=False, prior=True):
        super(GPRegressor, self).__init__()
        self.sn = Parameter(torch.Tensor([sn]))
        self.kernel = kernel
        self.loss_func = NLMLLoss()
        opt = [p for p in self.parameters() if p.requires_grad]
        self.optimizer = optim.Adam(opt, lr=lr)
        if prior:
            self.prior = torch.distributions.Beta(2, 2).log_prob
        else:
            self.prior = None
        if scheduler:
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer, patience=2, verbose=True, mode='max')
        else:
            self.scheduler = None

    def loss(self, X, y, jitter, val=None):
        K = self.kernel(X, X)
        inds = list(range(len(K)))
        K[[inds], [inds]] += self.sn + jitter
        L = torch.potrf(K, upper=False)
        alpha = torch.trtrs(y, L, upper=False)[0]
        alpha = torch.trtrs(alpha, L.t(), upper=True)[0]
        loss = self.loss_func(L, alpha, y)
        if self.prior is not None:
            loss -= self.prior(self.sn)

        if val is not None:
            X_val, y_val = val
            k_star = self.kernel(X, X_val)
            mu = k_star.t() @ alpha
            mse = nn.MSELoss()(mu, y_val)
            return loss, mse
        else:
            return loss

    def forward(self, X):
        """ Gaussian process regression predictions.

        Parameters:
            X: m x d points to predict

        Returns:
            mu: m x 1 predicted means
            var: m x m predicted covariance

        Follows Algorithm 2.1 from GPML.
        """
        ### Implement prior ###
        ### Scaling
        k_star = self.kernel(self.X, X)
        mu = k_star.t() @ self.alpha
        v = torch.trtrs(k_star, self.L, upper=False)[0]
        k_ss = self.kernel(X, X)
        var = k_ss - v.t() @ v
        return mu, var

    def fit(self,
            X,
            y,
            its=100,
            jitter=1e-6,
            verbose=True,
            val=None,
            chkpt=None):
        self.X = X
        self.y = y
        self._fit(X, y, its, jitter, verbose, val, chkpt)
        self._set_pars(jitter)
        return self.history

    def _fit(self, X, y, its, jitter, verbose, val, chkpt):
        self.history = []
        if val is not None and chkpt is not None:
            best_mse = 1e14
        for it in range(its):
            if val is not None:
                loss, mse = self.loss(X, y, jitter, val=val)
                mse = mse.item()
                if chkpt is not None and mse < best_mse:
                    best_mse = mse  # track the best validation mse (was never updated)
                    torch.save(self.state_dict(), chkpt)
            else:
                loss = self.loss(X, y, jitter)
            # backward
            self.optimizer.zero_grad()
            loss.backward(retain_graph=False)
            # update parameters
            self.optimizer.step()
            self.sn.data.clamp_(min=1e-6)
            # if self.scheduler is not None:
            #     self.scheduler.step(loss)
            if verbose:
                update = '\rIteration %d of %d\tNLML: %.4f\tsn: %.6f\t' \
                        %(it + 1, its, loss, self.sn.cpu().detach().numpy()[0])
                print(update, end='')
                if val is not None:
                    print('val mse: %.4f' % mse, end='')
            if val is None:
                h = (loss.item(), self.sn.item())
            else:
                h = (loss.item(), self.sn.item(), mse)
                del mse
            self.history.append(h)
            del loss

    def _set_pars(self, jitter):
        Ky = self.kernel(self.X, self.X)
        inds = list(range(len(Ky)))
        Ky[[inds], [inds]] += self.sn + jitter
        self.L = torch.potrf(Ky, upper=False)
        self.alpha = torch.trtrs(self.y, self.L, upper=False)[0]
        self.alpha = torch.trtrs(self.alpha, self.L.t(), upper=True)[0]
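
torch.potrf and torch.trtrs date from pre-1.0 PyTorch. On current versions the same Cholesky solve in loss() and _set_pars() reads roughly as follows (a sketch, not a drop-in patch):

import torch

K = torch.eye(5) + 0.1 * torch.ones(5, 5)  # toy positive-definite kernel matrix
y = torch.randn(5, 1)
L = torch.linalg.cholesky(K)                              # was: torch.potrf(K, upper=False)
alpha = torch.linalg.solve_triangular(L, y, upper=False)  # was: torch.trtrs(y, L, upper=False)[0]
alpha = torch.linalg.solve_triangular(L.t(), alpha, upper=True)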
Example #10
class PGDAttack(BaseAttack):
    def __init__(self,
                 model=None,
                 nnodes=None,
                 loss_type='CE',
                 feature_shape=None,
                 attack_structure=True,
                 attack_features=False,
                 device='cpu'):

        super(PGDAttack, self).__init__(model, nnodes, attack_structure,
                                        attack_features, device)

        assert attack_features or attack_structure, 'attack_features and attack_structure cannot both be False'

        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None

        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(
                torch.FloatTensor(int(nnodes * (nnodes - 1) / 2)))
            self.adj_changes.data.fill_(0)

        if attack_features:
            # `assert True, ...` in the original could never fire; raise instead
            raise NotImplementedError(
                'Topology Attack does not support attacking features')

        self.complementary = None

    def attack(self,
               ori_features,
               ori_adj,
               labels,
               idx_train,
               n_perturbations,
               epochs=200,
               **kwargs):
        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(ori_adj,
                                                        ori_features,
                                                        labels,
                                                        device=self.device)

        victim_model.eval()
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj)
            output = victim_model(ori_features, adj_norm)
            # loss = F.nll_loss(output[idx_train], labels[idx_train])
            loss = self._loss(output[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                lr = 200 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            if self.loss_type == 'CW':
                lr = 0.1 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train,
                           n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()

    def random_sample(self, ori_adj, ori_features, labels, idx_train,
                      n_perturbations):
        K = 20
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)

                print(sampled.sum())
                if sampled.sum() > n_perturbations:
                    continue
                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_train], labels[idx_train])
                # loss = F.nll_loss(output[idx_train], labels[idx_train])
                print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000 * onehot).argmax(1)
            margin = output[np.arange(len(output)), labels] - \
                   output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def projection(self, n_perturbations):
        # projected = torch.clamp(self.adj_changes, 0, 1)
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes - 1).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data, min=0, max=1))

    def get_modified_adj(self, ori_adj):

        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj) - torch.eye(
                self.nnodes).to(self.device) - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        # strictly-lower-triangular indices of the full n x n matrix, as in the
        # SpectralAttack version above; the original offset=0 variant put some of
        # the n*(n-1)/2 entries on the diagonal, where complementary zeroes them
        tril_indices = torch.tril_indices(row=self.nnodes,
                                          col=self.nnodes,
                                          offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        # m += m.t()
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj

        return modified_adj

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes - x, 0,
                               1).sum() - n_perturbations

        miu = a
        while ((b - a) >= epsilon):
            miu = (a + b) / 2
            # Check if middle point is root
            if (func(miu) == 0.0):
                break
            # Decide the side to repeat the steps
            if (func(miu) * func(a) < 0):
                b = miu
            else:
                a = miu
        # print("The value of root is : ","%.4f" % miu)
        return miu
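
# A minimal, self-contained sketch (not part of the original example) of the
# same projection-via-bisection idea: find mu so that clamp(s - mu, 0, 1)
# sums to the budget. All names below are illustrative.
import torch

def project_onto_budget(s, budget, epsilon=1e-5):
    """Project s onto {x : 0 <= x <= 1, sum(x) <= budget}."""
    if torch.clamp(s, 0, 1).sum() <= budget:
        return torch.clamp(s, 0, 1)
    a, b = (s - 1).min(), s.max()
    mu = a
    while (b - a) >= epsilon:
        mu = (a + b) / 2
        if torch.clamp(s - mu, 0, 1).sum() > budget:
            a = mu  # still over budget: shift further down
        else:
            b = mu
    return torch.clamp(s - mu, 0, 1)

# usage: project_onto_budget(torch.rand(100), budget=5.0)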
Ejemplo n.º 11
0
class MixedOp(nn.Module):
    """mixed operation
    """
    MODE = None  # full, two, None, full_v2

    def __init__(self, C_in, C_out, stride):
        super(MixedOp, self).__init__()
        self._ops = nn.ModuleList()
        self.shortcut = None
        self.candidate = PRIMITIVES
        self.active_index = [0]
        self.inactive_index = None

        self.current_prob_over_ops = None

        if stride == 1 and C_in == C_out:
            OPS.update(OPS_ZERO)
            self.candidate = PRIMITIVES + ['zero']
            self.shortcut = Identity(C_in, C_out, stride)
        for primitive in self.candidate:
            #   if primitive == 'identity' and C_in != C_out:
            #     continue
            op = OPS[primitive](C_in, C_out, stride, False)
            self._ops.append(op)
        self.n_choices = len(self._ops)
        self.path_gate = Parameter(torch.Tensor(
            self.n_choices))  # binary gates
        self.alpha = Parameter(torch.Tensor(
            self.n_choices))  # architecture parameters
        #self.alpha = Variable(
        #1e-3*torch.randn(1, len(self._ops)).cuda(), requires_grad=True)

    def binarize(self):
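        # Two-path sampling in the style of ProxylessNAS: draw two candidate
        # ops from softmax(alpha), then activate exactly one of them through
        # a hard 0/1 gate, so only two paths are ever held in memory.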
        # reset binary gates
        self.path_gate.data.zero_()
        # sample two ops according to `probs`
        probs = F.softmax(self.alpha, dim=0)
        sample_op = torch.multinomial(probs.data, 2, replacement=False)
        probs_slice = F.softmax(torch.stack(
            [self.alpha[idx] for idx in sample_op]),
                                dim=0)
        self.current_prob_over_ops = torch.zeros_like(probs)
        for i, idx in enumerate(sample_op):
            self.current_prob_over_ops[idx] = probs_slice[i]
        # choose one op to be active and the other inactive, per probs_slice
        c = torch.multinomial(probs_slice.data, 1)[0]  # 0 or 1
        active_op = sample_op[c].item()
        inactive_op = sample_op[1 - c].item()
        self.active_index = [active_op]
        self.inactive_index = [inactive_op]
        # set binary gate
        self.path_gate.data[active_op] = 1.0
        # avoid over-regularization
        for _i in range(len(probs)):
            for name, param in self._ops[_i].named_parameters():
                param.grad = None

    @property
    def chosen_index(self):
        probs = self.alpha.cpu().numpy()
        index = int(np.argmax(probs))
        return index, probs[index]

    def set_chosen_op_active(self):
        chosen_idx, _ = self.chosen_index
        self.active_index = [chosen_idx]
        self.inactive_index = [_i for _i in range(0, chosen_idx)] + \
                              [_i for _i in range(
                                  chosen_idx + 1, self.n_choices)]

    def is_zero_layer(self):
        return self.active_op.is_zero_layer()

    @property
    def active_op(self):
        """ assume only one path is active """
        return self._ops[self.active_index[0]]

    @property
    def active_op_name(self):
        """ assume only one path is active """
        return self.candidate[self.active_index[0]]

    def set_arch_param_grad(self):
        binary_grads = self.path_gate.grad.data
        if self.active_op.is_zero_layer():
            self.alpha.grad = None
            return
        if self.alpha.grad is None:
            self.alpha.grad = torch.zeros_like(self.alpha.data)

        involved_idx = self.active_index + self.inactive_index
        probs_slice = F.softmax(torch.stack(
            [self.alpha[idx] for idx in involved_idx]),
                                dim=0).data
        for i in range(2):
            for j in range(2):
                origin_i = involved_idx[i]
                origin_j = involved_idx[j]
                self.alpha.grad.data[origin_i] += \
                    binary_grads[origin_j] * probs_slice[j] * \
                    (delta_ij(i, j) - probs_slice[i])
        for _i, idx in enumerate(self.active_index):
            self.active_index[_i] = (idx, self.alpha.data[idx].item())
        for _i, idx in enumerate(self.inactive_index):
            self.inactive_index[_i] = (idx, self.alpha.data[idx].item())
        return

    def rescale_updated_arch_param(self):
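        # After the two sampled alphas are updated, subtract a common offset
        # so the total softmax mass of the involved ops matches its value
        # before the update (softmax is shift-invariant, so this only
        # re-balances the sampled pair against the untouched alphas).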
        if not isinstance(self.active_index[0], tuple):
            assert self.active_op.is_zero_layer()
            return
        involved_idx = [
            idx for idx, _ in (self.active_index + self.inactive_index)
        ]
        old_alphas = [
            alpha for _, alpha in (self.active_index + self.inactive_index)
        ]
        new_alphas = [self.alpha.data[idx] for idx in involved_idx]

        offset = math.log(
            sum([math.exp(alpha) for alpha in new_alphas]) /
            sum([math.exp(alpha) for alpha in old_alphas]))

        for idx in involved_idx:
            self.alpha.data[idx] -= offset

    @property
    def module_str(self):
        chosen_index, probs = self.chosen_index
        return 'MixedOp(%s, %.3f)' % (self.candidate[chosen_index], probs)

    def forward(self, x):
        output = 0
        # Only the two sampled paths contribute: the active op runs normally,
        # while the inactive op's output is detached so only its gate (not
        # its weights) receives gradient.
        for _i in self.active_index:
            oi = self._ops[_i](x)
            output = output + self.path_gate[_i] * oi
        for _i in self.inactive_index:
            oi = self._ops[_i](x)
            output = output + self.path_gate[_i] * oi.detach()
        return output
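
# Hedged numeric check (not from the original code) of the architecture
# gradient rule used in set_arch_param_grad above: with p = softmax(alpha),
# d(gate_j)/d(alpha_i) = p_j * (delta_ij - p_i), so
# dL/d(alpha_i) = sum_j binary_grads[j] * p_j * (delta_ij - p_i).
import torch
import torch.nn.functional as F

alpha = torch.randn(2, requires_grad=True)
gates = F.softmax(alpha, dim=0)   # relaxed gates over the two sampled ops
binary_grads = torch.randn(2)     # stands in for dL/d(gate)
gates.backward(binary_grads)

p = F.softmax(alpha.detach(), dim=0)
manual = torch.zeros(2)
for i in range(2):
    for j in range(2):
        manual[i] += binary_grads[j] * p[j] * (float(i == j) - p[i])
assert torch.allclose(alpha.grad, manual, atol=1e-6)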
Ejemplo n.º 12
0
class ONN(nn.Module):
    def __init__(self,
                 features_size,
                 max_num_hidden_layers,
                 qtd_neuron_per_hidden_layer,
                 n_classes,
                 loss_fun,
                 batch_size=1,
                 b=0.99,
                 n=0.001,
                 s=0.2,
                 use_cuda=False):
        super(ONN, self).__init__()

        if torch.cuda.is_available() and use_cuda:
            print("Using CUDA :]")

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() and use_cuda else "cpu")

        self.features_size = features_size
        self.max_num_hidden_layers = max_num_hidden_layers
        self.qtd_neuron_per_hidden_layer = qtd_neuron_per_hidden_layer
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.b = Parameter(torch.tensor(b),
                           requires_grad=False).to(self.device)
        self.n = Parameter(torch.tensor(n),
                           requires_grad=False).to(self.device)
        self.s = Parameter(torch.tensor(s),
                           requires_grad=False).to(self.device)
        self.loss_fun = loss_fun
        self.t = 0

        self.hidden_layers = []
        self.output_layers = []

        self.hidden_layers.append(
            nn.Linear(features_size, qtd_neuron_per_hidden_layer))

        for i in range(max_num_hidden_layers - 1):
            self.hidden_layers.append(
                nn.Linear(qtd_neuron_per_hidden_layer,
                          qtd_neuron_per_hidden_layer))

        for i in range(max_num_hidden_layers):
            self.output_layers.append(
                nn.Linear(qtd_neuron_per_hidden_layer, n_classes))

        self.hidden_layers = nn.ModuleList(self.hidden_layers).to(self.device)
        self.output_layers = nn.ModuleList(self.output_layers).to(self.device)

        self.alpha = Parameter(torch.Tensor(self.max_num_hidden_layers).fill_(
            1 / (self.max_num_hidden_layers + 1)),
                               requires_grad=False).to(self.device)

        self.loss_array = []

    def monitor_updates(self, W, alpha, dW):
        # Diagnostic only: report the ratio of the SGD update norm to the
        # parameter norm. Note that .numpy() on a CPU tensor shares memory
        # with it, so W must not be modified in place here.
        W = W.cpu().numpy()
        dW = dW.cpu().numpy()
        alpha = alpha.cpu().numpy()
        n = self.n.cpu().numpy()
        param_scale = np.linalg.norm(W.ravel())
        update = n * alpha * dW  # scale of a plain SGD step
        update_scale = np.linalg.norm(update.ravel())
        self.t += 1
        update_ratio = update_scale / param_scale
        if update_ratio > 1e-3:
            print('%d frame : update_ratio : %.5f ' % (self.t, update_ratio))

    def zero_grad(self):
        for i in range(self.max_num_hidden_layers):
            self.output_layers[i].weight.grad.data.fill_(0)
            self.output_layers[i].bias.grad.data.fill_(0)
            self.hidden_layers[i].weight.grad.data.fill_(0)
            self.hidden_layers[i].bias.grad.data.fill_(0)

    def update_weights(self, X, Y, show_loss):
        # convert numpy targets for both loss types ('cel' needs it too)
        if isinstance(Y, np.ndarray):
            Y = torch.from_numpy(Y).to(self.device)

        predictions_per_layer = self.forward(X)

        losses_per_layer = []

        for out in predictions_per_layer:
            if self.loss_fun == 'cel':
                criterion = nn.CrossEntropyLoss().to(self.device)
                loss = criterion(out.view(self.batch_size, self.n_classes),
                                 Y.view(self.batch_size).long())
            elif self.loss_fun == 'mse':
                criterion = nn.MSELoss().to(self.device)
                loss = criterion(
                    out.view(self.batch_size, self.n_classes),
                    Y.view(self.batch_size, self.n_classes).float())
            losses_per_layer.append(loss)

        w = [None] * len(losses_per_layer)
        b = [None] * len(losses_per_layer)

        with torch.no_grad():
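            # Hedge backpropagation: each per-layer classifier is updated by
            # its own loss scaled by alpha[i], while hidden layer j
            # accumulates gradient contributions from every deeper loss
            # (i >= j) before a single SGD step is applied further down.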

            for i in range(len(losses_per_layer)):
                losses_per_layer[i].backward(retain_graph=True)
                self.output_layers[i].weight.data -= self.n * \
                                                     self.alpha[i] * self.output_layers[i].weight.grad.data
                self.output_layers[i].bias.data -= self.n * \
                                                   self.alpha[i] * self.output_layers[i].bias.grad.data
                #              self.monitor_updates(self.output_layers[i].weight.data,self.alpha[i],self.output_layers[i].weight.grad.data)
                for j in range(i + 1):
                    if w[j] is None:
                        w[j] = self.alpha[i] * self.hidden_layers[
                            j].weight.grad.data
                        b[j] = self.alpha[i] * self.hidden_layers[
                            j].bias.grad.data
                    else:
                        w[j] += self.alpha[i] * self.hidden_layers[
                            j].weight.grad.data
                        b[j] += self.alpha[i] * self.hidden_layers[
                            j].bias.grad.data

                self.zero_grad()

            for i in range(len(losses_per_layer)):
                self.hidden_layers[i].weight.data -= self.n * w[i]
                self.hidden_layers[i].bias.data -= self.n * b[i]

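            # Hedge weight update: alpha_i <- alpha_i * b**loss_i, floored
            # at s / L so every layer keeps a minimum share, then
            # renormalized by z_t below.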
            for i in range(len(losses_per_layer)):
                self.alpha[i] *= torch.pow(self.b, losses_per_layer[i])
                self.alpha[i] = torch.max(self.alpha[i],
                                          self.s / self.max_num_hidden_layers)

        z_t = torch.sum(self.alpha)

        self.alpha = Parameter(self.alpha / z_t,
                               requires_grad=False).to(self.device)

        if show_loss:
            # ensemble prediction: alpha-weighted sum of per-layer outputs
            real_output = torch.sum(
                torch.mul(
                    self.alpha.view(self.max_num_hidden_layers, 1).repeat(
                        1, self.batch_size).view(self.max_num_hidden_layers,
                                                 self.batch_size, 1),
                    predictions_per_layer), 0)
            if self.loss_fun == 'cel':
                criterion = nn.CrossEntropyLoss().to(self.device)
                loss = criterion(
                    real_output.view(self.batch_size, self.n_classes),
                    Y.view(self.batch_size).long())
            elif self.loss_fun == 'mse':
                criterion = nn.MSELoss().to(self.device)
                loss = criterion(
                    real_output.view(self.batch_size, self.n_classes),
                    Y.view(self.batch_size, self.n_classes).float())
            # store plain floats so the list does not retain autograd graphs
            self.loss_array.append(loss.item())
            if (len(self.loss_array) % 1000) == 0:
                print(
                    "WARNING: Set 'show_loss' to 'False' when not debugging. "
                    "It will deteriorate the fitting performance.")
                mean_loss = np.mean(self.loss_array)
                print("Alpha: " + str(self.alpha.data.cpu().numpy()))
                print("Training Loss: " + str(mean_loss))
                self.loss_array.clear()

    def forward(self, X):
        hidden_connections = []

        X = torch.from_numpy(X).float().to(self.device)

        x = F.relu(self.hidden_layers[0](X))
        hidden_connections.append(x)

        for i in range(1, self.max_num_hidden_layers):
            hidden_connections.append(
                F.relu(self.hidden_layers[i](hidden_connections[i - 1])))

        output_class = []

        for i in range(self.max_num_hidden_layers):
            output_class.append(self.output_layers[i](hidden_connections[i]))

        pred_per_layer = torch.stack(output_class)
        #        print('pred_per_layer : ', pred_per_layer)

        return pred_per_layer

    def validate_input_X(self, data):
        if len(data.shape) != 2:
            raise Exception(
                "Wrong dimension for this X data. It should have exactly two "
                "dimensions (batch, features).")

    def validate_input_Y(self, data):
        if len(data.shape) != 2:
            raise Exception(
                "Wrong dimension for this Y data. It should have exactly two "
                "dimensions (batch, targets).")

    def partial_fit_(self, X_data, Y_data, show_loss=True):
        self.validate_input_X(X_data)
        self.validate_input_Y(Y_data)
        self.update_weights(X_data, Y_data, show_loss)

    def partial_fit(self, X_data, Y_data, show_loss=False):
        self.partial_fit_(X_data, Y_data, show_loss)

    def predict_(self, X_data):
        self.validate_input_X(X_data)
        if self.loss_fun == 'cel':
            return torch.argmax(torch.sum(
                torch.mul(
                    self.alpha.view(self.max_num_hidden_layers, 1).repeat(
                        1, len(X_data)).view(self.max_num_hidden_layers,
                                             len(X_data), 1),
                    self.forward(X_data)), 0),
                                dim=1).cpu().numpy()
        if self.loss_fun == 'mse':
            return torch.sum(
                torch.mul(
                    self.alpha.view(self.max_num_hidden_layers, 1).repeat(
                        1, len(X_data)).view(self.max_num_hidden_layers,
                                             len(X_data), 1),
                    self.forward(X_data)), 0).cpu().detach().numpy()

    def predict(self, X_data):
        pred = self.predict_(X_data)
        return pred

    def export_params_to_json(self):
        state_dict = self.state_dict()
        params_gp = {}
        for key, tensor in state_dict.items():
            params_gp[key] = tensor.cpu().numpy().tolist()

        return json.dumps(params_gp)

    def load_params_from_json(self, json_data):
        params = json.loads(json_data)
        o_dict = collections.OrderedDict()
        for key, tensor in params.items():
            o_dict[key] = torch.tensor(tensor).to(self.device)
        self.load_state_dict(o_dict)
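
# Hedged usage sketch (not part of the original example): online training of
# the ONN above on synthetic data; shapes and hyperparameters are
# illustrative only.
import numpy as np

onn = ONN(features_size=4, max_num_hidden_layers=3,
          qtd_neuron_per_hidden_layer=16, n_classes=2, loss_fun='cel')
for _ in range(100):
    X = np.random.randn(1, 4).astype(np.float32)      # one sample per step
    Y = np.random.randint(0, 2, size=(1, 1))          # 2-D, as validated
    onn.partial_fit(X, Y)
print(onn.predict(np.random.randn(1, 4).astype(np.float32)))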
Ejemplo n.º 13
0
class Gumbel_Generator_Old(nn.Module):
    def __init__(self, sz=10, temp=10, temp_drop_frac=0.9999):
        super(Gumbel_Generator_Old,
              self).__init__()  # initialize the nn.Module base class
        self.sz = sz
        # Parameter wraps a plain (non-trainable) Tensor so it is registered
        # on the module and updated during training.
        self.gen_matrix = Parameter(torch.rand(
            sz, sz, 2))  # torch.rand draws uniformly from [0, 1)
        self.new_matrix = Parameter(torch.zeros(5, 5, 2))
        # gen_matrix holds the per-entry edge/no-edge scores (used as
        # logits) of the adjacency matrix
        self.temperature = temp
        self.temp_drop_frac = temp_drop_frac

    def symmetry(self):
        matrix = self.gen_matrix.permute(2, 0, 1)
        temp_matrix = torch.triu(matrix, 1) + torch.triu(matrix, 1).permute(
            0, 2, 1)
        self.gen_matrix.data = temp_matrix.permute(1, 2, 0)

    def drop_temp(self):
        # anneal the temperature
        self.temperature = self.temperature * self.temp_drop_frac

    def sample_all(self, hard=False, epoch=1):
        # sample one full adjacency matrix
        # self.symmetry()
        self.logp = self.gen_matrix.view(
            -1, 2)  # flatten to (sz * sz, 2) logits
        out = gumbel_softmax(self.logp, self.temperature, hard)
        if hard:
            hh = torch.zeros(self.gen_matrix.size()[0]**2, 2)
            for i in range(out.size()[0]):
                hh[i, out[i]] = 1
            out = hh
        if use_cuda:
            out = out.cuda()
        out_matrix = out[:, 0].view(self.gen_matrix.size()[0],
                                    self.gen_matrix.size()[0])
        return out_matrix

    def sample_small(self, node_list, hard=False):
        indices = np.ix_(node_list, node_list)
        self.logp = self.gen_matrix[indices].view(
            -1, 2)  # flatten the selected submatrix to (k * k, 2) logits

        out = gumbel_softmax(self.logp, self.temperature, hard)

        # `hard` discretizes the sample to one-hot 0/1 values
        if hard:
            hh = torch.zeros(self.gen_matrix[indices].size()[0]**2, 2)
            for i in range(out.size()[0]):
                hh[i, out[i]] = 1
            out = hh
        if use_cuda:
            out = out.cuda()
        out_matrix = out[:, 0].view(len(node_list), len(node_list))
        return out_matrix

    def sample_adj_ij(self, node_list, j, hard=False, sample_time=1):
        # self.logp = self.gen_matrix[:,i]
        self.logp = self.gen_matrix[node_list, j]

        out = gumbel_softmax(self.logp, self.temperature, hard=hard)
        if use_cuda:
            out = out.cuda()
        # print(out)
        if hard:
            out_matrix = out.float()
        else:
            out_matrix = out[:, 0]
        return out_matrix

    def sample_adj_i(self, i, hard=False, sample_time=1):
        # self.symmetry()
        self.logp = self.gen_matrix[:, i]
        out = gumbel_softmax(self.logp, self.temperature, hard=hard)
        if use_cuda:
            out = out.cuda()
        # print(out)
        if hard:
            out_matrix = out.float()
        else:
            out_matrix = out[:, 0]
        return out_matrix

    def get_temperature(self):
        return self.temperature

    def get_cross_entropy(self, obj_matrix):
        # cross-entropy distance between the generator and a target matrix
        logps = F.softmax(self.gen_matrix, 2)
        logps = torch.log(logps[:, :, 0] + 1e-10) * obj_matrix + torch.log(
            logps[:, :, 1] + 1e-10) * (1 - obj_matrix)
        result = -torch.sum(logps)
        result = result.cpu() if use_cuda else result
        return result.data.numpy()

    def get_entropy(self):
        logps = F.softmax(self.gen_matrix, 2)
        result = torch.mean(torch.sum(logps * torch.log(logps + 1e-10), 1))
        result = result.cpu() if use_cuda else result
        return (-result.data.numpy())

    def randomization(self, fraction):
        # re-randomize gen_matrix entries; `fraction` is the share of
        # entries that get reset
        sz = self.gen_matrix.size()[0]
        numbers = int(fraction * sz * sz)

        for i in range(numbers):
            ii = np.random.choice(range(sz), (2, 1))
            z = torch.rand(2).cuda() if use_cuda else torch.rand(2)
            self.gen_matrix.data[ii[0], ii[1], :] = z

    def init(self, mean, var):
        # note: `var` is passed to init.normal_ as the standard deviation
        init.normal_(self.gen_matrix, mean=mean, std=var)
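
# Hedged sketch (assumes the external gumbel_softmax used above behaves like
# torch.nn.functional.gumbel_softmax): sampling a relaxed and a discrete
# 4 x 4 adjacency matrix from per-entry two-class logits.
import torch
import torch.nn.functional as F

logits = torch.rand(4, 4, 2)                          # like gen_matrix
soft = F.gumbel_softmax(logits.view(-1, 2), tau=10.0, hard=False)
adj_soft = soft[:, 0].view(4, 4)                      # relaxed edge weights
hard = F.gumbel_softmax(logits.view(-1, 2), tau=10.0, hard=True)
adj_hard = hard[:, 0].view(4, 4)                      # straight-through 0/1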