Example #1
0
    def i_fgsm(self,
               x,
               y,
               targeted=False,
               eps=0.03,
               alpha=1,
               iteration=1,
               x_val_min=-1,
               x_val_max=1):
        """Craft adversarial inputs with iterative signed-gradient steps.

        Every iteration moves the current adversarial input by ``alpha`` along
        the sign of the gradient, projects it back into the ``eps``-box around
        ``x`` and clamps it to ``[x_val_min, x_val_max]``.

        Args:
            x: Input batch fed to ``self.net``.
            y: Labels handed to ``self.criterion``. They are the labels to
                move towards when ``targeted`` is true and the labels to move
                away from otherwise.
            targeted: If true the criterion is minimised w.r.t. ``y``,
                otherwise it is maximised.
            eps: Maximum per-element deviation of the result from ``x``.
            alpha: Step size of a single iteration.
            iteration: Number of steps. With ``0`` the result is an
                unperturbed copy of ``x``.
            x_val_min: Lower bound of the valid input value range.
            x_val_max: Upper bound of the valid input value range.

        Returns:
            Tuple ``(x_adv, h_adv, h)``: the adversarial input (a fresh leaf
            with ``requires_grad=True``), the output of ``self.net`` on it,
            and the output of ``self.net`` on the original ``x``.
        """
        # The eps-box around the clean input is loop-invariant: build it once.
        upper = x + eps
        lower = x - eps

        x_adv = Variable(x.data, requires_grad=True)
        for _ in range(iteration):
            h_adv = self.net(x_adv)
            cost = self.criterion(h_adv, y)
            if not targeted:
                # Descending on the negated loss ascends on the loss itself.
                cost = -cost

            # x_adv is a brand-new leaf on every pass, so its .grad is always
            # empty here; only the parameter gradients need to be reset.
            self.net.zero_grad()
            cost.backward()

            x_adv = x_adv - alpha * x_adv.grad.sign()
            # Project back into [x - eps, x + eps] element-wise.
            x_adv = torch.max(torch.min(x_adv, upper), lower)
            x_adv = torch.clamp(x_adv, x_val_min, x_val_max)
            # Detach from the graph so the next pass starts from a leaf.
            x_adv = Variable(x_adv.data, requires_grad=True)

        h = self.net(x)
        h_adv = self.net(x_adv)

        return x_adv, h_adv, h
Example #2
0
    def FGSM(self,
             x,
             y_true,
             y_target=None,
             eps=0.03,
             alpha=2 / 255,
             iteration=1):
        """Attack a batch with FGSM / iterative FGSM and report the effect.

        The network is switched to 'eval' mode for the attack and back to
        'train' mode before returning.

        Args:
            x: Input batch. Assumed to be a 4-D image batch with 1 or 3
                channels, since the overlay code below indexes four
                dimensions and needs a 3-channel image (TODO confirm
                against callers).
            y_true: Ground-truth labels of ``x``.
            y_target: Target labels. When given the attack is targeted,
                otherwise it is untargeted against ``y_true``.
            eps: Perturbation budget forwarded to the attack.
            alpha: Step size, only used by the iterative attack.
            iteration: ``1`` selects ``self.attack.fgsm``; any other value
                selects ``self.attack.i_fgsm`` with that many iterations.

        Returns:
            Tuple ``(x_adv, changed, (accuracy, cost, accuracy_adv,
            cost_adv))``: the adversarial batch, a 3-channel overlay whose
            border colour marks each sample (252, 39, 25 when flagged,
            91, 252, 25 otherwise) with the adversarial image pasted inside,
            and the clean/adversarial accuracy and cross-entropy as Python
            numbers.
        """
        self.set_mode('eval')

        x = Variable(cuda(x, self.cuda), requires_grad=True)
        y_true = Variable(cuda(y_true, self.cuda), requires_grad=False)
        targeted = y_target is not None
        if targeted:
            y_target = Variable(cuda(y_target, self.cuda), requires_grad=False)

        # Clean-input metrics.
        h = self.net(x)
        prediction = h.max(1)[1]
        accuracy = torch.eq(prediction, y_true).float().mean()
        cost = F.cross_entropy(h, y_true)

        # The attack optimises towards y_target or away from y_true.
        y_attack = y_target if targeted else y_true
        if iteration == 1:
            x_adv, h_adv, h = self.attack.fgsm(x, y_attack, targeted, eps)
        else:
            x_adv, h_adv, h = self.attack.i_fgsm(x, y_attack, targeted, eps,
                                                 alpha, iteration)

        # Adversarial-input metrics, always measured against the true labels.
        prediction_adv = h_adv.max(1)[1]
        accuracy_adv = torch.eq(prediction_adv, y_true).float().mean()
        cost_adv = F.cross_entropy(h_adv, y_true)

        # Flag samples on which the attack succeeded: the target was hit
        # (targeted) or the prediction differs from the clean one (untargeted).
        if targeted:
            changed = torch.eq(y_target, prediction_adv)
        else:
            changed = torch.eq(prediction, prediction_adv)
            changed = torch.eq(changed, 0)

        # Size the overlay from the adversarial batch instead of assuming
        # 28x28 inputs.
        height, width = x_adv.size(2), x_adv.size(3)
        changed = changed.float().view(-1, 1, 1, 1).repeat(1, 3, height, width)

        # Fill the whole overlay with the per-sample status colour.
        changed[:, 0, :, :] = where(changed[:, 0, :, :] == 1, 252, 91)
        changed[:, 1, :, :] = where(changed[:, 1, :, :] == 1, 39, 252)
        changed[:, 2, :, :] = where(changed[:, 2, :, :] == 1, 25, 25)
        # NOTE(review): self.scale presumably maps [0, 1] colours into the
        # value range of the network input - confirm.
        changed = self.scale(changed / 255)

        # Paste the adversarial image inside, leaving a coloured frame
        # (3 px on the top/left, 2 px on the bottom/right).
        if x_adv.size(1) == 3:
            image = x_adv
        else:
            image = x_adv.repeat(1, 3, 1, 1)
        changed[:, :, 3:-2, 3:-2] = image[:, :, 3:-2, 3:-2]

        self.set_mode('train')

        # .item() instead of .data[0]: the metrics are 0-dim tensors, which
        # cannot be indexed on PyTorch >= 0.4.
        metrics = (accuracy.item(), cost.item(),
                   accuracy_adv.item(), cost_adv.item())
        return x_adv.data, changed.data, metrics
Example #3
0
    def FGSM(self,
             x,
             y_true,
             y_target=None,
             eps=0.03,
             alpha=2 / 255,
             iteration=1):
        """Attack a batch with iterative FGSM and report the effect.

        The network is switched to 'eval' mode for the attack and back to
        'train' mode before returning.

        Args:
            x: Input batch. Assumed to be a 4-D image batch with 1 or 3
                channels, since the overlay code below indexes four
                dimensions and needs a 3-channel image (TODO confirm
                against callers).
            y_true: Ground-truth labels of ``x``.
            y_target: Target labels. When given the attack is targeted,
                otherwise it is untargeted against ``y_true``.
            eps: Perturbation budget forwarded to the attack.
            alpha: Step size forwarded to the attack.
            iteration: Number of attack iterations forwarded to
                ``self.attack.i_fgsm``.

        Returns:
            Tuple ``(x_adv, changed, (accuracy, cost, accuracy_adv,
            cost_adv))``: the adversarial batch, a 3-channel overlay whose
            border colour marks each sample (252, 39, 25 when flagged,
            91, 252, 25 otherwise) with the adversarial image pasted inside,
            and the clean/adversarial accuracy and cross-entropy as Python
            numbers.
        """
        self.set_mode('eval')
        x = Variable(cuda(x, self.cuda), requires_grad=True)
        y_true = Variable(cuda(y_true, self.cuda), requires_grad=False)

        targeted = y_target is not None
        if targeted:
            y_target = Variable(cuda(y_target, self.cuda), requires_grad=False)

        # original image classification
        h = self.net(x)
        prediction = h.max(1)[1]
        accuracy = torch.eq(prediction, y_true).float().mean()
        cost = F.cross_entropy(h, y_true)

        # adversarial image classification; `iteration` is forwarded so the
        # caller's requested number of steps is actually honoured.
        y_attack = y_target if targeted else y_true
        x_adv, h_adv, h = self.attack.i_fgsm(x, y_attack, targeted, eps,
                                             alpha, iteration)

        prediction_adv = h_adv.max(1)[1]
        accuracy_adv = torch.eq(prediction_adv, y_true).float().mean()
        cost_adv = F.cross_entropy(h_adv, y_true)

        # Flag samples on which the attack succeeded: the target was hit
        # (targeted) or the prediction differs from the clean one (untargeted).
        # The flag decides whether a red or a green frame is drawn.
        if targeted:
            changed = torch.eq(y_target, prediction_adv)
        else:
            changed = torch.eq(prediction, prediction_adv)
            changed = torch.eq(changed, 0)

        # Size the overlay from the adversarial batch rather than from the
        # dataset name, so unknown datasets no longer fail further down.
        height, width = x_adv.size(2), x_adv.size(3)
        changed = changed.float().view(-1, 1, 1, 1).repeat(1, 3, height, width)

        # fill the grid with color
        changed[:, 0, :, :] = where(changed[:, 0, :, :] == 1, 252, 91)
        changed[:, 1, :, :] = where(changed[:, 1, :, :] == 1, 39, 252)
        changed[:, 2, :, :] = where(changed[:, 2, :, :] == 1, 25, 25)
        # NOTE(review): self.scale presumably maps [0, 1] colours into the
        # value range of the network input - confirm.
        changed = self.scale(changed / 255)

        # fill the inner part of the grid with the image, leaving a coloured
        # frame (3 px on the top/left, 2 px on the bottom/right); single
        # channel images are replicated to three channels first.
        if x_adv.size(1) == 3:
            image = x_adv
        else:
            image = x_adv.repeat(1, 3, 1, 1)
        changed[:, :, 3:-2, 3:-2] = image[:, :, 3:-2, 3:-2]

        self.set_mode('train')

        metrics = (accuracy.data.item(), cost.data.item(),
                   accuracy_adv.data.item(), cost_adv.data.item())
        return x_adv.data, changed.data, metrics