def i_fgsm(self, x, y, targeted=False, eps=0.03, alpha=1, iteration=1, x_val_min=-1, x_val_max=1):
    """Craft adversarial examples with the iterative fast-gradient-sign method.

    Starting from a copy of ``x``, each pass takes one signed-gradient step
    of size ``alpha``, pulls the result back to within ``eps`` of ``x`` and
    clamps it to ``[x_val_min, x_val_max]``.

    Args:
        x: Clean input batch; its ``.data`` seeds the adversarial batch.
        y: Labels handed to ``self.criterion``. When ``targeted`` the
            criterion is minimised w.r.t. these labels, otherwise it is
            maximised (the cost is negated).
        targeted: Selects the sign of the cost as described above.
        eps: Maximum per-element deviation from ``x``.
        alpha: Step size applied to the sign of the gradient.
        iteration: Number of gradient steps.
        x_val_min: Lower bound passed to ``torch.clamp``.
        x_val_max: Upper bound passed to ``torch.clamp``.

    Returns:
        Tuple ``(x_adv, h_adv, h)``: the adversarial batch, ``self.net``
        evaluated on it, and ``self.net`` evaluated on the clean ``x``.
    """
    # The eps-ball limits depend only on the clean input, so build them once.
    upper_bound = x + eps
    lower_bound = x - eps

    x_adv = Variable(x.data, requires_grad=True)
    for _ in range(iteration):
        loss = self.criterion(self.net(x_adv), y)
        cost = loss if targeted else -loss

        self.net.zero_grad()
        # x_adv is re-wrapped at the end of every pass, so this guard is
        # purely defensive against a stale gradient.
        if x_adv.grad is not None:
            x_adv.grad.data.fill_(0)
        cost.backward()

        # sign_() works in place and hands back the same gradient tensor.
        direction = x_adv.grad.sign_()
        stepped = x_adv - alpha * direction

        # NOTE(review): `where` is a helper defined outside this block; it is
        # assumed to be an element-wise select(cond, if_true, if_false).
        stepped = where(stepped > upper_bound, upper_bound, stepped)
        stepped = where(stepped < lower_bound, lower_bound, stepped)
        stepped = torch.clamp(stepped, x_val_min, x_val_max)

        # Re-wrap as a fresh leaf so the next pass differentiates w.r.t. it.
        x_adv = Variable(stepped.data, requires_grad=True)

    h = self.net(x)
    h_adv = self.net(x_adv)
    return x_adv, h_adv, h
def FGSM(self, x, y_true, y_target=None, eps=0.03, alpha=2 / 255, iteration=1):
    """Attack a batch and summarise how the classifier reacts.

    The network is switched to ``'eval'`` mode for the duration of the call
    and switched to ``'train'`` mode before returning.

    Args:
        x: Clean input batch. The hard-coded 28x28 tile size and the 3x
            channel repeat below imply single-channel 28x28 images.
        y_true: Ground-truth labels for ``x``.
        y_target: Optional target labels; when given the attack is targeted.
        eps: Perturbation budget forwarded to the attack.
        alpha: Step size, used only by the iterative attack.
        iteration: ``1`` selects ``self.attack.fgsm``; any other value
            selects ``self.attack.i_fgsm`` with that many steps.

    Returns:
        Tuple ``(x_adv, changed, (accuracy, cost, accuracy_adv, cost_adv))``
        where ``x_adv`` is the adversarial batch, ``changed`` is a 3-channel
        image batch showing each adversarial image inside a coloured frame,
        and the last element holds Python floats for clean/adversarial
        accuracy and cross-entropy against ``y_true``.
    """
    self.set_mode('eval')

    x = Variable(cuda(x, self.cuda), requires_grad=True)
    y_true = Variable(cuda(y_true, self.cuda), requires_grad=False)
    targeted = y_target is not None
    if targeted:
        y_target = Variable(cuda(y_target, self.cuda), requires_grad=False)

    # Clean-input metrics.
    h = self.net(x)
    prediction = h.max(1)[1]
    accuracy = torch.eq(prediction, y_true).float().mean()
    cost = F.cross_entropy(h, y_true)

    # Single-step vs. iterative attack; the label is the target when targeted.
    attack_label = y_target if targeted else y_true
    if iteration == 1:
        x_adv, h_adv, h = self.attack.fgsm(x, attack_label, targeted, eps)
    else:
        x_adv, h_adv, h = self.attack.i_fgsm(x, attack_label, targeted, eps, alpha, iteration)

    # Adversarial-input metrics (still measured against the true labels).
    prediction_adv = h_adv.max(1)[1]
    accuracy_adv = torch.eq(prediction_adv, y_true).float().mean()
    cost_adv = F.cross_entropy(h_adv, y_true)

    # Per-sample flag: 1 where the adversarial prediction differs from the
    # target (targeted) or from the clean prediction (untargeted).
    if targeted:
        changed = torch.eq(y_target, prediction_adv)
    else:
        changed = torch.eq(prediction, prediction_adv)
    changed = torch.eq(changed, 0)

    # Expand each flag into a solid RGB tile: (252, 39, 25) where the flag
    # is 1, (91, 252, 25) otherwise.
    changed = changed.float().view(-1, 1, 1, 1).repeat(1, 3, 28, 28)
    changed[:, 0, :, :] = where(changed[:, 0, :, :] == 1, 252, 91)
    changed[:, 1, :, :] = where(changed[:, 1, :, :] == 1, 39, 252)
    changed[:, 2, :, :] = where(changed[:, 2, :, :] == 1, 25, 25)
    changed = self.scale(changed / 255)

    # Paste the adversarial image over the tile interior so only a thin
    # coloured frame remains visible.
    changed[:, :, 3:-2, 3:-2] = x_adv.repeat(1, 3, 1, 1)[:, :, 3:-2, 3:-2]

    self.set_mode('train')

    # .item() instead of .data[0]: the metrics are 0-dim tensors, which
    # cannot be indexed on PyTorch >= 0.4.
    return x_adv.data, changed.data,\
        (accuracy.data.item(), cost.data.item(), accuracy_adv.data.item(), cost_adv.data.item())
def FGSM(self, x, y_true, y_target=None, eps=0.03, alpha=2 / 255, iteration=1):
    """Attack a batch with ``self.attack.i_fgsm`` and summarise the outcome.

    The network is switched to ``'eval'`` mode for the duration of the call
    and switched to ``'train'`` mode before returning.

    Args:
        x: Clean input batch.
        y_true: Ground-truth labels for ``x``.
        y_target: Optional target labels; when given the attack is targeted.
        eps: Perturbation budget forwarded to the attack.
        alpha: Step size forwarded to the attack.
        iteration: Number of attack steps forwarded to the attack.

    Returns:
        Tuple ``(x_adv, changed, (accuracy, cost, accuracy_adv, cost_adv))``
        where ``x_adv`` is the adversarial batch, ``changed`` is a 3-channel
        image batch showing each adversarial image inside a coloured frame,
        and the last element holds Python floats for clean/adversarial
        accuracy and cross-entropy against ``y_true``.
    """
    self.set_mode('eval')

    x = Variable(cuda(x, self.cuda), requires_grad=True)
    y_true = Variable(cuda(y_true, self.cuda), requires_grad=False)
    targeted = y_target is not None
    if targeted:
        y_target = Variable(cuda(y_target, self.cuda), requires_grad=False)

    # Original image classification.
    h = self.net(x)
    prediction = h.max(1)[1]
    accuracy = torch.eq(prediction, y_true).float().mean()
    cost = F.cross_entropy(h, y_true)

    # Adversarial image classification. `iteration` is forwarded so that a
    # caller-supplied step count is honoured instead of silently ignored.
    attack_label = y_target if targeted else y_true
    x_adv, h_adv, h = self.attack.i_fgsm(x, attack_label, targeted, eps, alpha, iteration)
    prediction_adv = h_adv.max(1)[1]
    accuracy_adv = torch.eq(prediction_adv, y_true).float().mean()
    cost_adv = F.cross_entropy(h_adv, y_true)

    # Per-sample flag: 1 where the adversarial prediction differs from the
    # target (targeted) or from the clean prediction (untargeted).
    if targeted:
        changed = torch.eq(y_target, prediction_adv)
    else:
        changed = torch.eq(prediction, prediction_adv)
    changed = torch.eq(changed, 0)

    # Expand each flag into a solid 3-channel tile of the dataset's image size.
    # NOTE(review): any other dataset name leaves `changed` unexpanded, so the
    # channel indexing below would fail - confirm only these two are used.
    if self.dataset == 'MNIST':
        changed = changed.float().view(-1, 1, 1, 1).repeat(1, 3, 28, 28)
    elif self.dataset == 'CIFAR10':
        changed = changed.float().view(-1, 1, 1, 1).repeat(1, 3, 32, 32)

    # Fill the tile with colour: RGB (252, 39, 25) where the flag is 1,
    # (91, 252, 25) otherwise.
    changed[:, 0, :, :] = where(changed[:, 0, :, :] == 1, 252, 91)
    changed[:, 1, :, :] = where(changed[:, 1, :, :] == 1, 39, 252)
    changed[:, 2, :, :] = where(changed[:, 2, :, :] == 1, 25, 25)
    changed = self.scale(changed / 255)

    # Fill the inner part of the tile with the adversarial image so only a
    # thin coloured frame remains; MNIST images are tiled to three channels.
    if self.dataset == 'MNIST':
        changed[:, :, 3:-2, 3:-2] = x_adv.repeat(1, 3, 1, 1)[:, :, 3:-2, 3:-2]
    elif self.dataset == 'CIFAR10':
        changed[:, :, 3:-2, 3:-2] = x_adv[:, :, 3:-2, 3:-2]

    self.set_mode('train')

    return x_adv.data, changed.data,\
        (accuracy.data.item(), cost.data.item(), accuracy_adv.data.item(), cost_adv.data.item())