def i_fgsm(self, x, y, targeted=False, eps=0.03, alpha=1, iteration=1, x_val_min=-1, x_val_max=1):
    """Iterative FGSM (I-FGSM / BFGS-style PGD with an L-inf eps-ball projection).

    Args:
        x: input batch to perturb.
        y: labels — the true labels for an untargeted attack, or the desired
           target labels when ``targeted`` is True.
        targeted: if True, drive predictions *toward* ``y``; otherwise away
           from it.
        eps: L-inf radius of the allowed perturbation around ``x``.
        alpha: per-iteration step size.
        iteration: number of gradient steps.
        x_val_min, x_val_max: valid pixel range for clamping.

    Returns:
        (x_adv, h_adv, h): the adversarial batch, the network output on it,
        and the network output on the clean input.
    """
    x_adv = Variable(x.data, requires_grad=True)
    for _ in range(iteration):
        h_adv = self.net(x_adv)
        # Targeted: minimize the loss w.r.t. the target label.
        # Untargeted: maximize the true-label loss (negate, then descend).
        cost = self.criterion(h_adv, y) if targeted else -self.criterion(h_adv, y)

        self.net.zero_grad()
        if x_adv.grad is not None:
            x_adv.grad.data.fill_(0)
        cost.backward()

        # Signed-gradient step, then project back into the eps-ball and
        # the valid pixel range; re-wrap as a fresh leaf for the next step.
        x_adv.grad.sign_()
        x_adv = x_adv - alpha * x_adv.grad
        x_adv = where(x_adv > x + eps, x + eps, x_adv)
        x_adv = where(x_adv < x - eps, x - eps, x_adv)
        x_adv = torch.clamp(x_adv, x_val_min, x_val_max)
        x_adv = Variable(x_adv.data, requires_grad=True)

    h = self.net(x)
    h_adv = self.net(x_adv)

    return x_adv, h_adv, h
def test(model, test_dataloader, cost, print_freq=40, batch_num=None, denoise=None, random_layer=None):
    """Evaluate `model` on adversarial images, optionally after denoising.

    Args:
        model: classifier under evaluation (moved to eval mode here).
        test_dataloader: yields (images, adversarial_images, labels, diffs)
            tuples; only the adversarial images are classified.
        cost: loss function applied to (outputs, labels).
        print_freq: log every `print_freq` iterations.
        batch_num: total number of batches (used only for log formatting).
        denoise: optional denoiser applied to the adversarial images.
        random_layer: optional randomization layer applied after denoising.

    Returns:
        (batch_time.avg, losses.avg, error.avg, meanD.avg)
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    error = AverageMeter()
    meanD = AverageMeter()
    end = time.time()
    model.eval()
    # FIX: evaluation previously ran with autograd enabled, building graphs
    # for every batch; no_grad saves memory and time without changing results.
    with torch.no_grad():
        for i, (images, adversarial_images, labels, diffs) in enumerate(test_dataloader):
            adversarial_images, labels = adversarial_images.cuda(), labels.cuda()
            if denoise:
                adversarial_images = denoise(adversarial_images)
            if random_layer:
                adversarial_images = random_layer(adversarial_images)
            outputs = model(adversarial_images)
            loss = cost(outputs, labels)
            batch_size = labels.size(0)
            outputs = outputs.max(1)[1]
            error.update(
                torch.ne(outputs.cpu(), labels.cpu()).float().sum().item() / batch_size,
                batch_size)
            losses.update(loss.item(), batch_size)
            # Mean of `diffs` counted only on correctly classified samples
            # (misclassified samples contribute 0).
            meanD.update(
                where((outputs == labels).cpu(), diffs.float(), 0.).mean().item(),
                batch_size)
            batch_time.update(time.time() - end)
            end = time.time()
            if i % print_freq == 0:
                res = '\t'.join([
                    'test',
                    'Iter: [%d/%d]' % (i + 1, batch_num),
                    'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                    'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                    'Error %.4f (%.4f)' % (error.val, error.avg),
                    'meanD %.4f (%.4f)' % (meanD.val, meanD.avg)
                ])
                print(res)
    return batch_time.avg, losses.avg, error.avg, meanD.avg
def FGSM(self, x, y_true, y_target=None, eps=0.03, alpha=2 / 255, iteration=1):
    """Run a (possibly iterated) FGSM attack and build a visualization batch.

    Args:
        x: input images (numpy array or tensor).
        y_true: ground-truth labels (numpy array or tensor).
        y_target: optional target labels; when given, the attack is targeted.
        eps: L-inf perturbation bound passed to the attack.
        alpha: per-iteration step size (used only when iteration > 1).
        iteration: 1 selects single-step fgsm, otherwise i_fgsm.

    Returns:
        (x_adv.data, changed.data, (accuracy, cost, accuracy_adv, cost_adv))
        where `changed` is a 3-channel framed version of x_adv whose border
        color encodes whether the attack succeeded per-sample.

    NOTE(review): `.data[0]` on 0-dim tensors in the return only works on
    PyTorch <= 0.3; on newer versions it would need `.item()` — confirm the
    project's pinned torch version.
    """
    self.set_mode('eval')
    # Accept numpy input transparently.
    if type(x) == np.ndarray:
        x = torch.from_numpy(x)
    if type(y_true) == np.ndarray:
        y_true = torch.from_numpy(y_true)
    x = Variable(x, requires_grad=True).to(self.device)
    y_true = Variable(y_true, requires_grad=False).to(self.device)
    if y_target is not None:
        targeted = True
        y_target = Variable(y_target, requires_grad=False).to(self.device)
    else:
        targeted = False
    # Clean-input predictions / metrics, before the attack.
    h = self.net(x)
    prediction = h.max(1)[1]
    accuracy = torch.eq(prediction, y_true).float().mean()
    cost = F.cross_entropy(h, y_true)
    # Dispatch to single-step or iterative attack.
    if iteration == 1:
        if targeted:
            x_adv, h_adv, h = self.attack.fgsm(x, y_target, True, eps)
        else:
            x_adv, h_adv, h = self.attack.fgsm(x, y_true, False, eps)
    else:
        if targeted:
            x_adv, h_adv, h = self.attack.i_fgsm(x, y_target, True, eps, alpha, iteration)
        else:
            x_adv, h_adv, h = self.attack.i_fgsm(x, y_true, False, eps, alpha, iteration)
    prediction_adv = h_adv.max(1)[1]
    accuracy_adv = torch.eq(prediction_adv, y_true).float().mean()
    cost_adv = F.cross_entropy(h_adv, y_true)
    # make indication of perturbed images that changed predictions of the classifier
    # `changed` == 1 marks samples where the attack succeeded:
    # targeted -> prediction reached the target; untargeted -> prediction flipped.
    if targeted:
        changed = torch.eq(y_target, prediction_adv)
    else:
        changed = torch.eq(prediction, prediction_adv)
        changed = torch.eq(changed, 0)
    # Broadcast the success flag over a 3x28x28 RGB frame (presumably MNIST-sized
    # images — TODO confirm), then paint success red-ish and failure green-ish.
    changed = changed.float().view(-1, 1, 1, 1).repeat(1, 3, 28, 28)
    changed[:, 0, :, :] = where(changed[:, 0, :, :] == 1, 252, 91)
    changed[:, 1, :, :] = where(changed[:, 1, :, :] == 1, 39, 252)
    changed[:, 2, :, :] = where(changed[:, 2, :, :] == 1, 25, 25)
    changed = self.scale(changed / 255)
    # Paste the adversarial image into the interior, leaving a colored border.
    changed[:, :, 3:-2, 3:-2] = x_adv.repeat(1, 3, 1, 1)[:, :, 3:-2, 3:-2]
    self.set_mode('train')
    return x_adv.data, changed.data,\
        (accuracy.data[0], cost.data[0], accuracy_adv.data[0], cost_adv.data[0])
def kl_y_to_p(logits, y):
    """KL divergence KL(y || p) along the last axis, where p = softmax-like
    probabilities derived from `logits` via utils.logits_to_probs.

    The utils.where masking skips terms where y == 0, so 0*log(0) does not
    produce NaNs.
    """
    log_y = torch.log(y)
    log_p = torch.log(utils.logits_to_probs(logits))
    y_logy = torch.sum(utils.where(y > 0, y * log_y), dim=-1)
    y_logp = torch.sum(utils.where(y > 0, y * log_p), dim=-1)
    return y_logy - y_logp
def entropy(probs):
    """Shannon entropy along the last axis.

    Terms with probs == 0 are masked out by utils.where so that 0*log(0)
    contributes nothing instead of NaN.
    """
    weighted_log = utils.where(probs > 0, probs * torch.log(probs))
    return -torch.sum(weighted_log, dim=-1)
def train_epoch(model_denoise, model_classify, train_dataloader, optimizer, cost, epoch, n_epochs,
                print_freq=40, batch_num=None, random_layer=None):
    """Train the denoiser for one epoch against a frozen classifier.

    The denoiser is optimized so that the classifier's output on denoised
    adversarial images matches its output on the corresponding clean images
    (feature/logit-matching loss).

    Args:
        model_denoise: denoiser being trained (set to train mode).
        model_classify: frozen classifier (set to eval mode; its clean-branch
            output is detached so no gradients flow into it).
        train_dataloader: yields (clean_images, adversarial_images, labels, diffs).
        optimizer: optimizer over the denoiser's parameters.
        cost: loss comparing the two classifier outputs; reduced with .mean().
        epoch, n_epochs: current / total epoch count, for logging.
        print_freq: log every `print_freq` iterations.
        batch_num: total number of batches (log formatting only).
        random_layer: optional randomization applied jointly to both branches.

    Returns:
        (batch_time.avg, losses.avg, error1.avg, error2.avg, meanD.avg)
        where error1/error2 are clean/adversarial classification errors.
    """
    # CLEANUP: removed large swaths of commented-out experiment code and the
    # unused error3/error4 meters; behavior is unchanged.
    batch_time = AverageMeter()
    losses = AverageMeter()
    error1 = AverageMeter()
    error2 = AverageMeter()
    meanD = AverageMeter()
    end = time.time()
    model_denoise.train()
    model_classify.eval()
    for i, (clean_images, adversarial_images, labels, diffs) in enumerate(train_dataloader):
        clean_images, adversarial_images, labels = (
            clean_images.cuda(), adversarial_images.cuda(), labels.cuda())
        optimizer.zero_grad()
        d_adversarial_images = model_denoise(adversarial_images)
        if random_layer:
            # NOTE: random_layer is applied to both branches in one call here
            # (cf. valid_epoch, which calls it per-branch) — presumably to share
            # the same random transform; confirm against the layer's contract.
            clean_images, d_adversarial_images = random_layer(clean_images, d_adversarial_images)
        y1, f1 = model_classify(clean_images, need_feature=True)
        y2, f2 = model_classify(d_adversarial_images, need_feature=True)
        # Detach the clean branch: it serves as a fixed target, and the frozen
        # classifier must not receive gradients through it.
        y1 = y1.detach()
        loss = cost(y1, y2).mean()
        loss.backward()
        optimizer.step()
        batch_size = labels.size(0)
        y1 = y1.max(1)[1]
        y2 = y2.max(1)[1]
        error1.update(torch.ne(y1.cpu(), labels.cpu()).float().sum().item() / batch_size, batch_size)
        error2.update(torch.ne(y2.cpu(), labels.cpu()).float().sum().item() / batch_size, batch_size)
        # Mean of `diffs` over correctly classified (denoised) samples only.
        meanD.update(where((y2 == labels).cpu(), diffs.float(), 0.).mean().item(), batch_size)
        losses.update(loss.item(), batch_size)
        batch_time.update(time.time() - end)
        end = time.time()
        if i % print_freq == 0:
            res = '\t'.join([
                'Epoch: [%d/%d]' % (epoch + 1, n_epochs),
                'Iter: [%d/%d]' % (i + 1, batch_num),
                'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                'Error_clean %.4f (%.4f)' % (error1.val, error1.avg),
                'Error_adversaril %.4f (%.4f)' % (error2.val, error2.avg),
                'meanD %.4f (%.4f)' % (meanD.val, meanD.avg)
            ])
            print(res)
    return batch_time.avg, losses.avg, error1.avg, error2.avg, meanD.avg
def valid_epoch(model_denoise, model_classify, valid_dataloader, cost,
                print_freq=40, batch_num=None, random_layer=None):
    """Validate the denoiser + frozen classifier for one epoch.

    Mirrors train_epoch but performs no optimization: computes the same
    matching loss and clean/adversarial error rates on the validation set.

    Args:
        model_denoise: denoiser (eval mode).
        model_classify: classifier (eval mode).
        valid_dataloader: yields (clean_images, adversarial_images, labels, diffs).
        cost: loss comparing the two classifier outputs; reduced with .mean().
        print_freq: log every `print_freq` iterations.
        batch_num: total number of batches (log formatting only).
        random_layer: optional randomization applied to each branch separately.

    Returns:
        (batch_time.avg, losses.avg, error1.avg, error2.avg, meanD.avg)
    """
    # CLEANUP: removed dead commented-out code and unused error3/error4 meters.
    batch_time = AverageMeter()
    losses = AverageMeter()
    error1 = AverageMeter()
    error2 = AverageMeter()
    meanD = AverageMeter()
    end = time.time()
    model_denoise.eval()
    model_classify.eval()
    # FIX: validation previously ran with autograd enabled; no_grad avoids
    # building graphs for every batch without changing the reported metrics.
    with torch.no_grad():
        for i, (clean_images, adversarial_images, labels, diffs) in enumerate(valid_dataloader):
            clean_images, adversarial_images, labels = (
                clean_images.cuda(), adversarial_images.cuda(), labels.cuda())
            d_adversarial_images = model_denoise(adversarial_images)
            if random_layer:
                clean_images = random_layer(clean_images)
                d_adversarial_images = random_layer(d_adversarial_images)
            y1, f1 = model_classify(clean_images, need_feature=True)
            y2, f2 = model_classify(d_adversarial_images, need_feature=True)
            loss = cost(y1, y2).mean()
            batch_size = labels.size(0)
            y1 = y1.max(1)[1]
            y2 = y2.max(1)[1]
            error1.update(torch.ne(y1.cpu(), labels.cpu()).float().sum().item() / batch_size, batch_size)
            error2.update(torch.ne(y2.cpu(), labels.cpu()).float().sum().item() / batch_size, batch_size)
            # Mean of `diffs` over correctly classified (denoised) samples only.
            meanD.update(where((y2 == labels).cpu(), diffs.float(), 0.).mean().item(), batch_size)
            losses.update(loss.item(), batch_size)
            batch_time.update(time.time() - end)
            end = time.time()
            if i % print_freq == 0:
                res = '\t'.join([
                    'Valid:',
                    'Iter: [%d/%d]' % (i + 1, batch_num),
                    'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                    'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                    'Error_clean %.4f (%.4f)' % (error1.val, error1.avg),
                    'Error_adversaril %.4f (%.4f)' % (error2.val, error2.avg),
                    'meanD %.4f (%.4f)' % (meanD.val, meanD.avg)
                ])
                print(res)
    return batch_time.avg, losses.avg, error1.avg, error2.avg, meanD.avg