def perturb(self, original_images, labels, reduction4loss='mean', random_start=False):
    """Generate adversarial examples for `original_images` via iterative FGSM/PGD.

    Args:
        original_images: input batch; pixel values are assumed to lie within
            [self.min_val, self.max_val].
        labels: target labels used in the cross-entropy attack loss.
        reduction4loss: reduction mode passed to F.cross_entropy ('mean' or
            'none'); 'none' keeps a per-sample loss and seeds autograd with a
            ones tensor.
        random_start: if True, start from a random point inside the epsilon
            ball around the input instead of the input itself.

    Returns:
        The adversarial batch `x` (same shape as `original_images`), projected
        back into the epsilon ball and clamped to valid pixel range.
    """
    if random_start:
        # Start the attack from a uniformly-sampled point in the L_inf ball.
        rand_perturb = torch.FloatTensor(original_images.shape).uniform_(
            -self.epsilon, self.epsilon)
        rand_perturb = tensor2cuda(rand_perturb)
        x = original_images + rand_perturb
        x.clamp_(self.min_val, self.max_val)
    else:
        x = original_images.clone()
    x.requires_grad = True
    # max_x = original_images + self.epsilon
    # min_x = original_images - self.epsilon

    # Attack runs with the model in eval mode; train mode is restored below.
    self.model.eval()
    with torch.enable_grad():
        for _iter in range(self.max_iters):
            # NOTE(review): `_eval=True` is a custom kwarg of the project's
            # model wrapper — presumably it freezes BN/dropout; confirm.
            outputs = self.model(x, _eval=True)

            loss = F.cross_entropy(outputs, labels, reduction=reduction4loss)

            if reduction4loss == 'none':
                # Per-sample loss is a vector; autograd needs explicit
                # grad_outputs of the same shape.
                grad_outputs = tensor2cuda(torch.ones(loss.shape))
            else:
                grad_outputs = None

            grads = torch.autograd.grad(loss, x, grad_outputs=grad_outputs,
                                        only_inputs=True)[0]

            # Ascend the loss: signed-gradient step of size alpha.
            x.data += self.alpha * torch.sign(grads.data)

            # the adversaries' pixel value should within max_x and min_x due
            # to the l_infinity / l2 restriction
            x = project(x, original_images, self.epsilon, self._type)
            # the adversaries' value should be valid pixel value
            x.clamp_(self.min_val, self.max_val)

    # Restore training mode unconditionally (callers relying on eval mode
    # must reset it themselves).
    self.model.train()

    return x
def generate_gradients(self, input_image, target_class):
    """Compute the gradient of the model output w.r.t. the input image,
    seeded with a one-hot vector for `target_class`.

    The model is switched to eval mode for the forward pass and restored
    to train mode before returning.
    """
    self.model.eval()

    adv_input = input_image.clone()
    adv_input.requires_grad = True

    with torch.enable_grad():
        logits = self.model(adv_input)
        self.model.zero_grad()

        # Seed backprop with a one-hot selector so only the target class
        # column of the output contributes to the gradient.
        seed = tensor2cuda(one_hot(target_class, logits.shape[1]))
        input_grad = torch.autograd.grad(logits,
                                         adv_input,
                                         grad_outputs=seed,
                                         only_inputs=True)[0]

    self.model.train()
    return input_grad
def test(self, model, loader, adv_test=False, use_pseudo_label=False):
    """Evaluate clean (and optionally adversarial) accuracy over `loader`.

    Returns (clean_accuracy, adv_accuracy); when adv_test is False the
    second value is -1.
    """
    correct_clean = 0.0
    correct_adv = 0.0
    seen = 0

    with torch.no_grad():
        for data, label in loader:
            data = tensor2cuda(data)
            label = tensor2cuda(label)

            model.eval()
            logits = model(data)
            # output = model(data, _eval=True)
            clean_pred = torch.max(logits, dim=1)[1]
            correct_clean += evaluate(clean_pred.cpu().numpy(),
                                      label.cpu().numpy(), 'sum')
            seen += logits.shape[0]

            if not adv_test:
                # Sentinel: -seen / seen == -1 at return time.
                correct_adv = -seen
                continue

            # Optionally attack the model's own predictions instead of the
            # ground-truth labels.
            target = clean_pred if use_pseudo_label else label
            with torch.enable_grad():
                adv_data = self.attack.perturb(data, target, 'mean', False)

            model.eval()
            adv_logits = model(adv_data)
            # adv_output = model(adv_data, _eval=True)
            adv_pred = torch.max(adv_logits, dim=1)[1]
            correct_adv += evaluate(adv_pred.cpu().numpy(),
                                    label.cpu().numpy(), 'sum')

    return correct_clean / seen, correct_adv / seen
args = parser() label_dict = LabelDict(args.dataset) te_dataset = tv.datasets.CIFAR10(args.data_root, train=False, transform=tv.transforms.ToTensor(), download=True) te_loader = DataLoader(te_dataset, batch_size=args.batch_size, shuffle=False, num_workers=4) for data, label in te_loader: data, label = tensor2cuda(data), tensor2cuda(label) break adv_list = [] pred_list = [] with torch.no_grad(): model = WideResNet(depth=34, num_classes=10, widen_factor=10, dropRate=0.0) load_model(model, args.load_checkpoint) if torch.cuda.is_available(): model.cuda() attack = FastGradientSignUntargeted(model,
def train(self, model, tr_loader, va_loader=None, adv_train=False):
    """Train `model` with SGD, optionally on adversarial examples.

    Args:
        model: network to train (project wrapper taking an `_eval` kwarg).
        tr_loader: training DataLoader yielding (data, label).
        va_loader: optional validation DataLoader; when given, clean and
            adversarial accuracy are logged after every epoch.
        adv_train: if True, each training step optimizes on adversarial
            examples generated from a random start inside the epsilon ball.

    Side effects: logs metrics, periodically saves image grids and model
    checkpoints under the folders configured in self.args.
    """
    args = self.args
    logger = self.logger

    opt = torch.optim.SGD(model.parameters(), args.learning_rate,
                          weight_decay=args.weight_decay,
                          momentum=args.momentum)
    # Iteration-indexed milestones: the LR decays at steps 40k and 60k
    # because scheduler.step() is invoked once per batch (see loop below).
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        opt, milestones=[40000, 60000], gamma=0.1)
    _iter = 0

    begin_time = time()

    for epoch in range(1, args.max_epoch + 1):
        for data, label in tr_loader:
            data, label = tensor2cuda(data), tensor2cuda(label)

            if adv_train:
                # When training, the adversarial example is created from a random
                # close point to the original data point. If in evaluation mode,
                # just start from the original data point.
                adv_data = self.attack.perturb(data, label, 'mean', True)
                output = model(adv_data, _eval=False)
            else:
                output = model(data, _eval=False)

            loss = F.cross_entropy(output, label)

            opt.zero_grad()
            loss.backward()
            opt.step()

            if _iter % args.n_eval_step == 0:
                t1 = time()

                if adv_train:
                    # Adversarially trained: report clean accuracy from a
                    # fresh eval-mode forward, robust accuracy from the
                    # training output (which was computed on adv examples).
                    with torch.no_grad():
                        stand_output = model(data, _eval=True)
                    pred = torch.max(stand_output, dim=1)[1]
                    # print(pred)
                    std_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100

                    pred = torch.max(output, dim=1)[1]
                    # print(pred)
                    adv_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100
                else:
                    # Standard training: attack from the clean point (no
                    # random start) purely to measure robustness.
                    adv_data = self.attack.perturb(data, label, 'mean', False)
                    with torch.no_grad():
                        adv_output = model(adv_data, _eval=True)
                    pred = torch.max(adv_output, dim=1)[1]
                    # print(label)
                    # print(pred)
                    adv_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100

                    pred = torch.max(output, dim=1)[1]
                    # print(pred)
                    std_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100

                t2 = time()

                logger.info(
                    f'epoch: {epoch}, iter: {_iter}, lr={opt.param_groups[0]["lr"]}, '
                    f'spent {time()-begin_time:.2f} s, tr_loss: {loss.item():.3f}'
                )
                logger.info(
                    f'standard acc: {std_acc:.3f}%, robustness acc: {adv_acc:.3f}%'
                )

                # begin_time = time()
                # if va_loader is not None:
                #     va_acc, va_adv_acc = self.test(model, va_loader, True)
                #     va_acc, va_adv_acc = va_acc * 100.0, va_adv_acc * 100.0
                #     logger.info('\n' + '='*30 + ' evaluation ' + '='*30)
                #     logger.info('test acc: %.3f %%, test adv acc: %.3f %%, spent: %.3f' % (
                #         va_acc, va_adv_acc, time() - begin_time))
                #     logger.info('='*28 + ' end of evaluation ' + '='*28 + '\n')

                begin_time = time()

            # NOTE(review): adv_data is only (re)assigned on eval steps when
            # adv_train is False; this save reuses the most recent one.
            if _iter % args.n_store_image_step == 0:
                tv.utils.save_image(
                    torch.cat([data.cpu(), adv_data.cpu()], dim=0),
                    os.path.join(args.log_folder, f'images_{_iter}.jpg'),
                    nrow=16)

            if _iter % args.n_checkpoint_step == 0:
                file_name = os.path.join(args.model_folder, f'checkpoint_{_iter}.pth')
                save_model(model, file_name)

            _iter += 1

            # scheduler depends on training interation
            scheduler.step()

        if va_loader is not None:
            t1 = time()
            va_acc, va_adv_acc = self.test(model, va_loader, True, False)
            va_acc, va_adv_acc = va_acc * 100.0, va_adv_acc * 100.0

            t2 = time()
            logger.info('\n'+'='*20 +f' evaluation at epoch: {epoch} iteration: {_iter} ' \
                +'='*20)
            logger.info(
                f'test acc: {va_acc:.3f}%, test adv acc: {va_adv_acc:.3f}%, spent: {t2-t1:.3f} s'
            )
            logger.info('=' * 28 + ' end of evaluation ' + '=' * 28 + '\n')
def train(self, model, tr_loader, va_loader=None, adv_train=False):
    """Train `model` with Adam, optionally on adversarial examples.

    Args:
        model: network to train (standard nn.Module; train/eval modes are
            toggled explicitly around forward passes).
        tr_loader: training DataLoader yielding (data, label).
        va_loader: optional validation DataLoader; when given, clean and
            adversarial accuracy are logged after every epoch.
        adv_train: if True, each training step optimizes on adversarial
            examples generated from a random start inside the epsilon ball.

    Side effects: logs metrics, periodically saves image grids and model
    checkpoints under the folders configured in self.args.
    """
    args = self.args
    logger = self.logger

    opt = torch.optim.Adam(model.parameters(), args.learning_rate,
                           weight_decay=args.weight_decay)
    # Epoch-indexed milestones: the LR decays after epochs 100 and 150.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(opt,
                                                     milestones=[100, 150],
                                                     gamma=0.1)
    _iter = 0

    begin_time = time()

    for epoch in range(1, args.max_epoch + 1):
        for data, label in tr_loader:
            data, label = tensor2cuda(data), tensor2cuda(label)

            if adv_train:
                # When training, the adversarial example is created from a random
                # close point to the original data point. If in evaluation mode,
                # just start from the original data point.
                adv_data = self.attack.perturb(data, label, 'mean', True)
                model.train()
                output = model(adv_data)
            else:
                model.train()
                output = model(data)

            loss = F.cross_entropy(output, label)

            opt.zero_grad()
            loss.backward()
            opt.step()

            if _iter % args.n_eval_step == 0:
                t1 = time()

                if adv_train:
                    # Adversarially trained: clean accuracy from a fresh
                    # eval-mode forward, robust accuracy from the training
                    # output (computed on adversarial examples).
                    with torch.no_grad():
                        model.eval()
                        stand_output = model(data)
                    pred = torch.max(stand_output, dim=1)[1]
                    std_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100

                    pred = torch.max(output, dim=1)[1]
                    adv_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100
                else:
                    # Standard training: attack from the clean point (no
                    # random start) purely to measure robustness.
                    adv_data = self.attack.perturb(data, label, 'mean', False)
                    with torch.no_grad():
                        model.eval()
                        adv_output = model(adv_data)
                    pred = torch.max(adv_output, dim=1)[1]
                    adv_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100

                    pred = torch.max(output, dim=1)[1]
                    std_acc = evaluate(pred.cpu().numpy(), label.cpu().numpy()) * 100

                t2 = time()
                # NOTE(review): stray debug print of eval-step wall time;
                # kept to preserve output, consider routing through logger.
                print('%.3f' % (t2 - t1))

                logger.info(
                    'epoch: %d, iter: %d, spent %.2f s, tr_loss: %.3f' % (
                        epoch, _iter, time() - begin_time, loss.item()))
                logger.info(
                    'standard acc: %.3f %%, robustness acc: %.3f %%' % (
                        std_acc, adv_acc))

                begin_time = time()

            # NOTE(review): adv_data is only (re)assigned on eval steps when
            # adv_train is False; this save reuses the most recent one.
            if _iter % args.n_store_image_step == 0:
                tv.utils.save_image(
                    torch.cat([data.cpu(), adv_data.cpu()], dim=0),
                    os.path.join(args.log_folder, 'images_%d.jpg' % _iter),
                    nrow=16)

            if _iter % args.n_checkpoint_step == 0:
                file_name = os.path.join(args.model_folder,
                                         'checkpoint_%d.pth' % _iter)
                save_model(model, file_name)

            _iter += 1

        # FIX: scheduler.step() was previously called at the TOP of each
        # epoch, i.e. before any opt.step(). Since PyTorch 1.1 the scheduler
        # must be stepped AFTER the optimizer, otherwise the first LR value
        # of the schedule is skipped and a UserWarning is raised. Stepping
        # once per completed epoch here keeps the epoch-based milestones
        # [100, 150] meaningful.
        scheduler.step()

        if va_loader is not None:
            t1 = time()
            va_acc, va_adv_acc = self.test(model, va_loader, True, False)
            va_acc, va_adv_acc = va_acc * 100.0, va_adv_acc * 100.0

            t2 = time()
            logger.info('\n'+'='*20 +' evaluation at epoch: %d iteration: %d '%(epoch, _iter) \
                +'='*20)
            logger.info(
                'test acc: %.3f %%, test adv acc: %.3f %%, spent: %.3f' % (
                    va_acc, va_adv_acc, t2 - t1))
            logger.info('=' * 28 + ' end of evaluation ' + '=' * 28 + '\n')