def get_metric_eval(self):
    """Evaluate self.phi's robustness to untargeted and targeted Linf-PGD.

    Stores the mean misclassification counts in self.metric_score and saves
    a 3-row figure (clean / untargeted-adv / targeted-adv samples) to
    self.save_path + '.png'.

    NOTE(review): `x_e`, `y_e` and `predict_from_logits` are not defined in
    this method — presumably module-level globals / helpers imported
    elsewhere in the file; confirm before reuse.
    """
    utr_score = []  # per-run untargeted misclassification counts
    tr_score = []   # per-run targeted misclassification counts
    for i in range(1):  # single trial; widen the range for repeated runs
        ##TODO: Customise input parameters to methods like LinfPGDAttack
        adversary = LinfPGDAttack(
            self.phi, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=0.10, nb_iter=40, eps_iter=0.01, rand_init=True,
            clip_min=0.0, clip_max=1.0, targeted=False)
        adv_untargeted = adversary.perturb(x_e, y_e)
        # Targeted variant: push every sample toward class 3.
        target = torch.ones_like(y_e) * 3
        adversary.targeted = True
        adv_targeted = adversary.perturb(x_e, target)
        pred_cln = predict_from_logits(self.phi(x_e))
        pred_untargeted_adv = predict_from_logits(self.phi(adv_untargeted))
        pred_targeted_adv = predict_from_logits(self.phi(adv_targeted))
        # Count how many predictions each attack flipped.
        utr_score.append(torch.sum(pred_cln != pred_untargeted_adv))
        tr_score.append(torch.sum(pred_cln != pred_targeted_adv))
        # Visualise the first 5 samples: clean vs adversarial versions.
        batch_size = 5
        plt.figure(figsize=(10, 8))
        for ii in range(batch_size):
            plt.subplot(3, batch_size, ii + 1)
            _imshow(x_e[ii])
            plt.title("clean \n pred: {}".format(pred_cln[ii]))
            plt.subplot(3, batch_size, ii + 1 + batch_size)
            _imshow(adv_untargeted[ii])
            plt.title("untargeted \n adv \n pred: {}".format(
                pred_untargeted_adv[ii]))
            plt.subplot(3, batch_size, ii + 1 + batch_size * 2)
            _imshow(adv_targeted[ii])
            plt.title("targeted to 3 \n adv \n pred: {}".format(
                pred_targeted_adv[ii]))
        plt.tight_layout()
        plt.savefig(self.save_path + '.png')
    utr_score = np.array(utr_score)
    tr_score = np.array(tr_score)
    print('MisClassifcation on Untargetted Attack ', np.mean(utr_score),
          np.std(utr_score))
    print('MisClassifcation on Targeted Atttack', np.mean(tr_score),
          np.std(tr_score))
    self.metric_score['Untargetted Method'] = np.mean(utr_score)
    self.metric_score['Targetted Method'] = np.mean(tr_score)
    return
def test_pgd(args, model, device, test_loader, epsilon=0.063):
    """Measure `model`'s accuracy on Linf-PGD adversarial test images.

    Perturbations are crafted against model.forward_adv with advertorch's
    LinfPGDAttack, then classified via model.run_cycles.  Prints and
    returns the resulting accuracy.
    """
    model.eval()
    model.reset()
    attacker = LinfPGDAttack(model.forward_adv,
                             loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                             eps=epsilon,
                             nb_iter=args.nb_iter,
                             eps_iter=args.eps_iter,
                             rand_init=True,
                             clip_min=-1.0,
                             clip_max=1.0,
                             targeted=False)
    n_correct = 0
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        model.reset()
        # The attack must not accumulate gradients into the parameters.
        with ctx_noparamgrad_and_eval(model):
            adv_inputs = attacker.perturb(inputs, labels)
        logits = model.run_cycles(adv_inputs)
        top1 = logits.argmax(dim=1, keepdim=True)
        n_correct += top1.eq(labels.view_as(top1)).sum().item()
    acc = n_correct / len(test_loader.dataset)
    print('PGD attack Acc {:.3f}'.format(100. * acc))
    return acc
def test_attack(threshold, arch, dataset, test_loader):
    """Attack a StandardModel with untargeted Linf-PGD and report success.

    Returns (target_model, adv_images, true_labels) where the arrays are
    the concatenated adversarial images (N, C, H, W, float) and their true
    labels.
    """
    target_model = StandardModel(dataset, arch, no_grad=False)
    if torch.cuda.is_available():
        target_model = target_model.cuda()
    target_model.eval()
    attacker = LinfPGDAttack(target_model,
                             loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                             eps=threshold, nb_iter=30, eps_iter=0.01,
                             rand_init=True, clip_min=0.0, clip_max=1.0,
                             targeted=False)
    total = 0
    n_success = 0
    adv_batches = []
    label_batches = []
    for img, true_label in test_loader:
        img = img.cuda()
        true_label = true_label.cuda().long()
        adv_image = attacker.perturb(img, true_label)  # (N, 3, 224, 224), float
        # Defensive guard; perturb is not expected to return None.
        if adv_image is None:
            continue
        adv_label = target_model.forward(adv_image).max(1)[1].detach().cpu().numpy().astype(np.int32)
        total += len(img)
        true_np = true_label.detach().cpu().numpy().astype(np.int32)
        # A sample counts as a success when the prediction moved off the truth.
        n_success += int((true_np != adv_label).sum())
        adv_batches.append(adv_image.cpu().detach().numpy())
        label_batches.append(true_np)
    attack_success_rate = n_success / float(total)
    log.info("Before train. Attack success rate is {:.3f}".format(attack_success_rate))
    return target_model, np.concatenate(adv_batches, 0), np.concatenate(label_batches, 0)
def attack_pgd_transfer(self, model_attacker, clean_loader, epsilon=0.1, eps_iter=0.02, test='average', nb_iter=7):
    """
    Use adversarial samples generated against model_attacker to attack
    the current model (transfer/black-box evaluation).  Prints and
    returns the accuracy of self.model on the transferred examples.
    """
    self.model.eval()
    self.model.reset()
    model_attacker.eval()
    model_attacker.reset()
    attacker = LinfPGDAttack(
        model_attacker.forward_adv,
        loss_fn=nn.CrossEntropyLoss(reduction="sum"),
        eps=epsilon, nb_iter=nb_iter, eps_iter=eps_iter,
        rand_init=True, clip_min=-1.0, clip_max=1.0, targeted=False)
    n_correct = 0
    for inputs, labels in clean_loader:
        inputs = inputs.to(self.device)
        labels = labels.to(self.device)
        self.model.reset()
        model_attacker.reset()
        # Craft the perturbation against the *attacker* model only.
        with ctx_noparamgrad_and_eval(model_attacker):
            adv_inputs = attacker.perturb(inputs, labels)
        # 'last' -> final cycle output, 'average' -> averaged cycles,
        # anything else -> a single plain forward pass.
        if test == 'last':
            logits = self.model.run_cycles(adv_inputs)
        elif test == 'average':
            logits = self.model.run_average(adv_inputs)
        else:
            self.model.reset()
            logits = self.model(adv_inputs)
        top1 = logits.argmax(dim=1, keepdim=True)
        n_correct += top1.eq(labels.view_as(top1)).sum().item()
    acc = n_correct / len(clean_loader.dataset)
    print('PGD attack Acc {:.3f}'.format(100. * acc))
    return acc
def generate_attack_samples(model, cln_data, true_label, nb_iter, eps_iter):
    """Generate untargeted and per-class targeted Linf-PGD samples.

    Args:
        model: classifier under attack.
        cln_data: clean input batch.
        true_label: ground-truth labels for `cln_data`.
        nb_iter: number of PGD iterations.
        eps_iter: per-iteration step size.

    Returns:
        (adv_targeted_results, adv_target_labels, adv_untargeted):
        a list of adversarial batches targeted at classes 0..9, the matching
        list of target-label tensors, and the untargeted adversarial batch.
    """
    adversary = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.25,
        nb_iter=nb_iter, eps_iter=eps_iter, rand_init=True,
        clip_min=0.0, clip_max=1.0, targeted=False)

    # Untargeted attack against the true labels.
    adv_untargeted = adversary.perturb(cln_data, true_label)

    # Targeted attacks, one per class.  Switch the attacker into targeted
    # mode once (the original re-assigned the flag on every iteration and
    # carried a dead assert whose `<= 10` bound would even have accepted
    # the invalid class index 10).
    adversary.targeted = True
    adv_targeted_results = []
    adv_target_labels = []
    for target_label in range(10):
        target = torch.ones_like(true_label) * target_label
        adv_targeted_results.append(adversary.perturb(cln_data, target))
        adv_target_labels.append(target)
    return adv_targeted_results, adv_target_labels, adv_untargeted
def mifgsm_attack(max_count, model, train_loader, max_epsilon, learning_rate,
                  iters=20, isnorm=False, num_classes=1000):
    """Targeted Linf-PGD attack over `train_loader`; return residual accuracy.

    Each sample is attacked toward the shifted label (y + 3) % num_classes;
    the return value is the fraction of attacked samples that the model
    still classifies as their *true* label.

    Args:
        max_count: stop after at least this many samples have been attacked.
        model: classifier under attack (expected on CUDA).
        train_loader: yields (x, y) batches.
        max_epsilon / learning_rate: attack budget and step, in raw 0-255
            units (rescaled to normalized units when isnorm=True).
        iters: number of attack iterations.
        isnorm: whether inputs are ImageNet-normalized.
        num_classes: label-space size for the (y + 3) shift.

    Returns:
        float (numpy scalar): accuracy of the model on adversarial inputs;
        0.0 if the loader yielded no samples.
    """
    if isnorm:
        # Inputs are ImageNet-normalized: express the valid pixel range and
        # the step/budget in normalized units (255 * 0.224 rescales from the
        # raw 0-255 domain via the green-channel std).
        mean = torch.tensor(np.array([0.485, 0.456, 0.406])).float().view(3, 1, 1)
        std = torch.tensor(np.array([0.229, 0.224, 0.225])).float().view(3, 1, 1)
        mmax = ((torch.ones(3, 224, 224) - mean) / std).cuda()
        mmin = ((torch.zeros(3, 224, 224) - mean) / std).cuda()
        learning_rate = learning_rate / (255 * 0.224)
        max_epsilon = max_epsilon / (255 * 0.224)
    else:
        learning_rate = float(learning_rate)
        max_epsilon = float(max_epsilon)
        mmax = 255
        mmin = 0
    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=max_epsilon, nb_iter=iters,
                              eps_iter=learning_rate, clip_min=mmin,
                              clip_max=mmax, targeted=True)
    count = 0
    total_correct = 0
    for x, y in train_loader:
        x = x.cuda()
        y = y.cuda()
        y1 = (y + 3) % num_classes  # attack target: true label shifted by 3
        count += len(x)
        ad_ex = adversary.perturb(x, y1)
        if not isnorm:
            # Quantize to integer pixel values in the raw 0-255 domain.
            ad_ex = torch.round(ad_ex)
        z1 = model(ad_ex).argmax(dim=1)
        total_correct += (z1 == y).sum()
        if count >= max_count:
            break
    if count == 0:
        # Empty loader: the original crashed here (int has no .cpu()) and
        # would also have divided by zero.
        return 0.0
    return total_correct.cpu().numpy() / count
def whitebox_pgd(args, image, target, model, normalize=None):
    """Run an untargeted Linf-PGD white-box attack on one batch.

    Prints the true and adversarial predictions, optionally logs the first
    clean/adversarial image pair to comet, and returns the model's logits
    on the adversarial batch together with the clamped difference image.
    """
    attacker = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.3,
        nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0,
        clip_max=1.0, targeted=False)
    adv = attacker.perturb(image, target)
    print("Target is %d" %(target))
    logits = model(adv)
    # Index of the max log-probability = adversarial prediction.
    adv_pred = logits.max(1, keepdim=True)[1]
    print("Adv Target is %d" %(adv_pred))
    clean0 = image[0].detach()
    adv0 = adv[0].detach()
    if args.comet:
        plot_image_to_comet(args, clean0, "clean.png")
        plot_image_to_comet(args, adv0, "Adv.png")
    return logits, clamp(clean0 - adv0, 0., 1.)
def create_adv_input(self, x, y, model):
    """Perturb a single sample with Linf-PGD against a copy of `model`.

    Returns (perturbed_input, flag): flag is 0 when the model still
    predicts label `y` on the perturbed input, 1 when the attack changed
    the prediction.
    """
    # Work on a deep copy so the caller's model is left untouched.
    model = copy.deepcopy(model)

    # Wrap the raw sample and label as single-element batches.
    data = torch.from_numpy(np.expand_dims(x, axis=0).astype(np.float32))
    target = torch.from_numpy(np.array([y]).astype(np.int64))
    data.requires_grad = True

    from advertorch.attacks import LinfPGDAttack
    adversary = LinfPGDAttack(model.forward)
    perturbed_data = adversary.perturb(data, target)

    # Re-classify and flag whether the prediction moved off the true label.
    prediction = model.forward(perturbed_data).max(1, keepdim=True)[1]
    flag = 0 if prediction.item() == target.item() else 1
    return perturbed_data, flag
clncorrect_nodefence += pred.eq(target.view_as(pred)).sum().item() # clean data with defence clndata_test_one = clndata with torch.no_grad(): output = model(clndata_test_one.float()) test_clnloss += F.cross_entropy(output, target, reduction='sum').item() pred = output.max(1, keepdim=True)[1] clncorrect += pred.eq(target.view_as(pred)).sum().item() if flag_advtrain: with ctx_noparamgrad_and_eval(model): advdata = adversary.perturb(clndata, target) # no defence with torch.no_grad(): output = model(advdata.float()) test_advloss_nodefence += F.cross_entropy( output, target, reduction='sum').item() pred = output.max(1, keepdim=True)[1] advcorrect_nodefence += pred.eq( target.view_as(pred)).sum().item() # with defence # # gaussian_block if args.gaussian_block: noise_data = add_gaussian_nosie(advdata, args.sigma)
def _get_test_adv(attack_method, epsilon):
    """Build adversarial versions of the CIFAR-10 test set.

    Loads a wide-resnet checkpoint, attacks every test batch with the
    requested method (PGD / FGSM / Momentum / STA) and returns
    (test_adv, test_true_target) as numpy arrays shaped (N, 3, 32, 32)
    and (N,).

    NOTE(review): this helper calls parser.parse_args() internally, so the
    process-level CLI must be compatible with this parser.
    """
    # define parameters
    parser = argparse.ArgumentParser(description='Train MNIST')
    parser.add_argument('--seed', default=0, type=int)
    parser.add_argument('--mode', default="adv", help="cln | adv")
    parser.add_argument('--sigma', default=75, type=int, help='noise level')
    parser.add_argument('--train_batch_size', default=50, type=int)
    parser.add_argument('--test_batch_size', default=1000, type=int)
    parser.add_argument('--log_interval', default=200, type=int)
    parser.add_argument('--result_dir', default='results', type=str,
                        help='directory of test dataset')
    parser.add_argument('--monitor', default=False, type=bool,
                        help='if monitor the training process')
    parser.add_argument('--start_save', default=90, type=int,
                        help='the threshold epoch which will start to save imgs data using in testing')
    # attack
    parser.add_argument("--attack_method", default="PGD", type=str,
                        choices=['FGSM', 'PGD', 'Momentum', 'STA'])
    parser.add_argument('--epsilon', type=float, default=8 / 255,
                        help='if pd_block is used')
    parser.add_argument('--dataset', default='cifar10', type=str,
                        help='dataset = [cifar10/MNIST]')
    # net
    parser.add_argument('--net_type', default='wide-resnet', type=str,
                        help='model')
    parser.add_argument('--depth', default=28, type=int, help='depth of model')
    parser.add_argument('--widen_factor', default=10, type=int,
                        help='width of model')
    parser.add_argument('--dropout', default=0.3, type=float,
                        help='dropout_rate')
    parser.add_argument('--num_classes', default=10, type=int)
    args = parser.parse_args()
    torch.manual_seed(args.seed)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # load basic data: the wrapped CIFAR-10 test loader
    test_loader = get_handled_cifar10_test_loader(num_workers=4,
                                                  shuffle=False,
                                                  batch_size=50)

    # Load the network model from checkpoint
    print('| Resuming from checkpoint...')
    assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
    _, file_name = getNetwork(args)
    # os.sep gives a platform-independent path separator
    checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name + '.t7')
    model = checkpoint['net']
    # model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)  # NOTE(review): unused here

    # Build the requested adversary
    from advertorch.attacks import LinfPGDAttack
    if attack_method == "PGD":
        adversary = LinfPGDAttack(
            model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=epsilon,
            nb_iter=40,
            eps_iter=0.01,
            rand_init=True,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False)
    elif attack_method == "FGSM":
        # Single-step attack without an outer perturbation budget; for FGSM
        # `eps` plays the role that `eps_iter` plays elsewhere.
        adversary = GradientSignAttack(
            model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            clip_min=0.0,
            clip_max=1.0,
            eps=0.007,
            targeted=False)
    elif attack_method == "Momentum":
        adversary = MomentumIterativeAttack(
            model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=epsilon,
            nb_iter=40,
            decay_factor=1.0,
            eps_iter=1.0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            ord=np.inf)
    elif attack_method == "STA":
        # Spatial transform attack; run without a perturbation-range limit.
        adversary = SpatialTransformAttack(
            model,
            num_classes=args.num_classes,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            initial_const=0.05,
            max_iterations=1000,
            search_steps=1,
            confidence=0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            abort_early=True)

    # Generate the adversarial test set; saved as train_adv_attackMethod_epsilon
    test_adv = []
    test_true_target = []
    for clndata, target in test_loader:
        print("clndata:{}".format(clndata.size()))
        clndata, target = clndata.to(device), target.to(device)
        # Attack without accumulating gradients into the model parameters.
        with ctx_noparamgrad_and_eval(model):
            advdata = adversary.perturb(clndata, target)
        test_adv.append(advdata.detach().cpu().numpy())
        test_true_target.append(target.cpu().numpy())
    test_adv = np.reshape(np.asarray(test_adv), [-1, 3, 32, 32])
    test_true_target = np.reshape(np.asarray(test_true_target), [-1])
    print("test_adv.shape:{}".format(test_adv.shape))
    print("test_true_target.shape:{}".format(test_true_target.shape))
    del model
    return test_adv, test_true_target
acc_count_train = 0 fp_sum_train = 0 fp_count_train = 0 fn_sum_train = 0 fn_count_train = 0 if do_train: for batch_idx, sample in enumerate(tqdm(train_loader)): img = sample['img'].to(device) orig_img = img.clone() label = sample['label'].to(device) with ctx_noparamgrad_and_eval(att_model): data = adversary.perturb(img, label) if one_way: data[torch.flatten(label) == 1] = orig_img[ torch.flatten(label) == 1] outputs = att_model(data) optimizer.zero_grad() loss = criterion(outputs, label, gamma=adv_floss_gamma, alpha=adv_floss_alpha) if isinstance(model, torch.nn.DataParallel): loss = loss.sum()
loss_fn=nn.CrossEntropyLoss(), eps=args.eps, nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False) net.eval() correct = 0 total = 0 for step, data in enumerate(testloader, 0): inputs, labels = data inputs = inputs.cuda() labels = labels.cuda() adv_inputs = adversary.perturb(inputs, labels) adv_inputs = Variable(adv_inputs) labels = Variable(labels) outputs_adv = net(adv_inputs) _, predicted = torch.max(outputs_adv.data, 1) total += labels.size(0) correct += predicted.eq(labels.data).cpu().sum() correct = correct.item() print("Classification accuracy : {}%".format(100. * correct / total))
def main():
    """Probe how layer-wise memory associations predict (adversarial) MNIST.

    Loads a pretrained MNIST CNN plus per-layer association maps, optionally
    perturbs one small test batch with Linf-PGD, converts each layer's
    memory response into a per-sample class-probability vector, and dumps
    the pairwise layer-agreement scores to a JSON file.

    NOTE(review): relies on module-level helpers/globals not visible here
    (`Net`, `Hook`, `test_adv`, `intermediate_output`, `CogMem_load`);
    `intermediate_output` is presumably populated by the `Hook` forward
    hooks — confirm against the rest of the file.
    """
    data = []  # NOTE(review): rebound by the loader loop below; initial value unused
    torch.cuda.empty_cache()
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=60000, metavar='N',
                        help='input batch size for training (default: 60000)')
    parser.add_argument('--test-batch-size', type=int, default=10000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=1, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1} if use_cuda else {}
    # Pick the data root depending on the host machine.
    if os.path.exists('/local2'):
        dr_t = 'local2/data'
    else:
        dr_t = '/home/jung/hypothesis/data'
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
        batch_size=60000,
        shuffle=False,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
        batch_size=10000,
        shuffle=False,
        **kwargs)
    test_loader_small = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
        batch_size=200,
        shuffle=False,
        **kwargs)
    strage = device  # NOTE(review): unused
    model = Net()
    model.to(device)
    checkpoint = torch.load('mnist_cnn.pt',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint)
    adv_ = True  # toggle: evaluate on adversarial (True) or clean (False) data
    # Load the four per-layer association maps.
    Associations = []
    for xin in range(4):
        temp = torch.load('map_association_' + str(xin) + '.pt')
        Associations.append(temp)
    # Register a forward hook on every top-level module of the model.
    hookF = [Hook(layer[1]) for layer in list(model._modules.items())]
    # Let's test how this association is predictive of the test set
    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=0.7,
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=0.0,
                              clip_max=1.0,
                              targeted=False)
    # Grab only the first 200-image batch.
    for data, target in test_loader_small:
        break
    labels_ = target
    data = data.to(device)
    labels_ = labels_.to(device)
    if adv_:
        adv_untargeted = adversary.perturb(data, labels_)
        fn = 'adv'
    else:
        adv_untargeted = data
        fn = 'norm'
    print(adv_untargeted.size())
    pred_n = test_adv(args, model, device, test_loader_small, hookF,
                      adv_untargeted)
    # For each layer, turn the memory response into a per-sample
    # class-probability vector (softmax over association-weighted votes).
    activity_layer = {}
    for ttin in range(4):
        layer_sel_ = ttin
        act_map = Associations[layer_sel_]  # NOTE(review): unused
        roV = intermediate_output[layer_sel_]
        sel = Associations[layer_sel_]
        sel = sel.numpy()
        wm = torch.load('wm_' + str(layer_sel_) + '.pt',
                        map_location=lambda storage, loc: storage)
        fp = open('labels_' + str(layer_sel_) + '.json')
        # labels for wm i.e., the labels of the test set.
        label = json.load(fp)
        fp.close()
        cog = CogMem_load(wm, label)
        for data, target in test_loader_small:
            break
        labels_ = target
        cog.foward(roV)  # NOTE(review): project API spells it 'foward'
        pred = cog.pred.long()
        total_1 = 0
        total_2 = 0
        total_3 = 0
        total_4 = 0
        total_5 = 0
        cons1 = 0
        cons2 = 0
        temp = 0
        corr = np.zeros((10, 10))
        mem = []
        temp_vec = []
        for xi, xin in enumerate(pred_n):
            cls = xin.item()
            label_t = labels_[xi].long().item()
            v2 = cog.image[:, xi]
            idx = torch.argsort(v2).cpu().numpy()
            mem.append(v2.cpu().numpy())
            # Top-3 strongest memory responses for this sample.
            idx = np.flip(idx, 0)[:3]
            tar = sel[idx, :]
            # Softmax over the similarity-weighted association rows.
            temp_v = np.zeros(10)
            for zin in idx:
                temp_v = temp_v + sel[zin, :] * v2[zin].item()
            temp_v = np.exp(temp_v)
            temp_v = temp_v / np.sum(temp_v)
            temp_vec.append(temp_v)
        activity_layer[layer_sel_] = np.array(temp_vec)
    # Pairwise agreement between layers: per-sample dot products of their
    # class-probability vectors.
    layer_corr = {}
    values = []  # NOTE(review): unused
    for xin in activity_layer:
        pred = activity_layer[xin]
        for yin in activity_layer:
            post = activity_layer[yin]
            if xin == yin:
                pass
            else:
                temp = []
                for zin in range(len(pred_n)):
                    temp.append(np.dot(pred[zin], post[zin]))
                layer_corr[str(xin) + '_' + str(yin)] = temp
    fp = open('layer-' + fn + '_' + '.json', 'w')
    json.dump(layer_corr, fp)
def main():
    """Generate and save MNIST adversarial examples via advertorch Linf-PGD.

    Takes the first 200-image test batch; with --norm saves the clean batch,
    otherwise saves the PGD-perturbed batch, under adversarial_examples/.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='gen adversarial examples via advertorch')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--eps', default=0.3, help='eps for LinfPGDAttack')
    parser.add_argument('--norm', action='store_true', default=False,
                        help='adversarial?')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    normalize = transforms.Normalize((0.1307, ), (0.3081, ))
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    dr_t = './data'
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=False,
        transform=transforms.Compose([transforms.ToTensor(), normalize])),
        batch_size=200,
        shuffle=False,
        **kwargs)
    model = Net()
    checkpoint = torch.load('pretrained_models/mnist_cnn.pt',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint)
    model.to(device)
    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=float(args.eps),
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=0.0,
                              clip_max=1.0,
                              targeted=False)
    # Grab only the first batch (200 images) from the test loader.
    for data, target in test_loader:
        break
    labels_ = target
    data = data.to(device)
    labels_ = labels_.to(device)
    if os.path.exists("adversarial_examples"):
        pass
    else:
        os.mkdir("adversarial_examples")
    if args.norm:
        # --norm: save the clean batch unchanged.
        torch.save(data, 'adversarial_examples/norm.pt')
        torch.save(labels_, 'adversarial_examples/norm_label_.pt')
    else:
        adv_untargeted = adversary.perturb(data, labels_)
        torch.save(adv_untargeted,
                   'adversarial_examples/adv_' + str(args.eps) + '.pt')
        torch.save(labels_,
                   'adversarial_examples/adv_label_' + str(args.eps) + '.pt')
def run(args):
    """Train an MNIST LeNet-style model, clean or adversarially, and save it.

    With args.mode == "adv", training data is replaced by Linf-PGD
    perturbations each step (Madry-style adversarial training) and the
    adversarial test loss/accuracy is reported after every epoch.
    The final state_dict is written under args.save_path.
    """
    torch.manual_seed(args.seed)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if args.mode == "cln":
        flag_advtrain = False
        nb_epoch = 10
        model_filename = "mnist_lenet5_clntrained_%i.pt" % args.seed
    elif args.mode == "adv":
        flag_advtrain = True
        nb_epoch = 90
        model_filename = "mnist_lenet5_advtrained_%i.pt" % args.seed
    else:
        raise
    train_loader, test_loader = load_mnist(args, augment=False,
                                           root='../data/')
    if args.architecture == 'LeNet':
        model = Net(1, 28, 28).to(device)
    elif args.architecture == 'MadryLeNet':
        model = MadryNet(1, 28, 28).to(device)
        print(device)
    else:
        # NOTE(review): raising a plain string is a TypeError at runtime;
        # left unchanged here.
        raise (f'Architecture {args.architecture} not implemented')
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    if flag_advtrain:
        from advertorch.attacks import LinfPGDAttack
        adversary = LinfPGDAttack(model,
                                  loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                  eps=0.3,
                                  nb_iter=40,
                                  eps_iter=0.01,
                                  rand_init=True,
                                  clip_min=0.0,
                                  clip_max=1.0,
                                  targeted=False)
        save_path = os.path.join(args.save_path, "adv_trained")
    else:
        save_path = os.path.join(args.save_path, "natural")
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    save_path = os.path.join(save_path, model_filename)
    for epoch in range(nb_epoch):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            ori = data
            if flag_advtrain:
                # when performing attack, the model needs to be in eval mode
                # also the parameters should NOT be accumulating gradients
                with ctx_noparamgrad_and_eval(model):
                    data = adversary.perturb(data, target)
            optimizer.zero_grad()
            output = model(data)
            loss = F.cross_entropy(output, target, reduction='mean')
            loss.backward()
            optimizer.step()
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
        # Per-epoch evaluation on the clean (and, if adv-training, the
        # adversarially perturbed) test set.
        model.eval()
        test_clnloss = 0
        clncorrect = 0
        if flag_advtrain:
            test_advloss = 0
            advcorrect = 0
        for clndata, target in test_loader:
            clndata, target = clndata.to(device), target.to(device)
            with torch.no_grad():
                output = model(clndata)
            test_clnloss += F.cross_entropy(output, target,
                                            reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            clncorrect += pred.eq(target.view_as(pred)).sum().item()
            if flag_advtrain:
                advdata = adversary.perturb(clndata, target)
                with torch.no_grad():
                    output = model(advdata)
                test_advloss += F.cross_entropy(output, target,
                                                reduction='sum').item()
                pred = output.max(1, keepdim=True)[1]
                advcorrect += pred.eq(target.view_as(pred)).sum().item()
        test_clnloss /= len(test_loader.dataset)
        print('\nTest set: avg cln loss: {:.4f},'
              ' cln acc: {}/{} ({:.0f}%)\n'.format(
                  test_clnloss, clncorrect, len(test_loader.dataset),
                  100. * clncorrect / len(test_loader.dataset)))
        if flag_advtrain:
            test_advloss /= len(test_loader.dataset)
            print('Test set: avg adv loss: {:.4f},'
                  ' adv acc: {}/{} ({:.0f}%)\n'.format(
                      test_advloss, advcorrect, len(test_loader.dataset),
                      100. * advcorrect / len(test_loader.dataset)))
    torch.save(model.state_dict(), save_path)
class AIGAN:
    """Adversarial-image GAN: a generator learns bounded perturbations that
    fool `model`, trained against a discriminator that (early in training)
    is shown Linf-PGD examples as its "real" distribution.

    Checkpoints for both networks and optimizers are written under
    ./models/ and automatically resumed from the last epoch found.
    """

    def __init__(self, device, model, model_num_labels, image_nc,
                 epoch_of_change, box_min, box_max, c_tresh,
                 dataset_name, is_targeted):
        # device: torch device; model: target classifier under attack;
        # model_num_labels: size of the label space; image_nc: channels;
        # epoch_of_change: epoch at which PGD "real" samples are dropped;
        # box_min/box_max: valid image range; c_tresh: perturbation bound;
        # is_targeted: whether labels passed to training are attack targets.
        output_nc = image_nc
        self.device = device
        self.model_num_labels = model_num_labels
        self.model = model
        self.input_nc = image_nc
        self.output_nc = output_nc
        self.box_min = box_min
        self.box_max = box_max
        self.c_treshold = c_tresh
        self.dataset_name = dataset_name
        self.is_targeted = is_targeted
        self.models_path = './models/'
        self.writer = SummaryWriter('./checkpoints/logs/', max_queue=100)
        self.gen_input_nc = image_nc
        self.epoch_of_change = epoch_of_change
        # PGD attacker supplies "real" adversarial samples to the
        # discriminator during the first phase of training.
        self.attacker = LinfPGDAttack(self.model,
                                      loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                      eps=0.3,
                                      nb_iter=40,
                                      eps_iter=0.01,
                                      rand_init=True,
                                      clip_min=box_min,
                                      clip_max=box_max,
                                      targeted=self.is_targeted)
        # Pick the architecture pair for the dataset.
        if dataset_name=="mnist":
            from models import Generator, Discriminator
        elif dataset_name=="imagenet":
            from imagenet_models import PatchDiscriminator as Discriminator
            from imagenet_models import Resnet224Generator as Generator
        else:
            raise NotImplementedError('dataset [%s] is not implemented'
                                      % dataset_name)
        self.netG = Generator(self.gen_input_nc, image_nc).to(device)
        self.netDisc = Discriminator(image_nc).to(device)
        self.netG_file_name = self.models_path + 'netG.pth.tar'
        self.netDisc_file_name = self.models_path + 'netD.pth.tar'
        os.makedirs(self.models_path, exist_ok=True)
        # initialize all weights: resume from the last checkpoint when one
        # exists, otherwise apply fresh weight init.
        last_netG = find_last_checkpoint(self.netG_file_name)
        last_netDisc = find_last_checkpoint(self.netDisc_file_name)
        if last_netG is not None:
            self.netG.load_state_dict(torch.load(last_netG))
            self.netDisc.load_state_dict(torch.load(last_netDisc))
            # Checkpoint files end in '.<epoch>'; resume from the next one.
            *_, self.start_epoch = last_netG.split('.')
            self.iteration = None  # recomputed lazily in train()
            self.start_epoch = int(self.start_epoch)+1
        else:
            self.netG.apply(weights_init)
            self.netDisc.apply(weights_init)
            self.start_epoch = 1
            self.iteration = 0
        # initialize optimizers
        if self.dataset_name == "mnist":
            lr = 10**(-3)
        elif self.dataset_name == "imagenet":
            lr = 10**(-5)
        else:
            raise NotImplementedError('dataset [%s] is not implemented'
                                      % dataset_name)
        self.optimizer_G = torch.optim.Adam(self.netG.parameters(), lr=lr)
        self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(), lr=lr)
        self.optG_file_name = self.models_path + 'optG.pth.tar'
        self.optD_file_name = self.models_path + 'optD.pth.tar'
        last_optG = find_last_checkpoint(self.optG_file_name)
        last_optD = find_last_checkpoint(self.optD_file_name)
        if last_optG is not None:
            self.optimizer_G.load_state_dict(torch.load(last_optG))
            self.optimizer_D.load_state_dict((torch.load(last_optD)))
        # Show PGD samples to the discriminator only before epoch_of_change.
        self._use_attacker = (self.start_epoch < self.epoch_of_change)

    def train_batch(self, x, labels):
        """One D-step and one G-step on a batch; returns scalar losses.

        If training is targeted, `labels` are the attack targets.
        Returns (loss_D_GAN, loss_G_fake, loss_perturb, loss_adv, loss_G,
        fake_accuracy) — all floats except fake_accuracy, which stays a
        0-dim tensor.
        """
        # optimize D
        for _ in range(1):
            # Clipping trick: bound the raw generator output to the
            # perturbation threshold before adding it to the input.
            perturbation = torch.clamp(self.netG(x), -self.c_treshold,
                                       self.c_treshold)
            adv_images = perturbation + x
            adv_images = torch.clamp(adv_images, self.box_min, self.box_max)
            self.optimizer_D.zero_grad()
            # Early phase: PGD adversarial images act as D's "real" class;
            # later the clean images do.
            if self._use_attacker:
                pgd_images = self.attacker.perturb(x, labels)
                d_real_logits, d_real_probs = self.netDisc(pgd_images)
            else:
                d_real_logits, d_real_probs = self.netDisc(x)
            d_fake_logits, d_fake_probs = self.netDisc(adv_images.detach())
            # generate labels for discriminator (optionally smooth labels
            # for stability; smooth=0 disables smoothing)
            smooth = 0.0
            d_labels_real = torch.ones_like(d_real_probs,
                                            device=self.device) * (1 - smooth)
            d_labels_fake = torch.zeros_like(d_fake_probs, device=self.device)
            # discriminator loss (LSGAN-style MSE on probabilities)
            loss_D_real = F.mse_loss(d_real_probs, d_labels_real)
            loss_D_real.backward()
            loss_D_fake = F.mse_loss(d_fake_probs, d_labels_fake)
            loss_D_fake.backward()
            loss_D_GAN = (loss_D_fake + loss_D_real) #/2
            self.optimizer_D.step()
        gc.collect()
        # optimize G
        for _ in range(1):
            self.optimizer_G.zero_grad()
            # cal G's loss in GAN
            d_fake_logits, d_fake_probs = self.netDisc(adv_images.detach())
            loss_G_fake = F.mse_loss(d_fake_probs,
                                     torch.ones_like(d_fake_probs,
                                                     device=self.device))
            loss_G_fake.backward(retain_graph=True)
            # calculate perturbation norm: hinge on the L2 norm above the
            # threshold, averaged over the batch.
            loss_perturb = torch.norm(perturbation.view(perturbation.shape[0], -1),
                                      2, dim=1)
            loss_perturb = torch.max(loss_perturb - self.c_treshold,
                                     torch.zeros(1, device=self.device))
            loss_perturb = torch.mean(loss_perturb)
            # cal adv loss against the target classifier
            f_fake_logits = self.model(adv_images)
            f_fake_probs = F.softmax(f_fake_logits, dim=1)
            # if training is targeted, indicates how many examples were
            # classified as the targets; else accuracy on adversarial images
            fake_accuracy = torch.mean(
                (torch.argmax(f_fake_probs, 1) == labels).float())
            onehot_labels = torch.eye(self.model_num_labels,
                                      device=self.device)[labels.long()]
            loss_adv = adv_loss(f_fake_probs, onehot_labels, self.is_targeted)
            # Dataset-specific loss weights.
            if self.dataset_name == "mnist":
                alambda = 1.
                alpha = 1.
                beta = 1.5
            elif self.dataset_name == "imagenet":
                alambda = 10.0
                alpha = 1.
                beta = 0.5
            else:
                raise NotImplementedError('dataset [%s] is not implemented'
                                          % self.dataset_name)
            loss_G = alambda*loss_adv + alpha*loss_G_fake + beta*loss_perturb
            loss_G.backward()
            self.optimizer_G.step()
        self.writer.add_scalar('iter/train/loss_D_real', loss_D_real.data,
                               global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_D_fake', loss_D_fake.data,
                               global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_G_fake', loss_G_fake.data,
                               global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_perturb', loss_perturb.data,
                               global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_adv', loss_adv.data,
                               global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_G', loss_G.data,
                               global_step=self.iteration)
        self.writer.add_scalar('iter/train/fake_acc', fake_accuracy.data,
                               global_step=self.iteration)
        self.iteration += 1
        return loss_D_GAN.item(), loss_G_fake.item(), loss_perturb.item(), loss_adv.item(), loss_G.item(), fake_accuracy

    def train(self, train_dataloader, epochs):
        """Full training loop from self.start_epoch through `epochs`.

        Applies hard-coded learning-rate drops at fixed epochs, logs
        per-epoch averages, checkpoints every epoch, and saves the final
        models/optimizers under their base file names.
        """
        if self.iteration is None:
            # Resumed run: reconstruct the global iteration counter.
            self.iteration = (self.start_epoch-1)*len(train_dataloader)+1
        for epoch in range(self.start_epoch, epochs+1):
            # Past this epoch the discriminator sees clean images as "real".
            if epoch == self.epoch_of_change:
                self._use_attacker = False
            # Hard-coded LR schedule (re-creating optimizers resets state).
            if epoch == 120 and self.dataset_name == "mnist":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=0.0001)
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=0.0001)
            if epoch == 60 and self.dataset_name == "imagenet":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=10**(-7))
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=10**(-7))
            if epoch == 200 and self.dataset_name == "mnist":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=0.00001)
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=0.00001)
            if epoch == 200 and self.dataset_name == "imagenet":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=10**(-9))
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=10**(-9))
            loss_D_sum = 0
            loss_G_fake_sum = 0
            loss_perturb_sum = 0
            loss_adv_sum = 0
            loss_G_sum = 0
            fake_acc_sum = 0
            for i, data in enumerate(train_dataloader, start=0):
                gc.collect()
                images, labels = data
                images, labels = images.to(self.device), labels.to(self.device)
                # If targeted, `labels` are already the attack targets
                # (one-hot conversion happens inside train_batch).
                loss_D_batch, loss_G_fake_batch, loss_perturb_batch, loss_adv_batch, loss_G_batch, fake_acc_batch = \
                    self.train_batch(images, labels)
                loss_D_sum += loss_D_batch
                loss_G_fake_sum += loss_G_fake_batch
                loss_perturb_sum += loss_perturb_batch
                loss_adv_sum += loss_adv_batch
                loss_G_sum += loss_G_batch
                fake_acc_sum += fake_acc_batch
                # Log example images once per epoch (second-to-last batch).
                if i == len(train_dataloader)-2:
                    perturbation = self.netG(images)
                    self.writer.add_images('train/adversarial_perturbation',
                                           perturbation, global_step=epoch)
                    self.writer.add_images('train/adversarial_images',
                                           images+perturbation,
                                           global_step=epoch)
                    self.writer.add_images('train/adversarial_images_cl',
                                           torch.clamp(images+perturbation,
                                                       self.box_min,
                                                       self.box_max),
                                           global_step=epoch)
            # print statistics
            num_batch = len(train_dataloader)
            self.writer.add_scalar('epoch/train/loss_D', loss_D_sum/num_batch,
                                   global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_G_fake',
                                   loss_G_fake_sum/num_batch,
                                   global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_perturb',
                                   loss_perturb_sum/num_batch,
                                   global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_adv',
                                   loss_adv_sum/num_batch, global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_G', loss_G_sum/num_batch,
                                   global_step=epoch)
            self.writer.add_scalar('epoch/train/fake_acc',
                                   fake_acc_sum/num_batch, global_step=epoch)
            print("epoch %d:\nloss_D: %.3f, loss_G_fake: %.3f,\
\nloss_perturb: %.3f, loss_adv: %.3f, \n" %
                  (epoch, loss_D_sum/num_batch, loss_G_fake_sum/num_batch,
                   loss_perturb_sum/num_batch, loss_adv_sum/num_batch))
            # save generator (checkpoint every epoch)
            if epoch%1==0:
                netG_file_name = self.netG_file_name + '.' + str(epoch)
                torch.save(self.netG.state_dict(), netG_file_name)
                netD_file_name = self.netDisc_file_name + '.' + str(epoch)
                torch.save(self.netDisc.state_dict(), netD_file_name)
                optG_file_name = self.optG_file_name + '.' + str(epoch)
                torch.save(self.optimizer_G.state_dict(), optG_file_name)
                optD_file_name = self.optD_file_name + '.' + str(epoch)
                torch.save(self.optimizer_D.state_dict(), optD_file_name)
        #save final model
        torch.save(self.netG.state_dict(), self.netG_file_name )
        torch.save(self.netDisc.state_dict(), self.netDisc_file_name)
        torch.save(self.optimizer_G.state_dict(), self.optG_file_name)
        torch.save(self.optimizer_D.state_dict(), self.optD_file_name)
def main():
    """Evaluate pairwise adversarial transferability within a trained ensemble.

    Loads an ensemble of submodels, picks a random subset of test samples that
    every submodel classifies correctly, crafts Linf-PGD adversarial examples
    against each submodel individually, then measures attack success rate (ASR)
    of each submodel's adversarial examples against every submodel and against
    the full ensemble.  Results are optionally pickled under
    results/transferability/<alg>/<seed>/.
    """
    # get args
    args = get_args()

    # set up gpus
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    assert torch.cuda.is_available()

    # load models
    # 'gal' checkpoints were trained with leaky-ReLU activations, so the
    # architecture must be rebuilt accordingly.
    if 'gal' in args.model_file:
        leaky_relu = True
    else:
        leaky_relu = False
    ensemble = utils.get_models(args, train=False, as_ensemble=True,
                                model_file=args.model_file, leaky_relu=leaky_relu)
    models = ensemble.models
    # NOTE(review): assumes a path layout .../<train_alg>/<train_seed>/<name>/<ckpt> — confirm
    train_seed = args.model_file.split('/')[-3]
    train_alg = args.model_file.split('/')[-4]

    # get data loaders
    testloader = utils.get_testloader(args, batch_size=args.batch_size)

    # pick out samples that are correctly classified by all submodels
    correct = []
    for m in models:
        correct_m = []
        for (x, y) in testloader:
            x, y = x.cuda(), y.cuda()
            outputs = m(x)
            _, pred = outputs.max(1)
            correct_m.append(pred.eq(y))
        correct_m = torch.cat(correct_m)
        correct.append(correct_m)
    correct = torch.stack(correct, dim=-1).all(-1)
    correct_idx = correct.nonzero().squeeze(-1)
    # fixed seed so the evaluated subset is reproducible across runs
    random.seed(0)
    subset_idx = correct_idx[random.sample(range(correct_idx.size(0)), args.subset_num)].cpu()
    subset_loader = utils.get_testloader(args, batch_size=args.batch_size,
                                         shuffle=False, subset_idx=subset_idx)

    # PGD
    eps_list = [0.03]
    random_start = args.random_start
    steps = args.steps
    rob = {}
    rob['random_start'] = args.random_start
    rob['steps'] = args.steps
    for eps in tqdm(eps_list, desc='PGD eps', leave=False, position=0):
        # correct_or_not_rs[i, j, sample, rs] == True iff model j (or the
        # ensemble when j == len(models)) still classifies the adversarial
        # example crafted against model i correctly, for random start rs.
        correct_or_not_rs = torch.zeros((len(models), len(models)+1, args.subset_num, random_start),
                                        dtype=torch.bool)
        for rs in tqdm(range(random_start), desc='Random Start', leave=False, position=1):
            torch.manual_seed(rs)
            test_iter = tqdm(subset_loader, desc='Batch', leave=False, position=2)
            total = 0
            for (x, y) in test_iter:
                x, y = x.cuda(), y.cuda()
                # craft one adversarial batch per source submodel
                adv_list = []
                for i, m in enumerate(models):
                    adversary = LinfPGDAttack(
                        m, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                        eps=eps, nb_iter=steps, eps_iter=eps/5, rand_init=True,
                        clip_min=0., clip_max=1., targeted=False)
                    adv = adversary.perturb(x, y)
                    adv_list.append(adv)
                for i, adv in enumerate(adv_list):
                    for j, m in enumerate(models):
                        if j == i:
                            # sanity check: the subset was chosen so every
                            # submodel is correct on the clean inputs
                            outputs = m(x)
                            _, pred = outputs.max(1)
                            assert pred.eq(y).all()
                        outputs = m(adv)
                        _, pred = outputs.max(1)
                        correct_or_not_rs[i, j, total:total+x.size(0), rs] = pred.eq(y)
                    # last column: robustness of the whole ensemble
                    outputs = ensemble(adv)
                    _, pred = outputs.max(1)
                    correct_or_not_rs[i, len(models), total:total+x.size(0), rs] = pred.eq(y)
                total += x.size(0)
        # a sample counts as robust only if it survives every random start
        correct_or_not_rs = torch.all(correct_or_not_rs, dim=-1)
        asr = np.zeros((len(models), len(models)+1))
        tqdm.write("eps: {:.2f}".format(eps))
        for i in range(len(models)):
            message = ''
            for j in range(len(models)+1):
                message += '\t{}: {:.2%}'.format(j, 1-correct_or_not_rs[i, j, :].sum().item()/args.subset_num)
                asr[i, j] = 1-correct_or_not_rs[i, j, :].sum().item()/args.subset_num
            tqdm.write(message)
        rob[str(eps)] = asr

    # save to file
    if args.save_to_file:
        output_root = os.path.join('results', 'transferability', train_alg, train_seed)
        if not os.path.exists(output_root):
            os.makedirs(output_root)
        output_filename = args.model_file.split('/')[-2]
        output = os.path.join(output_root, '.'.join((output_filename, 'pkl')))
        with open(output, 'wb') as f:
            pickle.dump(rob, f, pickle.HIGHEST_PROTOCOL)
def get_metric_eval(self): utr_score=[] tr_score=[] for i in range(1): ##TODO: Customise input parameters to methods like LinfPGDAttack adversary = LinfPGDAttack( self.phi, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=self.args.adv_eps, nb_iter=70, eps_iter=0.01, rand_init=True, clip_min=(0.0-0.1307)/0.3081, clip_max=(1.0-0.1307)/0.3081, targeted=False) pred_cln=[] pred_untargeted_adv=[] pred_targeted_adv=[] temp_counter=0 for batch_idx, (x_e, y_e ,d_e, idx_e) in enumerate(self.test_dataset): x_e= x_e.to(self.cuda) print(torch.min(x_e), torch.max(x_e)) y_e= torch.argmax(y_e, dim=1).to(self.cuda) adversary.targeted = False adv_untargeted = adversary.perturb(x_e, y_e) target = torch.ones_like(y_e)*3 adversary.targeted = True adv_targeted = adversary.perturb(x_e, target) print(torch.min(adv_untargeted), torch.max(adv_untargeted)) pred_cln.append( predict_from_logits(self.phi(x_e)) ) pred_untargeted_adv.append( predict_from_logits(self.phi(adv_untargeted)) ) pred_targeted_adv.append( predict_from_logits(self.phi(adv_targeted)) ) temp_counter+=1 if temp_counter ==5: break pred_cln= torch.cat(pred_cln) pred_untargeted_adv= torch.cat(pred_untargeted_adv) pred_targeted_adv= torch.cat(pred_targeted_adv) utr_score.append( torch.sum( pred_cln != pred_untargeted_adv).detach().cpu().numpy() / pred_cln.shape[0] ) tr_score.append( torch.sum(pred_cln!= pred_targeted_adv).detach().cpu().numpy() / pred_cln.shape[0] ) # batch_size=5 # plt.figure(figsize=(10, 8)) # for ii in range(batch_size): # plt.subplot(3, batch_size, ii + 1) # _imshow(x_e[ii]) # plt.title("clean \n pred: {}".format(pred_cln[ii])) # plt.subplot(3, batch_size, ii + 1 + batch_size) # _imshow(adv_untargeted[ii]) # plt.title("untargeted \n adv \n pred: {}".format( # pred_untargeted_adv[ii])) # plt.subplot(3, batch_size, ii + 1 + batch_size * 2) # _imshow(adv_targeted[ii]) # plt.title("targeted to 3 \n adv \n pred: {}".format( # pred_targeted_adv[ii])) # plt.tight_layout() # plt.savefig( self.save_path + '.png' ) 
utr_score= np.array(utr_score) tr_score= np.array(tr_score) print('MisClassifcation on Untargetted Attack ', np.mean(utr_score), np.std(utr_score), self.args.adv_eps ) print('MisClassifcation on Targeted Atttack', np.mean(tr_score), np.std(tr_score), self.args.adv_eps ) self.metric_score['Untargetted Method']= np.mean( utr_score ) self.metric_score['Targetted Method']= np.mean( tr_score ) return
def train_adv(args, model, device, train_loader, optimizer, scheduler, epoch,
              cycles, mse_parameter=1.0, clean_parameter=1.0, clean='supclean'):
    """Run one epoch of adversarial training with feedback reconstruction cycles.

    For each batch: craft Linf-PGD adversarial examples, run clean+adversarial
    inputs through the model's feedforward pass, then for ``cycles`` iterations
    reconstruct intermediate features backward and add MSE losses pulling the
    adversarial reconstructions toward the clean features.  Cross-entropy terms
    are averaged over (cycles + 1) passes.  Returns (mean train loss, clean
    accuracy from the final cycle's logits).

    :param clean: if the string contains 'no', the clean cross-entropy term is
        dropped and only the adversarial term is used.
    """
    model.train()
    correct = 0
    train_loss = 0.0
    model.reset()
    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=args.eps,
                              nb_iter=args.nb_iter,
                              eps_iter=args.eps_iter,
                              rand_init=True,
                              clip_min=-1.0,
                              clip_max=1.0,
                              targeted=False)
    print(len(train_loader))
    for batch_idx, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        images = images.cuda()
        targets = targets.cuda()
        model.reset()
        # craft adversarial examples without tracking parameter gradients
        with ctx_noparamgrad_and_eval(model):
            adv_images = adversary.perturb(images, targets)
        # first half of the batch is clean, second half adversarial
        images_all = torch.cat((images, adv_images), 0)

        # Reset the model latent variables
        model.reset()
        # 'cifar10' models expose three intermediate blocks, 'fashion' two
        if (args.dataset == 'cifar10'):
            logits, orig_feature_all, block1_all, block2_all, block3_all = model(
                images_all, first=True, inter=True)
        elif (args.dataset == 'fashion'):
            logits, orig_feature_all, block1_all, block2_all = model(
                images_all, first=True, inter=True)
        ff_prev = orig_feature_all
        # f1 the original feature of clean images
        orig_feature, _ = torch.split(orig_feature_all, images.size(0))
        block1_clean, _ = torch.split(block1_all, images.size(0))
        block2_clean, _ = torch.split(block2_all, images.size(0))
        if (args.dataset == 'cifar10'):
            block3_clean, _ = torch.split(block3_all, images.size(0))
        logits_clean, logits_adv = torch.split(logits, images.size(0))

        # cross-entropy for the initial feedforward pass
        if not ('no' in clean):
            loss = (clean_parameter * F.cross_entropy(logits_clean, targets) +
                    F.cross_entropy(logits_adv, targets)) / (2 * (cycles + 1))
        else:
            loss = F.cross_entropy(logits_adv, targets) / (cycles + 1)

        for i_cycle in range(cycles):
            # backward (generative) pass: reconstruct features from logits
            if (args.dataset == 'cifar10'):
                recon, block1_recon, block2_recon, block3_recon = model(
                    logits, step='backward', inter_recon=True)
            elif (args.dataset == 'fashion'):
                recon, block1_recon, block2_recon = model(logits, step='backward',
                                                          inter_recon=True)
            recon_clean, recon_adv = torch.split(recon, images.size(0))
            recon_block1_clean, recon_block1_adv = torch.split(
                block1_recon, images.size(0))
            recon_block2_clean, recon_block2_adv = torch.split(
                block2_recon, images.size(0))
            # MSE pulls adversarial reconstructions toward the clean features;
            # normalized by number of feature levels and cycles
            if (args.dataset == 'cifar10'):
                recon_block3_clean, recon_block3_adv = torch.split(
                    block3_recon, images.size(0))
                loss += (F.mse_loss(recon_adv, orig_feature) +
                         F.mse_loss(recon_block1_adv, block1_clean) +
                         F.mse_loss(recon_block2_adv, block2_clean) +
                         F.mse_loss(recon_block3_adv, block3_clean)
                         ) * mse_parameter / (4 * cycles)
            elif (args.dataset == 'fashion'):
                loss += (F.mse_loss(recon_adv, orig_feature) +
                         F.mse_loss(recon_block1_adv, block1_clean) +
                         F.mse_loss(recon_block2_adv, block2_clean)
                         ) * mse_parameter / (3 * cycles)

            # feedforward
            # residual update of the input feature with step size res_parameter
            ff_current = ff_prev + args.res_parameter * (recon - ff_prev)
            logits = model(ff_current, first=False)
            ff_prev = ff_current
            logits_clean, logits_adv = torch.split(logits, images.size(0))
            if not ('no' in clean):
                loss += (
                    clean_parameter * F.cross_entropy(logits_clean, targets) +
                    F.cross_entropy(logits_adv, targets)) / (2 * (cycles + 1))
            else:
                loss += F.cross_entropy(logits_adv, targets) / (cycles + 1)

        pred = logits_clean.argmax(
            dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(targets.view_as(pred)).sum().item()
        loss.backward()
        if (args.grad_clip):
            nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        scheduler.step()
        # NOTE(review): accumulating the tensor (not loss.item()) keeps graphs
        # referenced longer than necessary — confirm intent
        train_loss += loss
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(images[0]), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    train_loss /= len(train_loader)
    acc = correct / len(train_loader.dataset)
    return train_loss, acc
rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False)
# Running counters for clean and attacked top-1/top-5 accuracy over the test set.
correct_1 = 0.0
correct_5 = 0.0
attack_correct_1 = 0.0
attack_correct_5 = 0.0
total = 0
for n_iter, (image, label) in enumerate(cifar100_test_loader):
    print("iteration: {}\ttotal {} iterations".format(
        n_iter + 1, len(cifar100_test_loader)))
    image = Variable(image).cuda()
    label = Variable(label).cuda()
    output = model(image)
    # untargeted PGD attack using the ground-truth labels
    adv_untargeted = adversary.perturb(image, label)
    attack_output = model(adv_untargeted)
    # dump the first 16 clean/adversarial pairs of each batch as JPEGs
    # NOTE(review): assumes batch size >= 16 — confirm against the loader
    for i in range(16):
        save_image_tensor2pillow(image[i], str(i) + '.jpg')
        save_image_tensor2pillow(adv_untargeted[i], 'attack_' + str(i) + '.jpg')
    _, pred = output.topk(5, 1, largest=True, sorted=True)
    _attack, pred_attack = attack_output.topk(5, 1, largest=True, sorted=True)
    # pred holds output's class indices
    label = label.view(label.size(0), -1).expand_as(pred)
    correct = pred.eq(label).float()
    attack_correct = pred_attack.eq(label).float()
    #compute top 5
    correct_5 += correct[:, :5].sum()
    #compute top1
    correct_1 += correct[:, :1].sum()
def validate(val_loader, model, criterion, args):
    """Evaluate ``model`` on adversarially perturbed validation data.

    Selects an L2 or Linf PGD adversary based on ``args.attack``, perturbs each
    batch (gradients re-enabled just for the attack), and reports top-1/top-5
    accuracy.  Accuracies are also saved to a .npy file on Google Drive.
    Clip bounds -2.1179/2.6400 correspond to ImageNet normalization extremes.
    Returns (top1.avg, top5.avg).
    """
    print("validating")
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    # NOTE(review): 'normalize' is defined but never used in this function
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    with torch.no_grad():
        # adversary = LinfPGDAttack(
        #     model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.15,
        #     nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
        #     targeted=False)
        # choose the adversary matching the requested attack budget
        if args.attack == 'l2_3':
            adversary = L2PGDAttack(
                model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=14.2737,
                nb_iter=20, eps_iter=1.784, rand_init=True, clip_min=-2.1179,
                clip_max=2.6400, targeted=False)
        if args.attack == 'l2_0.15':
            adversary = L2PGDAttack(
                model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.7137,
                nb_iter=20, eps_iter=0.09, rand_init=True, clip_min=-2.1179,
                clip_max=2.6400, targeted=False)
        if args.attack == 'linf1_1020':
            adversary = LinfPGDAttack(
                model, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=4.7579 / 1020, nb_iter=20, eps_iter=0.000233, rand_init=True,
                clip_min=-2.1179, clip_max=2.6400, targeted=False)
        if args.attack == 'linf4_255':
            adversary = LinfPGDAttack(
                model, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=19.0316 / 255, nb_iter=20, eps_iter=47.579 / 5100,
                rand_init=True, clip_min=-2.1179, clip_max=2.6400, targeted=False)
        # adversary = L1PGDAttack(
        #     model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=190.316,
        #     nb_iter=20, eps_iter=23.7895, rand_init=True, clip_min=-2.1179, clip_max=2.6400,
        #     targeted=False)
        end = time.time()
        print("enumerate dataloader")
        for i, (images, target) in enumerate(val_loader):
            # print(images)
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            if torch.cuda.is_available():
                target = target.cuda(args.gpu, non_blocking=True)

            # re-enable gradients inside the outer no_grad so the attack can
            # differentiate w.r.t. the inputs
            with torch.enable_grad():
                adv_untargeted = adversary.perturb(images, target)

            # compute output
            # if args.arch=='simclr':
            #     output = model(adv_untargeted)
            # elif args.arch=='linf_4' or args.arch=='linf_8' or args.arch=='l2_3':
            #     output= model((adv_untargeted))
            # else:
            output = model((adv_untargeted))
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))
        accuracy_array = []
        accuracy_array.append(top1.avg.to('cpu'))
        accuracy_array.append(top5.avg.to('cpu'))
        np.save(
            f'/content/gdrive/MyDrive/model_adv_loss/{args.attack}/{args.arch}_accuracy.npy',
            accuracy_array)

    return top1.avg, top5.avg
def _generate_adv_file(attack_method, num_classes, epsilon, set_size):
    """Craft adversarial examples for a tiny-ImageNet test set and save to HDF5.

    Loads a ResNet-50 checkpoint, builds the requested adversary, perturbs the
    whole test set (batches of 50), and writes the adversarial images plus the
    true targets to ``data/test_tiny_ImageNet_<set_size>_adv_<method>_<eps>.h5``.

    :param attack_method: one of "PGD", "FGSM", "Momentum", "STA", "DeepFool", "CW"
    :param num_classes: number of output classes (used by STA and CW)
    :param epsilon: attack budget (interpretation depends on the attack)
    :param set_size: number of test samples; must be a multiple of the batch size 50
    :raises ValueError: if ``attack_method`` is not recognized
    """
    # load model
    model = torch.load(os.path.join("checkpoint", "resnet50_epoch_22.pth"))
    model = model.cuda()

    # define attack
    if attack_method == "PGD":
        adversary = LinfPGDAttack(model,
                                  loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                  eps=epsilon,
                                  nb_iter=20,
                                  eps_iter=0.01,
                                  rand_init=True,
                                  clip_min=0.0,
                                  clip_max=1.0,
                                  targeted=False)
    elif attack_method == "FGSM":
        adversary = GradientSignAttack(
            model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            clip_min=0.0,
            clip_max=1.0,
            eps=epsilon,
            targeted=False)
    elif attack_method == "Momentum":
        adversary = MomentumIterativeAttack(
            model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=epsilon,
            nb_iter=20,
            decay_factor=1.0,
            eps_iter=1.0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            ord=np.inf)
    elif attack_method == "STA":
        adversary = SpatialTransformAttack(
            model,
            num_classes=num_classes,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            initial_const=0.05,
            max_iterations=500,
            search_steps=1,
            confidence=0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            abort_early=True)
    elif attack_method == "DeepFool":
        adversary = DeepFool(model,
                             max_iter=20,
                             clip_max=1.0,
                             clip_min=0.0,
                             epsilon=epsilon)
    elif attack_method == "CW":
        # FIX: was args.num_classes — use the num_classes parameter like the
        # other branches ('args' is not a parameter of this function).
        # NOTE(review): stock advertorch CarliniWagnerL2Attack has no 'epsilon'
        # kwarg — confirm this project's CW implementation accepts it.
        adversary = CarliniWagnerL2Attack(
            model,
            num_classes=num_classes,
            epsilon=epsilon,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            max_iterations=20,
            confidence=0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            abort_early=True)
    else:
        # FIX: previously an unknown method fell through and crashed later
        # with a NameError on 'adversary'; fail fast with a clear message.
        raise ValueError("unknown attack_method: {}".format(attack_method))

    # version two
    h5_store = h5py.File("data/test_tiny_ImageNet_" + str(set_size) + ".h5", "r")
    data = h5_store['data'][:]
    target = h5_store['true_target'][:]
    h5_store.close()  # FIX: close the read handle (was leaked before reuse below)
    data = torch.from_numpy(data)
    target = torch.from_numpy(target)
    test_dataset = ImageNetDataset(data, target)
    test_loader = DataLoader(dataset=test_dataset,
                             num_workers=4,
                             drop_last=True,
                             batch_size=50,
                             shuffle=False)
    torch.manual_seed(0)
    test_adv = np.zeros([set_size, 3, 64, 64])
    test_true_target = np.zeros([set_size])

    # perturb
    for batch_idx, (clndata, target) in enumerate(test_loader):
        print("{}/{}".format(batch_idx, set_size // 50))
        clndata, target = clndata.cuda().float(), target.cuda().long()
        # disable parameter grads while crafting the adversarial batch
        with ctx_noparamgrad_and_eval(model):
            # print(target)
            advdata = adversary.perturb(clndata, target)
        test_adv[batch_idx * 50:(batch_idx + 1) * 50, :, :, :] = advdata.detach().cpu().numpy()
        test_true_target[batch_idx * 50:(batch_idx + 1) * 50] = target.cpu().numpy()

    print("test_adv.shape:{}".format(test_adv.shape))
    print("test_true_target.shape:{}".format(test_true_target.shape))
    del model
    h5_store = h5py.File(
        "data/test_tiny_ImageNet_" + str(set_size) + "_adv_" +
        str(attack_method) + "_" + str(epsilon) + ".h5", 'w')
    h5_store.create_dataset('data', data=test_adv)
    h5_store.create_dataset('true_target', data=test_true_target)
    h5_store.close()
# PGD adversary for adversarial training; inputs are assumed normalized to
# [-1, 1] (clip bounds) — TODO confirm against the data pipeline.
adversary = LinfPGDAttack(
    net, loss_fn=nn.CrossEntropyLoss().cuda(), eps=16/255,
    nb_iter=7, eps_iter=4/255, rand_init=True, clip_min=-1.0, clip_max=1.0,
    targeted=False)

if args.alp:
    # Adversarial Logit Pairing: MSE between clean and adversarial logits
    criterion_alp = nn.MSELoss().cuda()

for epoch in range(1, args.nepoch+1):
    net.train()
    for batch_idx, (inputs, labels) in enumerate(trloader):
        inputs_cls, labels_cls = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        # craft adversarial inputs without tracking parameter gradients
        with ctx_noparamgrad_and_eval(net):
            inputs_adv = adversary.perturb(inputs_cls, labels_cls)
        if args.weight == 0:
            # pure adversarial training: loss on adversarial examples only
            outputs_adv = net(inputs_adv)
            loss = criterion(outputs_adv, labels_cls)
        else:
            # mixed training: single forward pass over clean + adversarial batch
            inputs_all = torch.cat([inputs_cls, inputs_adv], dim=0)
            labels_all = torch.cat([labels_cls, labels_cls], dim=0)
            outputs_all = net(inputs_all)
            outputs_cls, outputs_adv = torch.split(outputs_all, inputs_cls.size(0), dim=0)
            loss = criterion(outputs_cls, labels_cls)
            if args.alp:
                # weight * logit-pairing term between clean and adv logits
                loss += args.weight * criterion_alp(outputs_cls, outputs_adv)
            else:
                # weight * adversarial cross-entropy term
                loss += args.weight * criterion(outputs_adv, labels_cls)
# Move the current batch's label tensors to the GPU.
labelt = labelt.cuda()
mlabelt = mlabelt.cuda()
blabelt = blabelt.cuda()
"""
inputp, labelp, mlabelp, blabelp = get_test2(args.batch_size)
inputp = torch.FloatTensor(inputp)
labelp = torch.FloatTensor(labelp)
mlabelp = torch.FloatTensor(mlabelp)
blabelp = torch.FloatTensor(blabelp)
labelp = labelp.cuda()
mlabelp = mlabelp.cuda()
blabelp = blabelp.cuda()
inputp = inputp.cuda()
"""
adv_inputs = adversary.perturb(inputt, labelt)  # inputs
#adv_inputs = inputt
# Add unit Gaussian noise on top of the adversarial perturbation and clamp to
# [0, 255] — inputs are presumably raw pixel range, not normalized; confirm.
noise = torch.normal(0, 1, inputt.size()).cuda()
adv_inputs = torch.clamp(adv_inputs + noise, 0, 255)
# print(torch.mean(torch.abs())
# Forward both perturbed and clean inputs through the 3-headed network.
outt, outt2, outt3 = net(adv_inputs)
out1, out2, out3 = net(inputt)
# Track the mean absolute perturbation magnitude across batches.
noise_avg += torch.mean(torch.abs(adv_inputs - inputt))
if iter % args.print_freq == 0:
    top1 = accuracy(outt.data, blabelt, 1)
    top2 = accuracy(outt2.data, mlabelt, 2)
    #top3 = accuracy(outt3.data, labelt, 3)
    # only the first 19 output dimensions are scored here — TODO confirm why
    top3 = accuracy(outt3.data[:, :19], labelt[:, :19], 2)
    top4 = accuracy(outt3.data[:, :19], outt2[:, :19], 2)
    def train_epoch(self, model: nn.Module, train_loader: DataLoader,
                    val_clean_loader: DataLoader, val_triggered_loader: DataLoader,
                    epoch_num: int, use_amp: bool = False):
        """
        Runs one epoch of training on the specified model

        :param model: the model to train for one epoch
        :param train_loader: a DataLoader object pointing to the training dataset
        :param val_clean_loader: a DataLoader object pointing to the validation dataset that is clean
        :param val_triggered_loader: a DataLoader object pointing to the validation dataset that is triggered
        :param epoch_num: the epoch number that is being trained
        :param use_amp: if True, uses automated mixed precision for FP16 training.
        :return: a list of statistics for batches where statistics were computed
        """

        # Probability of Adversarial attack to occur in each iteration
        attack_prob = self.optimizer_cfg.training_cfg.adv_training_ratio
        pid = os.getpid()
        train_dataset_len = len(train_loader.dataset)
        loop = tqdm(train_loader, disable=self.optimizer_cfg.reporting_cfg.disable_progress_bar)

        scaler = None
        if use_amp:
            scaler = torch.cuda.amp.GradScaler()

        train_n_correct, train_n_total = None, None

        # Define parameters of the adversarial attack
        attack_eps = float(self.optimizer_cfg.training_cfg.adv_training_eps)
        attack_iterations = int(self.optimizer_cfg.training_cfg.adv_training_iterations)
        # step size chosen so the attack can traverse the eps-ball and back
        eps_iter = (2.0 * attack_eps) / float(attack_iterations)
        attack = LinfPGDAttack(
            predict=model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=attack_eps,
            nb_iter=attack_iterations,
            eps_iter=eps_iter)

        sum_batchmean_train_loss = 0
        running_train_acc = 0
        num_batches = len(train_loader)
        model.train()
        for batch_idx, (x, y_truth) in enumerate(loop):
            x = x.to(self.device)
            y_truth = y_truth.to(self.device)

            # put network into training mode & zero out previous gradient computations
            self.optimizer.zero_grad()

            # get predictions based on input & weights learned so far
            if use_amp:
                with torch.cuda.amp.autocast():
                    # add adversarial noise via l_inf PGD attack
                    # only apply attack to attack_prob of the batches
                    if attack_prob and np.random.rand() <= attack_prob:
                        with ctx_noparamgrad_and_eval(model):
                            x = attack.perturb(x, y_truth)
                    y_hat = model(x)
                    # compute metrics
                    batch_train_loss = self._eval_loss_function(y_hat, y_truth)
            else:
                # add adversarial noise vis lin PGD attack
                if attack_prob and np.random.rand() <= attack_prob:
                    with ctx_noparamgrad_and_eval(model):
                        x = attack.perturb(x, y_truth)
                y_hat = model(x)
                batch_train_loss = self._eval_loss_function(y_hat, y_truth)

            sum_batchmean_train_loss += batch_train_loss.item()

            # incrementally update running accuracy counters
            running_train_acc, train_n_total, train_n_correct = default_optimizer._running_eval_acc(
                y_hat, y_truth,
                n_total=train_n_total,
                n_correct=train_n_correct,
                soft_to_hard_fn=self.soft_to_hard_fn,
                soft_to_hard_fn_kwargs=self.soft_to_hard_fn_kwargs)

            # compute gradient
            if use_amp:
                # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
                # Backward passes under autocast are not recommended.
                # Backward ops run in the same dtype autocast chose for corresponding forward ops.
                scaler.scale(batch_train_loss).backward()
            else:
                # dump diagnostic data if training has diverged to NaN
                if np.isnan(sum_batchmean_train_loss) or np.isnan(running_train_acc):
                    default_optimizer._save_nandata(x, y_hat, y_truth, batch_train_loss,
                                                    sum_batchmean_train_loss, running_train_acc,
                                                    train_n_total, train_n_correct, model)
                batch_train_loss.backward()

            # perform gradient clipping if configured
            if self.optimizer_cfg.training_cfg.clip_grad:
                if use_amp:
                    # Unscales the gradients of optimizer's assigned params in-place
                    scaler.unscale_(self.optimizer)
                if self.optimizer_cfg.training_cfg.clip_type == 'norm':
                    # clip_grad_norm_ modifies gradients in place
                    # see: https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html
                    torch_clip_grad.clip_grad_norm_(model.parameters(),
                                                    self.optimizer_cfg.training_cfg.clip_val,
                                                    **self.optimizer_cfg.training_cfg.clip_kwargs)
                elif self.optimizer_cfg.training_cfg.clip_type == 'val':
                    # clip_grad_val_ modifies gradients in place
                    # see: https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html
                    torch_clip_grad.clip_grad_value_(
                        model.parameters(), self.optimizer_cfg.training_cfg.clip_val)
                else:
                    msg = "Unknown clipping type for gradient clipping!"
                    logger.error(msg)
                    raise ValueError(msg)

            if use_amp:
                # scaler.step() first unscales the gradients of the optimizer's assigned params.
                # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
                # otherwise, optimizer.step() is skipped.
                scaler.step(self.optimizer)
                # Updates the scale for next iteration.
                scaler.update()
            else:
                self.optimizer.step()

            # report batch statistics to tensorflow
            if self.tb_writer:
                try:
                    batch_num = int(epoch_num * num_batches + batch_idx)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-train_loss', batch_train_loss.item(),
                                              global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-running_train_acc', running_train_acc,
                                              global_step=batch_num)
                except:  # TODO: catch specific expcetions
                    pass

            loop.set_description('Epoch {}/{}'.format(epoch_num + 1, self.num_epochs))
            loop.set_postfix(avg_train_loss=batch_train_loss.item())

            if batch_idx % self.num_batches_per_logmsg == 0:
                logger.info('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tTrainLoss: {:.6f}\tTrainAcc: {:.6f}'.format(
                    pid, epoch_num, batch_idx * len(x), train_dataset_len,
                    100. * batch_idx / num_batches, batch_train_loss.item(), running_train_acc))

        train_stats = EpochTrainStatistics(running_train_acc,
                                           sum_batchmean_train_loss / float(num_batches))

        # if we have validation data, we compute on the validation dataset
        num_val_batches_clean = len(val_clean_loader)
        if num_val_batches_clean > 0:
            logger.info('Running Validation on Clean Data')
            running_val_clean_acc, _, _, val_clean_loss = \
                default_optimizer._eval_acc(val_clean_loader, model, self.device,
                                            self.soft_to_hard_fn, self.soft_to_hard_fn_kwargs,
                                            self._eval_loss_function)
        else:
            logger.info("No dataset computed for validation on clean dataset!")
            running_val_clean_acc = None
            val_clean_loss = None

        num_val_batches_triggered = len(val_triggered_loader)
        if num_val_batches_triggered > 0:
            logger.info('Running Validation on Triggered Data')
            running_val_triggered_acc, _, _, val_triggered_loss = \
                default_optimizer._eval_acc(val_triggered_loader, model, self.device,
                                            self.soft_to_hard_fn, self.soft_to_hard_fn_kwargs,
                                            self._eval_loss_function)
        else:
            logger.info(
                "No dataset computed for validation on triggered dataset!")
            running_val_triggered_acc = None
            val_triggered_loss = None

        validation_stats = EpochValidationStatistics(running_val_clean_acc, val_clean_loss,
                                                     running_val_triggered_acc, val_triggered_loss)

        if num_val_batches_clean > 0:
            logger.info('{}\tTrain Epoch: {} \tCleanValLoss: {:.6f}\tCleanValAcc: {:.6f}'.format(
                pid, epoch_num, val_clean_loss, running_val_clean_acc))
        if num_val_batches_triggered > 0:
            logger.info('{}\tTrain Epoch: {} \tTriggeredValLoss: {:.6f}\tTriggeredValAcc: {:.6f}'.format(
                pid, epoch_num, val_triggered_loss, running_val_triggered_acc))

        if self.tb_writer:
            try:
                batch_num = int((epoch_num + 1) * num_batches)
                if num_val_batches_clean > 0:
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-clean-val-loss', val_clean_loss,
                                              global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-clean-val_acc', running_val_clean_acc,
                                              global_step=batch_num)
                if num_val_batches_triggered > 0:
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-triggered-val-loss', val_triggered_loss,
                                              global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-triggered-val_acc', running_val_triggered_acc,
                                              global_step=batch_num)
            except:
                pass

        # update the lr-scheduler if necessary
        if self.lr_scheduler is not None:
            if self.optimizer_cfg.training_cfg.lr_scheduler_call_arg is None:
                self.lr_scheduler.step()
            elif self.optimizer_cfg.training_cfg.lr_scheduler_call_arg.lower() == 'val_acc':
                val_acc = validation_stats.get_val_acc()
                if val_acc is not None:
                    self.lr_scheduler.step(val_acc)
                else:
                    msg = "val_clean_acc not defined b/c validation dataset is not defined! Ignoring LR step!"
                    logger.warning(msg)
            elif self.optimizer_cfg.training_cfg.lr_scheduler_call_arg.lower() == 'val_loss':
                val_loss = validation_stats.get_val_loss()
                if val_loss is not None:
                    self.lr_scheduler.step(val_loss)
                else:
                    msg = "val_clean_loss not defined b/c validation dataset is not defined! Ignoring LR step!"
                    logger.warning(msg)
            else:
                msg = "Unknown mode for calling lr_scheduler!"
                logger.error(msg)
                raise ValueError(msg)

        return train_stats, validation_stats
def main():
    """Probe how layer-wise class associations predict model behavior under PGD.

    Loads a pretrained MNIST CNN and precomputed per-layer association maps,
    crafts untargeted PGD adversarial examples for one small batch (size 5),
    then for the first three layers compares: the model's prediction, the
    class implied by the layer's association map (via CogMem activations on
    the hooked intermediate outputs), and the ground truth.  Prints agreement
    counters and the top-3 memory activations for adversarial vs. clean input.
    """
    data = []
    torch.cuda.empty_cache()

    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=60000, metavar='N',
                        help='input batch size for training (default: 60000)')
    parser.add_argument('--test-batch-size', type=int, default=10000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=1, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1} if use_cuda else {}
    # full-set loaders (single giant batch each); shuffle disabled so sample
    # order is reproducible across runs
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data', train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])), batch_size=60000, shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data', train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])), batch_size=10000, shuffle=False, **kwargs)
    # tiny loader used for the actual probing below
    test_loader_small = torch.utils.data.DataLoader(datasets.MNIST(
        '../data', train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])), batch_size=5, shuffle=False, **kwargs)

    strage = device
    model = Net()
    model.to(device)
    checkpoint = torch.load('mnist_cnn.pt',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint)

    layer_sel = 1
    # per-layer association maps precomputed elsewhere
    Associations = []
    for xin in range(4):
        temp = torch.load('map_association_' + str(xin) + '.pt')
        Associations.append(temp)

    # register forward hooks on every top-level module to capture activations
    hookF = [Hook(layer[1]) for layer in list(model._modules.items())]

    # Let's test how this association is predictive of the test set
    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=0.2,
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=0.0,
                              clip_max=1.0,
                              targeted=False)
    # grab just the first small batch
    for data, target in test_loader_small:
        break
    labels_ = target
    data = data.to(device)
    labels_ = labels_.to(device)
    adv_untargeted = adversary.perturb(data, labels_)
    #adv_untargeted=data
    print(adv_untargeted.size())
    pred_n = test(args, model, device, test_loader_small, hookF, adv_untargeted)

    for ttin in range(3):
        layer_sel = ttin
        act_map = Associations[layer_sel]
        # hooked intermediate activations for the selected layer
        # NOTE(review): 'intermediate_output' is not defined in this function —
        # presumably a module-level global filled by the hooks; confirm
        roV = intermediate_output[layer_sel]
        sel = Associations[layer_sel]
        sel = sel.numpy()
        wm = torch.load('wm_' + str(layer_sel) + '.pt',
                        map_location=lambda storage, loc: storage)
        fp = open('labels_' + str(layer_sel) + '.json')
        # labels for wm i.e., the labels of the test set.
        label = json.load(fp)
        fp.close()
        cog = CogMem_load(wm, label)
        for data, target in test_loader_small:
            break
        labels_ = target
        cog.foward(roV)
        pred = cog.pred.long()
        #pred=cog.pred.long().cpu().numpy()

        # agreement counters, see prints below for their meaning
        total_1 = 0
        total_2 = 0
        total_3 = 0
        total_4 = 0
        total_5 = 0
        cons1 = 0
        cons2 = 0
        temp = 0
        corr = np.zeros((10, 10))
        mem = []
        #print ('sel shape',sel.shape)
        #print (cog.image.size())
        #print ('pred',pred.size())
        for xi, xin in enumerate(pred_n):
            cls = xin.item()
            label_t = labels_[xi].long().item()
            # memory activation vector for sample xi
            v2 = cog.image[:, xi]
            idx = torch.argsort(v2).cpu().numpy()
            mem.append(v2.cpu().numpy())
            # top-3 most activated memory slots
            idx = np.flip(idx, 0)[:3]
            tar = sel[idx, :]
            # activation-weighted vote over the 10 classes
            temp_v = np.zeros(10)
            for zin in idx:
                temp_v = temp_v + sel[zin, :] * v2[zin].item()
            #print (temp_v)
            #tar=sel[idx,:]
            #idx3=cog.labels[idx].long().item()
            idx2 = np.argmax(temp_v)
            idx3 = np.argsort(temp_v)
            idx3 = np.flip(idx3, 0)[:3]
            sum_v = np.sum(np.exp(temp_v))
            #print (xi, idx, cls, idx3, idx2)
            # cls: prediction, idx2: max from association, idx3, label from truth, idx_truth: ground truth
            if cls == idx2:
                total_1 = total_1 + 1
            if label_t == cls:
                total_2 = total_2 + 1
            if label_t == idx2:
                total_3 = total_3 + 1
            if label_t != cls:
                # model was wrong: does the association agree with the model?
                temp = temp + 1
                if cls == idx2:
                    total_4 = total_4 + 1
            else:
                # model was right: does the association agree with the model?
                temp = temp + 1
                if cls == idx2:
                    total_5 = total_5 + 1
            if cls in idx3:
                cons1 = cons1 + 1
            if label_t in idx3:
                cons2 = cons2 + 1
            # softmax-weighted co-occurrence of the predicted class with the
            # other top-3 association classes
            for c1 in idx3:
                if c1 == cls:
                    for c2 in idx3:
                        if c1 != c2:
                            corr[c1, c2] = corr[c1, c2] + np.exp(temp_v[c2]) / sum_v
        max_v = np.amax(corr)
        #corr=corr/500.0
        print('pred. of prediction:', total_1, 'global pred. of actual class:',
              total_2, 'local pred. of actual class:', total_3)
        #print ('cons1',cons1,'cons2',cons2)
        #print (idx3)
        mem = np.array(mem)
        # clean-input memory activations saved by a previous run
        data = np.loadtxt('mem_' + str(layer_sel) + '.txt')
        temp = np.argsort(mem[0])
        temp = np.flip(temp, 0)
        print('adv', temp[:3])
        #print (data.shape)
        temp = np.argsort(data[0, :])
        temp = np.flip(temp, 0)
        print('clean', temp[:3])
        diff = data[0, :] - mem[0]
        #print ('diff',diff)
        #print (np.amax(diff),np.mean(diff),np.std(diff),np.amin(diff))
        #pylab.figure(ttin+1)
        #pylab.plot(data[0,:], label='clean')
        #pylab.plot(mem[0], label='adv')
        #pylab.legend()
        torch.cuda.empty_cache()
        del cog, roV
    pylab.show()
def experiment(num_shared_classes, percent_shared_data, n_epochs=200,batch_size=128, eps=.3, adv_steps=100, learning_rate=.0004, gpu_num=1,adv_training=False,task="CIFAR100"):
    """Train two models on partially-overlapping class/data splits, then attack the shared test set.

    The label space of ``task``'s dataset is split into ``num_shared_classes`` classes seen by
    both models plus two disjoint halves of the remaining classes (one half per model).
    ``percent_shared_data`` controls how many examples of the shared classes the two models
    have in common.  After training, an untargeted LinfPGD adversary is built on model1 and
    both models are evaluated on clean and adversarial versions of the shared test set.

    Args:
        num_shared_classes: number of classes given to both models.
        percent_shared_data: percentage (0-100) of the shared classes' examples common to both.
        n_epochs, batch_size, learning_rate: standard training hyper-parameters.
        eps, adv_steps: LinfPGD epsilon budget and iteration count for the final attack.
        gpu_num: CUDA device index to use; falls back to CPU when invalid/unavailable.
        adv_training: when True, earmarks a couple of batches for adversarial training
            (the perturbation step itself is currently disabled below).
        task: "CIFAR100", "IMAGENET", "FASHIONMNIST"; anything else selects CIFAR10.

    Side effects: prints progress/accuracies and saves both models plus the adversary
    under ./models/.
    """
    print("epochs,batch_size,eps,adv_steps,learning_rate,task")
    print(n_epochs,batch_size,eps,adv_steps,learning_rate,task)
    cuda = torch.cuda.is_available()

    # Default transforms (CIFAR-100-style normalization stats); overridden per task below.
    transform_test = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5070751592371323, 0.48654887331495095, 0.4409178433670343),
                              (0.2673342858792401, 0.2564384629170883, 0.27615047132568404))])
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5070751592371323, 0.48654887331495095, 0.4409178433670343),
                             (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)),
    ])
    if task.upper() == "CIFAR100":
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        ])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        ])
        train_data = CIFAR100("data/",transform=transform_train, download=False)
        test_data = CIFAR100("data/", train=False, transform=transform_test, download=False)
    elif task.upper() == "IMAGENET":
        train_data = ImageNet('data/imagenet', split='train', download=False)
        test_data = ImageNet('data/imagenet', split='val', download=False)
    elif task.upper() == "FASHIONMNIST":
        # FashionMNIST is grayscale; convert to RGB so 3-channel ResNet stems accept it.
        transform = transforms.Compose([transforms.Lambda(lambda image: image.convert('RGB')),
                                        transforms.ToTensor()])
        train_data = FashionMNIST('data/fashionmnist',transform=transform, train=True, download=False)
        test_data = FashionMNIST('data/fashionmnist', transform=transform, train=False, download=False)
    else:
        train_data = CIFAR10("data/",transform=transform_train,download=False)
        test_data = CIFAR10("data/", train=False, transform=transform_test,download=False)

    # Split the label space: shared classes + two disjoint halves of the remainder.
    all_classes = set([x[1] for x in train_data])
    shared_classes = random.sample(all_classes, num_shared_classes)
    split_classes = [c for c in all_classes if c not in shared_classes]  # get classes not shared
    if len(split_classes) % 2 == 1:
        # Odd count: randomly remove one so both models get the same number of private classes.
        split_classes.pop(random.randint(0, len(split_classes) - 1))
    model1_split = random.sample(split_classes, len(split_classes) // 2)
    model2_split = [c for c in split_classes if c not in model1_split]
    model1_classes = model1_split
    model2_classes = model2_split
    model1_classes.sort()
    model2_classes.sort()
    # DEBUG:
    print("shared classes: {}".format(shared_classes))
    print("model1 classes: {}".format(model1_classes))
    print("model2 classes: {}".format(model2_classes))

    model1_x_train = []
    model1_y_train = []
    model2_x_train = []
    model2_y_train = []
    # train data splits: gather each model's private-class examples
    for index in range(len(train_data)):
        current_class = train_data[index][1]
        # model 1
        if current_class in model1_classes:
            model1_x_train.append(train_data[index][0])
            model1_y_train.append(train_data[index][1])
        # model 2
        if current_class in model2_classes:
            model2_x_train.append(train_data[index][0])
            model2_y_train.append(train_data[index][1])

    # Subsample the private classes when data is not fully shared.
    # NOTE(review): each private class is halved with a fixed .5 ratio regardless of
    # percent_shared_data — confirm this is intended (mirrors max_examples below).
    if percent_shared_data < 100:
        new_model1_x_train = []
        new_model1_y_train = []
        for curr_class in model1_classes:
            temp_data_x = []
            temp_data_y = []
            # get all examples of class
            for i in range(len(model1_x_train)):
                if(model1_y_train[i] == curr_class):
                    temp_data_x.append(model1_x_train[i])
                    temp_data_y.append(model1_y_train[i])
            # split data by half the size
            total_size = len(temp_data_x)
            shared_size = int(total_size * .5)
            shared_indices = random.sample(list(range(len(temp_data_x))),shared_size)
            new_model1_x_train += [temp_data_x[i] for i in shared_indices]
            new_model1_y_train += [temp_data_y[i] for i in shared_indices]
        # split for model2
        new_model2_x_train = []
        new_model2_y_train = []
        for curr_class in model2_classes:
            temp_data_x = []
            temp_data_y = []
            # get all examples of class
            for i in range(len(model2_x_train)):
                if(model2_y_train[i] == curr_class):
                    temp_data_x.append(model2_x_train[i])
                    temp_data_y.append(model2_y_train[i])
            # split data by half the size
            total_size = len(temp_data_x)
            shared_size = int(total_size * .5)
            shared_indices = random.sample(list(range(len(temp_data_x))),shared_size)
            new_model2_x_train += [temp_data_x[i] for i in shared_indices]
            new_model2_y_train += [temp_data_y[i] for i in shared_indices]
        # rewrite dataset
        model1_x_train = new_model1_x_train
        model1_y_train = new_model1_y_train
        model2_x_train = new_model2_x_train
        model2_y_train = new_model2_y_train

    # Carry out data splitting for shared classes and add to both datasets:
    # a common chunk (percent_shared_data% of half the class) plus disjoint remainders.
    for shared_class in shared_classes:
        all_examples_x_train = []
        all_examples_y_train = []
        # get all examples of class
        for index in range(len(train_data)):
            current_class = train_data[index][1]
            if current_class == shared_class:
                all_examples_x_train.append(train_data[index][0])
                all_examples_y_train.append(train_data[index][1])
        # max number of samples per model (half, so disjoint splits are possible)
        max_examples = len(all_examples_x_train) // 2
        # get shared examples (popped so they cannot be re-drawn as disjoint ones)
        shared_examples_x_train = []
        shared_examples_y_train = []
        num_shared_examples = max_examples * percent_shared_data // 100
        for _ in range(num_shared_examples):
            random_int = random.randint(0, len(all_examples_x_train) - 1)
            shared_examples_x_train.append(all_examples_x_train.pop(random_int))
            shared_examples_y_train.append(all_examples_y_train.pop(random_int))
        # get disjoint examples, drawn alternately for each model
        disjoint_examples = max_examples - len(shared_examples_x_train)
        model1_examples_x_train = []
        model1_examples_y_train = []
        model2_examples_x_train = []
        model2_examples_y_train = []
        for _ in range(disjoint_examples):
            model1_rand_int = random.randint(0, len(all_examples_x_train) - 1)
            model1_examples_x_train.append(all_examples_x_train.pop(model1_rand_int))
            model1_examples_y_train.append(all_examples_y_train.pop(model1_rand_int))
            model2_rand_int = random.randint(0, len(all_examples_x_train) - 1)
            model2_examples_x_train.append(all_examples_x_train.pop(model2_rand_int))
            model2_examples_y_train.append(all_examples_y_train.pop(model2_rand_int))
        # add to the datasets for the model
        model1_x_train = shared_examples_x_train + model1_x_train + model1_examples_x_train
        model1_y_train = shared_examples_y_train + model1_y_train + model1_examples_y_train
        model2_x_train = shared_examples_x_train + model2_x_train + model2_examples_x_train
        model2_y_train = shared_examples_y_train + model2_y_train + model2_examples_y_train

    # Remap raw dataset labels to dense 0..K-1 ids; shared classes reuse model1's mapping.
    model1_class_mapping = {}
    model2_class_mapping = {}
    model1_classes_inc = 0
    # go through model1 and assign unique classes an incremental int starting at 0
    for index in range(len(model1_y_train)):
        # if it doesn't exist assign
        if model1_y_train[index] not in model1_class_mapping.keys():
            model1_class_mapping[model1_y_train[index]] = model1_classes_inc
            model1_classes_inc += 1
        # append assigned token
        model1_y_train[index] = model1_class_mapping[model1_y_train[index]]
    model2_classes_inc = 0
    # go through model2 and assign unique classes an incremental int starting at 0
    for index in range(len(model2_y_train)):
        # if it doesn't exist in model2 OR in model1, assign it
        if model2_y_train[index] not in model2_class_mapping.keys() and model2_y_train[index] not in model1_class_mapping.keys():
            model2_class_mapping[model2_y_train[index]] = model2_classes_inc
            model2_y_train[index] = model2_classes_inc
            model2_classes_inc += 1
        elif model2_y_train[index] in model1_class_mapping.keys():
            model2_y_train[index] = model1_class_mapping[model2_y_train[index]]
        else:
            model2_y_train[index] = model2_class_mapping[model2_y_train[index]]

    model1_x_test = []
    model1_y_test = []
    model2_x_test = []
    model2_y_test = []
    shared_x_test = []
    shared_y_test = []
    # test data splits
    for index in range(len(test_data)):
        current_class = test_data[index][1]
        # model 1
        if current_class in model1_classes:
            model1_x_test.append(test_data[index][0])
            model1_y_test.append(test_data[index][1])
        # model 2
        if current_class in model2_classes:
            model2_x_test.append(test_data[index][0])
            model2_y_test.append(test_data[index][1])
        # shared classes for eval
        if current_class in shared_classes:
            shared_x_test.append(test_data[index][0])
            shared_y_test.append(test_data[index][1])
    model1_x_test += shared_x_test
    model1_y_test += shared_y_test
    model2_x_test += shared_x_test
    model2_y_test += shared_y_test
    # remap test labels with the same mappings used for training
    for index in range(len(model1_y_test)):
        model1_y_test[index] = model1_class_mapping[model1_y_test[index]]
    for index in range(len(model2_y_test)):
        if model2_y_test[index] in model1_class_mapping.keys():
            model2_y_test[index] = model1_class_mapping[model2_y_test[index]]
        else:
            model2_y_test[index] = model2_class_mapping[model2_y_test[index]]

    model1_classes_len = len(set([item for item in model1_y_train]))
    model2_classes_len = len(set([item for item in model2_y_train]))
    if task.upper() == "CIFAR100":
        model1 = models.wide_resnet50_2()
        model2 = models.wide_resnet50_2()
        # BUGFIX: model1's classification head was previously left at the torchvision
        # default (1000 classes) because this line was commented out; resize it to this
        # experiment's label count, matching model2 and every other task branch.
        model1.fc = nn.Linear(2048, model1_classes_len)
        model2.fc = nn.Linear(2048, model2_classes_len)
    elif task.upper() == "IMAGENET":
        model1 = models.wide_resnet50_2()
        model2 = models.wide_resnet50_2()
        model1.fc = nn.Linear(2048, model1_classes_len)
        model2.fc = nn.Linear(2048, model2_classes_len)
    elif task.upper() == "FASHIONMNIST":
        model1 = models.resnet18()
        model2 = models.resnet18()
        model1.fc = nn.Linear(512, model1_classes_len)
        model2.fc = nn.Linear(512, model2_classes_len)
    else:
        # Get model (using ResNet50 for now)
        model1 = models.resnet50()
        model2 = models.resnet50()
        model1.fc = nn.Linear(2048, model1_classes_len)
        model2.fc = nn.Linear(2048, model2_classes_len)

    cuda = torch.cuda.is_available()
    if gpu_num in range(torch.cuda.device_count()):
        device = torch.device('cuda:'+str(gpu_num) if cuda else 'cpu')
        torch.cuda.set_device(device)
    else:
        device = torch.device('cpu')

    # Model Training
    model1 = model1.to(device)
    model2 = model2.to(device)
    criterion1 = nn.CrossEntropyLoss()
    optimizer1 = optim.AdamW(model1.parameters(), lr=learning_rate)
    scheduler1 = optim.lr_scheduler.MultiStepLR(optimizer1,milestones=[60, 120, 160], gamma=.2) #learning rate decay
    criterion2 = nn.CrossEntropyLoss()
    optimizer2 = optim.AdamW(model2.parameters(), lr=learning_rate)
    scheduler2 = optim.lr_scheduler.MultiStepLR(optimizer2,milestones=[60, 120, 160], gamma=.2) #learning rate decay

    # zip features/labels together and build the two loaders
    train_set1 = list(zip(model1_x_train, model1_y_train))
    trainloader_1 = torch.utils.data.DataLoader(train_set1, batch_size=batch_size, shuffle=True, num_workers=2)
    train_set2 = list(zip(model2_x_train, model2_y_train))
    trainloader_2 = torch.utils.data.DataLoader(train_set2, batch_size=batch_size, shuffle=True, num_workers=2)

    # TODO change this
    num_adv_batchs = 2 if adv_training else 0
    adv_batches = random.sample(range(len(trainloader_1)), num_adv_batchs)

    # train model 1
    for epoch in tqdm(range(n_epochs),desc="Epoch"):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader_1, 0):
            if cuda:
                data = tuple(d.cuda() for d in data)
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            # zero the parameter gradients
            optimizer1.zero_grad()
            # forward + backward + optimize
            # train adversarial (currently disabled; adv_batches selected above)
            # if i in adv_batches:
            #     adversary = LinfPGDAttack(
            #         model1, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=eps,
            #         nb_iter=adv_steps, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
            #         targeted=False)
            #     inputs = adversary.perturb(inputs, labels)
            outputs = model1(inputs)
            loss = criterion1(outputs, labels)
            loss.backward()
            optimizer1.step()
            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
    print('Finished Training model1')

    # train model 2
    for epoch in tqdm(range(n_epochs),desc="Epoch"):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader_2, 0):
            if cuda:
                data = tuple(d.cuda() for d in data)
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            # zero the parameter gradients
            optimizer2.zero_grad()
            # forward + backward + optimize
            outputs = model2(inputs)
            loss = criterion2(outputs, labels)
            loss.backward()
            optimizer2.step()
            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
    print('Finished Training model2')

    model1 = model1.to("cpu")
    model2 = model2.to("cpu")

    # convert shared classes to new labels
    for index in range(len(shared_y_test)):
        if shared_y_test[index] in model1_class_mapping.keys():
            shared_y_test[index] = model1_class_mapping[shared_y_test[index]]
        else:
            shared_y_test[index] = model2_class_mapping[shared_y_test[index]]
    shared_y_test = torch.Tensor(shared_y_test).long()
    model1_x_test = torch.stack(model1_x_test)
    model2_x_test = torch.stack(model2_x_test)
    shared_x_test = torch.stack(shared_x_test)

    # Build an untargeted PGD adversary on model1 and perturb the shared test set.
    model1.eval()
    adversary = LinfPGDAttack(
        model1, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=eps,
        nb_iter=adv_steps, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
        targeted=False)
    adv_untargeted = adversary.perturb(shared_x_test, shared_y_test)

    timestr = time.strftime("%Y%m%d_%H%M%S")
    print("saving models at", timestr)
    model1_name = './models/{}_{}_{}_model1_{}.pickle'.format(task,num_shared_classes, percent_shared_data,timestr)
    model2_name = './models/{}_{}_{}_model2_{}.pickle'.format(task,num_shared_classes, percent_shared_data,timestr)
    adv_name = './models/{}_{}_{}_adv_{}.pickle'.format(task,num_shared_classes, percent_shared_data,timestr)
    torch.save(model1, model1_name)
    torch.save(model2, model2_name)
    torch.save(adversary, adv_name)

    # Eval
    with torch.no_grad():
        model1.eval()
        model2.eval()
        # model1 outputs
        output1 = model1(model1_x_test)
        shared_output1 = model1(shared_x_test)
        adv_output1 = model1(adv_untargeted)
        # model2 outputs
        output2 = model2(model2_x_test)
        shared_output2 = model2(shared_x_test)
        adv_output2 = model2(adv_untargeted)
        if task.upper() == "CIFAR100":
            # model 1 (top-1 and top-5)
            print("model1_acc:", accuracy(output1,model1_y_test))
            print("model1_acc_5:", accuracy_n(output1,model1_y_test,5))
            print("model1_acc_shared:", accuracy(shared_output1,shared_y_test))
            print("model1_acc_5_shared:", accuracy_n(shared_output1,shared_y_test,5))
            print("model1_adv_acc_shared:", accuracy(adv_output1,shared_y_test))
            print("model1_adv_acc_5_shared:", accuracy_n(adv_output1,shared_y_test,5))
            print()
            # model 2
            print("model2_acc:", accuracy(output2,model2_y_test))
            print("model2_acc_5:", accuracy_n(output2,model2_y_test,5))
            print("model2_acc_shared:", accuracy(shared_output2,shared_y_test))
            print("model2_acc_5_shared:", accuracy_n(shared_output2,shared_y_test,5))
            print("model2_adv_acc_shared:", accuracy(adv_output2,shared_y_test))
            print("model2_adv_acc_5_shared:", accuracy_n(adv_output2,shared_y_test,5))
        else:
            # model 1
            print("model1_acc:", accuracy(output1,model1_y_test))
            print("model1_acc_shared:", accuracy(shared_output1,shared_y_test))
            print("model1_adv_acc_shared:", accuracy(adv_output1,shared_y_test))
            print()
            # model 2
            print("model2_acc:", accuracy(output2,model2_y_test))
            print("model2_acc_shared:", accuracy(shared_output2,shared_y_test))
            print("model2_adv_acc_shared:", accuracy(adv_output2,shared_y_test))
def main():
    """Entry point: PGD-based adversarial training of a FeatureDefenseModel.

    Parses args, resumes from an existing checkpoint when present, then alternates
    (every ``args.test_interval`` epochs) between evaluating clean/adversarial test
    accuracy and training on PGD adversarial examples only.  Saves the running
    checkpoint each epoch and the best-by-adversarial-accuracy model separately.
    """
    args = parse_args()
    # Restrict visible GPUs before any CUDA initialization happens.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    # After masking, visible devices are renumbered 0..N-1.
    gpus = [idx for idx, gpu in enumerate(args.gpu.split(","))]
    work_dir = '{}/train_pytorch_model/adversarial_train/feature_denoise/'.format(
        PY_ROOT)
    # pretrained_model_path = '{}/train_pytorch_model/adversarial_train/feature_denoise/{}@{}_{}_{}_{}.pth.tar'.format(
    #     PY_ROOT, args.dataset, args.arch, denoise_str, args.filter_type, args.ksize)
    assert os.path.exists(work_dir), "{} does not exist!".format(work_dir)
    os.makedirs(work_dir, exist_ok=True)  # NOTE(review): redundant after the assert above
    set_log_file(work_dir + "/adv_train_{}.log".format(args.dataset))
    log.info('Command line is: {}'.format(' '.join(sys.argv)))
    log.info('Called with args:')
    print_args(args)
    # Running checkpoint (overwritten every epoch) and best-model checkpoint paths.
    model_path = '{}/train_pytorch_model/adversarial_train/feature_denoise/pgd_adv_train_{}@{}_{}_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.filter_type, args.ksize)
    best_model_path = '{}/train_pytorch_model/adversarial_train/feature_denoise/best_pgd_adv_train_{}@{}_{}_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.filter_type, args.ksize)
    model = FeatureDefenseModel(args.dataset, args.arch, no_grad=False)
    model = model.cuda()
    resume_epoch = 0
    # Resume training from the running checkpoint when one exists.
    if os.path.exists(model_path):
        state_dict = torch.load(model_path,
                                map_location=lambda storage, location: storage)
        model.load_state_dict(state_dict["state_dict"])
        resume_epoch = state_dict["epoch"]
        log.info("Load model from {} at epoch {}".format(
            model_path, resume_epoch))
    # model = model.to(args.gpu)
    if torch.cuda.is_available():
        model.cuda(gpus[0])
    log.info(
        "After trained over, model will be saved to {}".format(model_path))
    train_loader = get_img_label_data_loader(args.dataset, args.batch_size, True)
    test_loader = get_img_label_data_loader(args.dataset, args.batch_size, False)
    # NOTE(review): wrapping the loss itself in DataParallel is unusual — confirm the
    # reduced loss is a scalar before backward() in the multi-GPU case.
    if torch.cuda.device_count() > 1:
        criterion = torch.nn.DataParallel(nn.CrossEntropyLoss(), gpus).cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate,
                          weight_decay=args.weight_decay, momentum=args.momentum,
                          nesterov=True)
    # Step the LR down by 10x at 1/2, 3/4 and 7/8 of the schedule.
    scheduler = MultiStepLR(optimizer, milestones=[
        int(args.epochs / 2), int(args.epochs * 3 / 4), int(args.epochs * 7 / 8)
    ], gamma=0.1)
    total, correct, train_loss = 0, 0, 0
    # Record the best accuracy
    best_test_clean_acc, best_test_adv_acc, best_epoch = 0, 0, 0
    log.info(
        "basic model: {}, whether denoising: {}, filter type: {}, kernel size: {}"
        .format(args.arch, args.whether_denoising, args.filter_type, args.ksize))
    for epoch in range(resume_epoch, args.epochs):
        # Periodic evaluation: clean accuracy and robustness under a fresh PGD attack.
        if epoch % args.test_interval == 0:
            model.eval()
            test_total, test_correct, test_robustness = 0, 0, 0
            # eps 0.031372 ~= 8/255 in [0,1] pixel space
            attack = LinfPGDAttack(
                model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.031372,
                nb_iter=30, eps_iter=0.01, rand_init=True, clip_min=0.0,
                clip_max=1.0, targeted=False)
            start_time = time.time()
            for (images, labels) in test_loader:
                images = images.cuda()
                labels = labels.cuda().long()
                test_total += images.shape[0]
                with torch.no_grad():
                    test_correct += model(images).max(1)[1].eq(
                        labels).float().sum().item()
                # perturb() needs gradients, so it runs outside no_grad
                adv_images = attack.perturb(images, labels)
                with torch.no_grad():
                    test_robustness += model(adv_images).max(1)[1].eq(
                        labels).float().sum().item()
            test_acc, test_adv_acc = test_correct / test_total, test_robustness / test_total
            # Record the time on the testset
            end_time = time.time()
            testset_total_time = end_time - start_time
            # Checkpoint whenever adversarial accuracy improves.
            if test_adv_acc > best_test_adv_acc:
                best_epoch = epoch
                best_test_adv_acc = test_adv_acc
                best_test_clean_acc = test_acc
                torch.save(
                    {
                        "state_dict": model.state_dict(),
                        "epoch": epoch + 1
                    }, best_model_path)
            log.info(
                "Present best adversarial model ----- best epoch: {} clean_test_acc: {:.3f} adv_test_acc: {:.3f}"
                .format(best_epoch, best_test_clean_acc, best_test_adv_acc))
            log.info(
                "Epoch:{} clean_test_acc: {:.3f} adv_test_acc: {:.3f} during {} seconds"
                .format(epoch, test_acc, test_adv_acc, testset_total_time))
        # Test and Train on the trainset
        train_total, train_correct, train_robustness = 0, 0, 0
        train_clean_loss, train_adv_loss, train_loss = 0, 0, 0
        start_time = time.time()
        attack = LinfPGDAttack(model,
                               loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                               eps=0.031372, nb_iter=30, eps_iter=0.01,
                               rand_init=True, clip_min=0.0, clip_max=1.0,
                               targeted=False)
        for images, labels in train_loader:
            images = images.cuda()
            labels = labels.cuda().long()
            train_total += images.shape[0]
            with torch.no_grad():
                train_correct += model(images).max(1)[1].eq(
                    labels).float().sum().item()
            # Generate adversarial examples in eval mode, then train on them.
            model.eval()
            adv_images = attack.perturb(images, labels)
            model.train()
            adv_outputs = model(adv_images)
            train_robustness += adv_outputs.max(1)[1].eq(
                labels).float().sum().item()
            adv_loss = criterion(adv_outputs, labels)
            # clean_outputs = model(torch.from_numpy(images).cuda())
            # I think adversarial training does not need the loss on clean images
            # clean_loss = criterion(clean_outputs, torch.from_numpy(labels).cuda())
            optimizer.zero_grad()
            adv_loss.backward()
            optimizer.step()
            train_adv_loss += adv_loss.item()
            train_loss = train_adv_loss
        model.eval()
        scheduler.step(epoch)
        # Record the time on the trainset
        end_time = time.time()
        trainset_total_time = end_time - start_time
        train_acc, train_adv_acc = train_correct / train_total, train_robustness / train_total
        log.info(
            "Epoch:{} train_clean_loss: {:.3f} train_adv_loss: {:.3f} train_total_loss: {:.3f}"
            .format(epoch, train_clean_loss, train_adv_loss, train_loss))
        log.info(
            "Epoch:{} clean_train_acc: {:.3f} adv_train_acc: {:.3f} Consumed time:{}"
            .format(epoch, train_acc, train_adv_acc, trainset_total_time))
        # Save the running checkpoint every epoch so training can resume.
        torch.save({
            "state_dict": model.state_dict(),
            "epoch": epoch + 1
        }, model_path)
def sample_cases(sdim, args):
    """Log-likelihood case study on a single test image under noise and PGD attacks.

    Takes the first image of the test set, builds uniform/gaussian-noised and
    LinfPGD-perturbed variants at several epsilon budgets, saves each variant as a
    PNG, and records ``sdim``'s log-likelihood of the true class for every variant.

    Args:
        sdim: model under study; assumed to map an image batch to per-class scores
            indexable as ``out[:, y]`` — TODO confirm output layout.
        args: hydra/omegaconf config providing dataset name, data_dir and device.

    Side effects: writes one PNG per variant into the working directory, prints the
    likelihood dict and saves it to attack_logs/case_study/sample_likelihood_dict.pt.
    """
    sdim.eval()
    n_classes = args.get(args.dataset).n_classes  # NOTE(review): unused, kept for config validation side effect
    sample_likelihood_dict = {}
    # logger.info('==> Corruption type: {}, severity level {}'.format(corruption_type, level))
    data_dir = hydra.utils.to_absolute_path(args.data_dir)
    dataset = get_dataset(data_name=args.dataset, data_dir=data_dir, train=False,
                          crop_flip=False)
    test_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)
    # Single-sample study: take the first test image only.
    x, y = next(iter(test_loader))
    x, y = x.to(args.device), y.long().to(args.device)

    def f_forward(x_, y_, image_name):
        # Score x_ under the model, save it as <image_name>.png, and return the
        # log-likelihood of the true class y_.
        with torch.no_grad():
            log_lik = sdim(x_)
            save_name = '{}.png'.format(image_name)
            save_image(x_, save_name, normalize=True)
            return log_lik[:, y_].item()

    sample_likelihood_dict['original'] = f_forward(x, y, 'original')
    eps_2 = 2 / 255
    eps_4 = 4 / 255
    eps_8 = 8 / 255
    # Random-noise baselines at eps 4/255 and 8/255, clamped back to valid pixel range.
    # NOTE(review): the gaussian noise is *clamped* to +/-eps rather than scaled by
    # eps — confirm this matches the intended noise model.
    x_u_4 = (x + torch.FloatTensor(x.size()).uniform_(-eps_4, eps_4).to(
        args.device)).clamp_(0., 1.)
    x_g_4 = (x + torch.randn(x.size()).clamp_(-eps_4, eps_4).to(
        args.device)).clamp_(0., 1.)
    x_u_8 = (x + torch.FloatTensor(x.size()).uniform_(-eps_8, eps_8).to(
        args.device)).clamp_(0., 1.)
    x_g_8 = (x + torch.randn(x.size()).clamp_(-eps_8, eps_8).to(
        args.device)).clamp_(0., 1.)
    sample_likelihood_dict['uniform_4'] = f_forward(x_u_4, y, 'uniform_4')
    sample_likelihood_dict['uniform_8'] = f_forward(x_u_8, y, 'uniform_8')
    sample_likelihood_dict['gaussian_4'] = f_forward(x_g_4, y, 'gaussian_4')
    sample_likelihood_dict['gaussian_8'] = f_forward(x_g_8, y, 'gaussian_8')

    # Untargeted PGD attacks at increasing epsilon budgets.
    adversary = LinfPGDAttack(sdim,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=eps_2, nb_iter=40, eps_iter=0.01,
                              rand_init=True, clip_min=-1.0, clip_max=1.0,
                              targeted=False)
    adv_pgd_2 = adversary.perturb(x, y)
    adversary = LinfPGDAttack(sdim,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=eps_4, nb_iter=40, eps_iter=0.01,
                              rand_init=True, clip_min=-1.0, clip_max=1.0,
                              targeted=False)
    adv_pgd_4 = adversary.perturb(x, y)
    adversary = LinfPGDAttack(sdim,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=eps_8, nb_iter=40, eps_iter=0.01,
                              rand_init=True, clip_min=-1.0, clip_max=1.0,
                              targeted=False)
    adv_pgd_8 = adversary.perturb(x, y)
    # adversary = CW(sdim, n_classes, max_iterations=1000, c=1, clip_min=0., clip_max=1., learning_rate=0.01,
    #                targeted=False)
    # adv_cw_1, _, _, _ = adversary.perturb(x, y)
    # adversary = CW(sdim, n_classes, max_iterations=1000, c=10, clip_min=0., clip_max=1., learning_rate=0.01,
    #                targeted=False)
    # adv_cw_10, _, _, _ = adversary.perturb(x, y)
    sample_likelihood_dict['pgd_2'] = f_forward(adv_pgd_2, y, 'pgd_2')
    sample_likelihood_dict['pgd_4'] = f_forward(adv_pgd_4, y, 'pgd_4')
    sample_likelihood_dict['pgd_8'] = f_forward(adv_pgd_8, y, 'pgd_8')
    # sample_likelihood_dict['cw_1'] = f_forward(adv_cw_1, y, 'cw_1')
    # sample_likelihood_dict['cw_10'] = f_forward(adv_cw_10, y, 'cw_10')
    print(sample_likelihood_dict)

    save_dir = hydra.utils.to_absolute_path('attack_logs/case_study')
    # BUGFIX: os.mkdir failed when the parent 'attack_logs' directory did not exist
    # (and the exists-check was race-prone); makedirs creates the whole path safely.
    os.makedirs(save_dir, exist_ok=True)
    torch.save(sample_likelihood_dict,
               os.path.join(save_dir, 'sample_likelihood_dict.pt'))
# PGD evaluation script fragment: measures clean vs adversarial accuracy of `model`
# over `loader` (both, along with `device` and predict_from_logits, are expected to
# be defined in the enclosing scope — not visible here).
print('==> This is the PGD')
from advertorch.attacks import LinfPGDAttack
# Untargeted Linf PGD: eps=0.3 budget, 40 iterations of step 0.01, inputs clipped to [0,1].
adversary = LinfPGDAttack(model,
                          loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                          eps=0.3, nb_iter=40, eps_iter=0.01, rand_init=True,
                          clip_min=0.0, clip_max=1.0, targeted=False)
correct_clean = 0
correct_adv = 0
for idx, (cln_data, true_label) in enumerate(loader):
    cln_data, true_label = cln_data.to(device), true_label.to(device)
    # Perturb the batch, then compare clean and adversarial predictions to the labels.
    adv_untargeted = adversary.perturb(cln_data, true_label)
    pred_cln = predict_from_logits(model(cln_data))
    pred_adv = predict_from_logits(model(adv_untargeted))
    # Running counts accumulate as 0-d float tensors.
    correct_clean = correct_clean + (pred_cln.data == true_label.data).float().sum()
    correct_adv = correct_adv + (pred_adv.data == true_label.data).float().sum()
    print("current correct clean samples: %s; current correct adv samples: %s"
          % (correct_clean.data.item(), correct_adv.data.item()))
print("correct clean samples: ", correct_clean)
print("correct adversarial samples: ", correct_adv)