def cw_l2_attack(model, hps):
    print('============== CW_l2 Summary ===============')
    confidence = hps.cw_confidence
    adversary = CarliniWagnerL2Attack(model,
                                      num_classes=10,
                                      confidence=confidence,
                                      clip_min=0.,
                                      clip_max=1.,
                                      max_iterations=1000)
    print('confidence = {}'.format(confidence))
    attack_run_rejection_policy(model, adversary, hps)
    print('============== CW_l2 Summary ===============')
def create_adv_input(self, x, y, model):
    # Prepare copied model
    model = copy.deepcopy(model)

    # Prepare input and corresponding label
    data = torch.from_numpy(np.expand_dims(x, axis=0).astype(np.float32))
    target = torch.from_numpy(np.array([y]).astype(np.int64))
    data.requires_grad = True

    from advertorch.attacks import CarliniWagnerL2Attack
    adversary = CarliniWagnerL2Attack(model.forward,
                                      self.num_classes,
                                      max_iterations=self.max_iterations)
    perturbed_data = adversary.perturb(data, target)

    # Have to be different
    output = model.forward(perturbed_data)
    final_pred = output.max(
        1, keepdim=True)[1]  # get the index of the max log-probability

    if final_pred.item() == target.item():
        return perturbed_data, 0
    else:
        return perturbed_data, 1
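# Hypothetical usage sketch for create_adv_input above (not part of the original
# source). The method only reads `self.num_classes` and `self.max_iterations`,
# so a SimpleNamespace can stand in for the owning object; `model`, `x_np`, and
# `y` are assumed to be a trained classifier, a single numpy image, and its label.
from types import SimpleNamespace

cfg = SimpleNamespace(num_classes=10, max_iterations=100)
# x_adv, changed = create_adv_input(cfg, x_np, y, model)
# `changed` is 1 when the adversarial example flips the model's prediction.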
def test_adver(net, tar_net, attack, target):
    net.eval()
    tar_net.eval()
    # BIM
    if attack == 'BIM':
        adversary = LinfBasicIterativeAttack(
            net,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=0.25,
            nb_iter=120,
            eps_iter=0.02,
            clip_min=0.0,
            clip_max=1.0,
            targeted=opt.target)
    # PGD
    elif attack == 'PGD':
        if opt.target:
            adversary = PGDAttack(net,
                                  loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                  eps=0.25,
                                  nb_iter=11,
                                  eps_iter=0.03,
                                  clip_min=0.0,
                                  clip_max=1.0,
                                  targeted=opt.target)
        else:
            adversary = PGDAttack(net,
                                  loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                  eps=0.25,
                                  nb_iter=6,
                                  eps_iter=0.03,
                                  clip_min=0.0,
                                  clip_max=1.0,
                                  targeted=opt.target)
    # FGSM
    elif attack == 'FGSM':
        adversary = GradientSignAttack(
            net,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=0.26,
            targeted=opt.target)
    # CW
    elif attack == 'CW':
        adversary = CarliniWagnerL2Attack(
            net,
            num_classes=10,
            learning_rate=0.45,
            # loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            binary_search_steps=10,
            max_iterations=12,
            targeted=opt.target)

    # ----------------------------------
    # Obtain the accuracy of the model
    # ----------------------------------
    with torch.no_grad():
        correct_netD = 0.0
        total = 0.0
        net.eval()
        for data in testloader:
            inputs, labels = data
            inputs = inputs.cuda()
            labels = labels.cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct_netD += (predicted == labels).sum()
        print('Accuracy of the network on netD: %.2f %%' %
              (100. * correct_netD.float() / total))

    # ----------------------------------
    # Obtain the attack success rate of the model
    # ----------------------------------
    correct = 0.0
    total = 0.0
    tar_net.eval()
    total_L2_distance = 0.0
    for data in testloader:
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = tar_net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        if target:
            # randomly choose the specific label of the targeted attack
            labels = torch.randint(0, 9, (1, )).to(device)
            # test the images which are not classified as the specific label
            if predicted != labels:
                adv_inputs_ori = adversary.perturb(inputs, labels)
                L2_distance = (torch.norm(adv_inputs_ori - inputs)).item()
                total_L2_distance += L2_distance
                with torch.no_grad():
                    outputs = tar_net(adv_inputs_ori)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum()
        else:
            # test the images which are classified correctly
            if predicted == labels:
                adv_inputs_ori = adversary.perturb(inputs, labels)
                L2_distance = (torch.norm(adv_inputs_ori - inputs)).item()
                total_L2_distance += L2_distance
                with torch.no_grad():
                    outputs = tar_net(adv_inputs_ori)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum()

    if target:
        print('Attack success rate: %.2f %%' %
              (100. * correct.float() / total))
    else:
        print('Attack success rate: %.2f %%' %
              (100.0 - 100. * correct.float() / total))
    print('l2 distance: %.4f ' % (total_L2_distance / total))
                                         num_workers=0)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# In[4]:

####################################
# Construct an adversary instance
####################################
adversary_CW = CarliniWagnerL2Attack(model,
                                     num_classes=len(classes),
                                     confidence=0,
                                     targeted=False,
                                     learning_rate=0.01,
                                     binary_search_steps=9,
                                     max_iterations=10000,
                                     abort_early=True,
                                     initial_const=0.001,
                                     clip_min=0.0,
                                     clip_max=1.0,
                                     loss_fn=None)
adversary_Jacobian = JacobianSaliencyMapAttack(model,
                                               num_classes=len(classes),
                                               clip_min=0.0,
                                               clip_max=1.0,
                                               loss_fn=None,
                                               theta=1.0,
                                               gamma=1.0,
                                               comply_cleverhans=False)
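# Hypothetical usage sketch (not part of the original notebook cell): running one
# of the adversary instances constructed above over a test loader to estimate
# robust accuracy. `model` and `testloader` are assumed to exist in the notebook;
# advertorch's perturb(x, y) returns the adversarial batch.
import torch

def eval_robust_accuracy(adversary, model, loader, device='cuda'):
    correct, total = 0, 0
    model.eval()
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        adv_inputs = adversary.perturb(inputs, labels)  # attack needs gradients
        with torch.no_grad():
            preds = model(adv_inputs).argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return 100. * correct / total

# robust_acc_cw = eval_robust_accuracy(adversary_CW, model, testloader)
# print('Robust accuracy under CW-L2: %.2f %%' % robust_acc_cw)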
def adv_train_loop(model, params, ds, min_y, base_data, model_id, attack_type,
                   device, batch_size, max_epochs=5):
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(
        model)  # used to generate adv images for the trained model
    original_model.eval()
    model = copy.deepcopy(
        model)  # making a copy so that original model is not changed
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'

    with create_summary_writer(model, ds_train, base_data, model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )

        attack = None
        # ART-based alternatives kept for reference:
        # if attack_type == "fgsm":
        #     attack = FastGradientMethod(estimator=classifier, eps=0.2)
        # elif attack_type == "bim":
        #     attack = BasicIterativeMethod(estimator=classifier, eps=0.2)
        # elif attack_type == "carlini":
        #     attack = CarliniLInfMethod(classifier=classifier)
        # elif attack_type == "deepfool":
        #     attack = DeepFool(classifier=classifier)
        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            # note: the "lbfgs" option currently falls back to ART's DeepFool
            attack = DeepFool(classifier=classifier)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            optimizer.zero_grad()
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # return ans, y
            return l.item()

        trainer = Engine(train_step)
        # acc_metric.attach(trainer, "accuracy")
        # loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
writer.add_scalar("validation/avg_loss", avg_nll, engine.state.epoch) writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy, engine.state.epoch) writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy, engine.state.epoch) @trainer.on(Events.EPOCH_COMPLETED) def lr_scheduler(engine): metrics = valid_evaluator.state.metrics avg_nll = metrics['accuracy'] sched.step(avg_nll) @trainer.on(Events.ITERATION_COMPLETED(every=50)) def log_training_loss(engine): batch = engine.state.batch ds = DataLoader(TensorDataset(*batch), batch_size=batch_size) train_evaluator.run(ds) metrics = train_evaluator.state.metrics # metrics = engine.state.metrics accuracy = metrics['accuracy'] nll = metrics['loss'] iter = (engine.state.iteration - 1) % len(ds_train) + 1 if (iter % 50) == 0: print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}". format(engine.state.epoch, iter, len(ds_train), accuracy, nll)) writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch) writer.add_scalar("batchtraining/accuracy", accuracy, engine.state.iteration) writer.add_scalar("batchtraining/error", 1. - accuracy, engine.state.iteration) writer.add_scalar("batchtraining/loss", engine.state.output, engine.state.iteration) @trainer.on(Events.EPOCH_COMPLETED) def log_lr(engine): writer.add_scalar("lr", optimizer.param_groups[0]['lr'], engine.state.epoch) # @trainer.on(Events.EPOCH_COMPLETED) # def log_training_results(engine): # train_evaluator.run(ds_train) # metrics = train_evaluator.state.metrics # # metrics = engine.state.metrics # avg_accuracy = metrics['accuracy'] # avg_nll = metrics['loss'] # print("Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}" # .format(engine.state.epoch, avg_accuracy, avg_nll)) # writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch) # writer.add_scalar("training/avg_accuracy", # avg_accuracy, engine.state.epoch) # writer.add_scalar("training/avg_error", 1. - # avg_accuracy, engine.state.epoch) @trainer.on( Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10)) def validation_value(engine): metrics = valid_evaluator.state.metrics valid_avg_accuracy = metrics['accuracy'] return valid_avg_accuracy to_save = {'model': model} handler = Checkpoint( to_save, DiskSaver(os.path.join(base_data, model_id), create_dir=True), score_function=validation_value, score_name="val_acc", global_step_transform=global_step_from_engine(trainer), n_saved=None) # kick everything off trainer.add_event_handler( Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10), handler) trainer.run(ds_train, max_epochs=max_epochs)
# load classifier
predict = get_classifier(cfg, cfg.classifier)
state_dict = torch.load(cfg.classifier.ckpt)
predict.load_state_dict(state_dict)
for p in predict.parameters():
    p.requires_grad_(False)
predict = torch.nn.Sequential(proj_fn, transform.classifier_preprocess_layer,
                              predict).cuda()
predict.eval()

# create attacker
attacker = CarliniWagnerL2Attack(predict=predict,
                                 num_classes=cfg.dataset.num_classes,
                                 learning_rate=0.2,
                                 initial_const=10,
                                 binary_search_steps=4,
                                 max_iterations=100,
                                 abort_early=True)

total = 0
correct_adv = 0
for i, (images, labels) in enumerate(progress_bar):
    if i < start_ind or i >= end_ind:
        continue
    images, labels = images.cuda(), labels.cuda()

    result_path = os.path.join(result_dir, 'batch_{:04d}.pt'.format(i))
    if os.path.isfile(result_path):
        result_dict = torch.load(result_path)
        images_adv = result_dict['input'].cuda()
                              ord=norm,
                              rand_init=True)
elif args.attack == 'MIFGSM':
    adversary = MomentumIterativeAttack(
        lambda x: wrapper(normalize(x), pcl=pcl),
        eps=epsilon,
        eps_iter=epsilon / 10,
        ord=norm,
        nb_iter=10)
elif args.attack == 'FGSM':
    adversary = GradientSignAttack(lambda x: wrapper(x, pcl=pcl),
                                   eps=epsilon)
    # adversary = PGDAttack(lambda x: wrapper(x, pcl=pcl), eps=epsilon,
    #                       eps_iter=epsilon, nb_iter=1, ord=norm,
    #                       rand_init=False)
elif args.attack == 'CW':
    adversary = CarliniWagnerL2Attack(lambda x: wrapper(x, pcl=pcl),
                                      10,
                                      binary_search_steps=2,
                                      max_iterations=500,
                                      initial_const=1e-1)
elif args.attack == 'DDN':
    adversary = DDN(steps=100, device=device)
    ddn = True
else:
    adversary = None

criterion = torch.nn.CrossEntropyLoss()
net.eval()
test_acc_adv, test_loss_adv, dist_l2, dist_linf = adv_test(
    lambda x: wrapper(x, pcl=pcl), test_loader, criterion,
def CW(model, X, y, num_class=10, num_iter=10):
    adversary = CarliniWagnerL2Attack(model,
                                      loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                      num_classes=num_class,
                                      confidence=0,
                                      targeted=False,
                                      learning_rate=0.01,
                                      binary_search_steps=5,
                                      max_iterations=20,
                                      abort_early=True,
                                      initial_const=0.001,
                                      clip_min=0.0,
                                      clip_max=1.0)
    adv_untargeted = adversary.perturb(X, y) - X
    return adv_untargeted
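# Hypothetical usage sketch (not in the original source): CW above returns only
# the perturbation delta, so the adversarial batch is the clean input plus that
# delta. `model`, `X` (clean images in [0, 1]), and `y` (labels) are assumed inputs.
def cw_adv_examples(model, X, y, num_class=10):
    delta = CW(model, X, y, num_class=num_class)  # perturbation only
    X_adv = X + delta                             # reconstruct adversarial inputs
    return X_adv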
def model_test(model, data_loader, output_file_path, attack='mia', eps=8 / 255, nb_iter=3):
    model.eval()
    test_loss, adv_loss, correct, correct_adv, nb_data, adv_l2dist, adv_linfdist = \
        0, 0, 0, 0, 0, 0.0, 0.0
    start_time = time.time()
    for i, (data, target) in enumerate(data_loader):
        print('i:', i)
        indx_target = target.clone()
        data_length = data.shape[0]
        nb_data += data_length
        data, target = data.cuda(), target.cuda()
        with torch.no_grad():
            output = model(data)
        # print('data max:', torch.max(data))
        # print('data min:', torch.min(data))

        if attack == 'cw':
            if i >= 5:
                break
            adversary = CarliniWagnerL2Attack(predict=model,
                                              num_classes=10,
                                              targeted=True,
                                              clip_min=min_v,
                                              clip_max=max_v,
                                              max_iterations=50)
        elif attack == 'mia':
            adversary = MomentumIterativeAttack(predict=model,
                                                targeted=True,
                                                eps=eps,
                                                nb_iter=40,
                                                eps_iter=0.01 * (max_v - min_v),
                                                clip_min=min_v,
                                                clip_max=max_v)
        elif attack == 'pgd':
            adversary = LinfPGDAttack(predict=model,
                                      targeted=True,
                                      eps=eps,
                                      nb_iter=nb_iter,
                                      eps_iter=eps * 1.25 / nb_iter,
                                      clip_min=min_v,
                                      clip_max=max_v)
        else:
            raise NotImplementedError('unimplemented attack: {}'.format(attack))

        pred = model(data)  # torch.Size([128, 10])
        print('pred:', type(pred), pred.shape)
        print('target:', type(target), target.shape, target[0:20])
        # pred_argmax = torch.argmax(pred, dim=1)
        # print('pred_argmax:', type(pred_argmax), pred_argmax.shape, pred_argmax[0:10])
        # for j in range(list(pred.shape)[0]):
        #     pred[j, pred_argmax[j]] = -1
        # use j here to avoid shadowing the batch index i of the outer loop
        for j in range(list(pred.shape)[0]):
            pred[j, target[j]] = -1
        # target_adv = torch.argmax(pred, dim=1)
        target_adv = (target + 5) % 10
        print('target_adv:', type(target_adv), target_adv.shape, target_adv[0:20])

        data_adv = adversary.perturb(data, target_adv)
        print('data_adv max:', torch.max(data_adv))
        print('data_adv min:', torch.min(data_adv))
        print('linf:', torch.max(torch.abs(data_adv - data)))

        adv_l2dist += torch.norm((data - data_adv).view(data.size(0), -1), p=2, dim=-1).sum().item()
        adv_linfdist += torch.max((data - data_adv).view(data.size(0), -1).abs(), dim=-1)[0].sum().item()

        with torch.no_grad():
            output_adv = model(data_adv)
        pred_adv = output_adv.data.max(1)[1]
        correct_adv += pred_adv.cpu().eq(indx_target).sum()

        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.cpu().eq(indx_target).sum()

    time_consume = time.time() - start_time
    print('time_consume:', time_consume)

    acc = float(100. * correct) / nb_data
    print('\tTest set: Accuracy: {}/{}({:.2f}%)'.format(correct, nb_data, acc))
    acc_adv = float(100. * correct_adv) / nb_data
    print('\tAdv set: Accuracy : {}/{}({:.2f}%)'.format(correct_adv, nb_data, acc_adv))

    adv_l2dist /= nb_data
    adv_linfdist /= nb_data
    print('\tAdv dist: L2: {:.8f} , Linf: {:.8f}'.format(adv_l2dist, adv_linfdist))

    with open(output_file_path, "a+") as output_file:
        output_file.write(args.model_name + '\n')
        info_string = 'attack: %s:\n acc: %.2f, acc_adv: %.2f, adv_l2dist: %.2f, adv_linfdist: %.2f, time_consume: %.2f' % (
            attack, acc, acc_adv, adv_l2dist, adv_linfdist, time_consume)
        output_file.write(info_string)

    return acc, acc_adv