def plotAdv(img, model):
    foolModel = foolbox.models.KerasModel(model, bounds=(0, 255))
    attack = foolbox.attacks.L1BasicIterativeAttack(foolModel, criterion=TargetClass(1), distance=MAE)
    adversarial_object = attack(img, label=0, unpack=False)
    advImg = adversarial_object.image

    plt.figure()
    plt.subplot(1, 3, 1)
    plt.title('Original')
    plt.imshow(img / 255)  # divide by 255 to rescale [0, 255] to [0, 1]
    plt.axis('off')
    plt.subplot(1, 3, 2)
    plt.title('Adversarial')
    plt.imshow(advImg / 255)  # divide by 255 to rescale [0, 255] to [0, 1]
    plt.axis('off')
    plt.subplot(1, 3, 3)
    plt.title('Difference')
    difference = (advImg - img) / 255
    # Rescale the signed difference to [0.3, 0.7] around 0.5 so it is visible when plotted.
    plt.imshow(difference / abs(difference).max() * 0.2 + 0.5)
    plt.axis('off')
    plt.show()
def boundary_attack(model, img, target):
    img_01 = (img / 255).astype(np.float32)  # rescale [0, 255] to [0, 1]
    atk = BoundaryAttack(model, TargetClass(target))
    label = 1 - target  # binary task: the current label is the opposite of the target
    adv = atk(img_01, label, iterations=1000, verbose=False, log_every_n_steps=100)
    if adv is not None:
        adv = np.clip(adv * 255, 0, 255)  # map back to [0, 255]
    return adv
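# A minimal usage sketch for the helper above (not from the original source):
# it assumes `fmodel` is an already wrapped foolbox model for a binary
# classifier, and the input below is an illustrative placeholder image that
# the model currently classifies as class 1.
import numpy as np

img = np.random.randint(0, 256, size=(28, 28, 1)).astype(np.uint8)  # placeholder input
adv = boundary_attack(fmodel, img, target=0)  # ask the attack to flip the label to 0
if adv is not None:
    print("L2 distortion:", np.linalg.norm(adv - img))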
def foolbox_attack(name, model):
    # A trailing number in `name` (e.g. 'GenAttack_3') encodes the target class.
    # Note: re.search(r'\d{0,2}$', ...) always matches (possibly the empty string),
    # so we match at least one digit and check for an actual suffix instead.
    match = re.search(r'\d{1,2}$', name)
    target = match.group() if match else None
    if target:
        name = re.sub('_' + target + '$', '', name)
    if name == 'FGSM':
        return foolbox.attacks.FGSM(model, distance=Linf)
    elif name == 'S&P':
        return foolbox.attacks.SaltAndPepperNoiseAttack(model, distance=Linf)
    elif name == 'C&W':
        return foolbox.attacks.CarliniWagnerL2Attack(model, distance=Linf)
    elif name == 'SinglePixelAttack':
        return foolbox.attacks.SinglePixelAttack(model, distance=Linf)
    elif name == 'LocalSearchAttack':
        return foolbox.attacks.LocalSearchAttack(model, distance=Linf)
    elif name == 'SpatialAttack':
        return foolbox.attacks.SpatialAttack(model, distance=Linf)
    elif name == 'ShiftsAttack':
        # deliberately reuses SpatialAttack
        return foolbox.attacks.SpatialAttack(model, distance=Linf)
    elif name == 'BoundaryAttack':
        return foolbox.attacks.BoundaryAttack(model, distance=Linf)
    elif name == 'PointwiseAttack':
        return foolbox.attacks.PointwiseAttack(model, distance=Linf)
    elif name == 'ContrastReductionAttack':
        return foolbox.attacks.ContrastReductionAttack(model, distance=Linf)
    elif name == 'AdditiveUniformNoiseAttack':
        return foolbox.attacks.AdditiveUniformNoiseAttack(model, distance=Linf)
    elif name == 'AdditiveGaussianNoiseAttack':
        return foolbox.attacks.AdditiveGaussianNoiseAttack(model, distance=Linf)
    elif name == 'BlendedUniformNoiseAttack':
        return foolbox.attacks.BlendedUniformNoiseAttack(model, distance=Linf)
    elif name == 'GaussianBlurAttack':
        return foolbox.attacks.GaussianBlurAttack(model, distance=Linf)
    elif name == 'DeepFoolAttack':
        return foolbox.attacks.DeepFoolAttack(model, distance=Linf)
    elif name == 'GenAttack':
        return foolbox.attacks.GenAttack(model, criterion=TargetClass(int(target)), distance=Linf)
    elif name == 'PrecomputedAdversarialsAttack':
        return foolbox.attacks.PrecomputedAdversarialsAttack(model, distance=Linf)
    elif name == 'InversionAttack':
        return foolbox.attacks.InversionAttack(model, distance=Linf)
    elif name == 'HopSkipJumpAttack':
        return foolbox.attacks.HopSkipJumpAttack(model, distance=Linf)
    elif name == 'RandomPGD':
        return foolbox.attacks.RandomPGD(model, distance=Linf)
    else:
        raise ValueError('Unknown attack: %s' % name)
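# Illustrative calls (my reading of the naming convention above, not from the
# original source): a trailing number in `name` selects the target class for
# the targeted GenAttack, while plain names build untargeted attacks. `fmodel`
# is assumed to be a wrapped foolbox model.
attack_targeted = foolbox_attack('GenAttack_3', fmodel)  # targeted at class 3
attack_untargeted = foolbox_attack('FGSM', fmodel)       # untargeted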
def attack_switcher(att, fmodel):
    """Initialize different attacks."""
    switcher = {
        "fgsm": fa.GradientSignAttack(fmodel, distance=Linf),
        "bim": fa.LinfinityBasicIterativeAttack(fmodel, distance=Linf),
        "mim": fa.MomentumIterativeAttack(fmodel, distance=Linf),
        "df": LimitedDeepFoolL2Attack(fmodel),
        "cw": LimitedCarliniWagnerL2Attack(fmodel),
        "hsj": LimitedHopSkipJumpAttack(fmodel, distance=Linf),
        "ga": fa.GenAttack(fmodel, criterion=TargetClass(9), distance=Linf),
    }
    return switcher.get(att)
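# Hedged usage sketch for attack_switcher (names are illustrative, not from
# the original source): `fmodel` is assumed to be a wrapped foolbox model and
# `image`/`label` a correctly classified sample. In foolbox 1.x the attack
# call returns the perturbed input, or None if no adversarial was found.
attack = attack_switcher("fgsm", fmodel)
adversarial = attack(image, label)
if adversarial is None:
    print("fgsm did not find an adversarial within its default parameters")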
    ITER = 500
    maxN = 30
    initN = 30
else:
    raise NotImplementedError()
#ITER = 20
print("PGEN: %s" % PGEN)

if p_gen is None:
    rho = 1.0
else:
    rvs = p_gen.generate_ps(src_image, 10, level=999)
    grad_gt = fmodel.gradient_one(src_image, label=src_label)
    rho = p_gen.calc_rho(grad_gt, src_image).item()
print("rho: %.4f" % rho)

attack = foolbox.attacks.BAPP_custom(fmodel, criterion=TargetClass(src_label))
adv = attack(tgt_image, tgt_label, starting_point=src_image, iterations=ITER,
             stepsize_search='geometric_progression', unpack=False,
             max_num_evals=maxN, initial_num_evals=initN,
             internal_dtype=np.float32, rv_generator=p_gen,
             atk_level=args.atk_level, mask=mask, batch_size=16,
             rho_ref=rho, log_every_n_steps=1,
#image, label = foolbox.utils.imagenet_example()
#print(image.shape)
#print(label)
src_image = load_imagenet_img('../raw_data/imagenet_example/bad_joke_eel.png')
tgt_image = load_imagenet_img('../raw_data/imagenet_example/awkward_moment_seal.png')
#tgt_image = load_imagenet_img('../raw_data/imagenet_example/example.png')

# [:, :, ::-1] reverses the channel order (RGB <-> BGR) before feeding the model.
src_label = np.argmax(fmodel.forward_one(src_image[:, :, ::-1]))
tgt_label = np.argmax(fmodel.forward_one(tgt_image[:, :, ::-1]))
print(src_image.shape)
print(tgt_image.shape)
print("Source Image Label:", src_label)
print("Target Image Label:", tgt_label)

#attack = foolbox.attacks.BoundaryAttackPlusPlus(fmodel)
#attack = foolbox.attacks.BoundaryAttackPlusPlus(fmodel, criterion=TargetClass(src_label))
attack = foolbox.attacks.BAPP_custom(fmodel, criterion=TargetClass(src_label))
adv = attack(tgt_image[:, :, ::-1], tgt_label, starting_point=src_image[:, :, ::-1],
             iterations=20, stepsize_search='geometric_progression', verbose=True,
             unpack=False, max_num_evals=100, initial_num_evals=100)
#attack = foolbox.attacks.BoundaryAttack(fmodel)
#attack = foolbox.attacks.BoundaryAttack(fmodel, criterion=TargetClass(src_label))
#adv = attack(tgt_image[:,:,::-1], tgt_label, starting_point=src_image[:,:,::-1], iterations=2000, log_every_n_steps=50, verbose=True, unpack=False)

# Final adversarial (flipped back to the original channel order)
adversarial = adv.perturbed[:, :, ::-1]
adv_label = np.argmax(fmodel.forward_one(adversarial[:, :, ::-1]))
ret1 = tgt_image / 255
ret2 = adversarial / 255
print("Total calls:", adv._total_prediction_calls)
print("Final MSE between Target and Adv:", MSE(ret1, ret2))
print("Source label: %d; Target label: %d; Adv label: %d" % (src_label, tgt_label, adv_label))

import matplotlib.pyplot as plt
                                                l1, 0)
elif args.experiment_type == "VGG":
    model = convolutional.vgg_model_wide(args.dataset, 0, l1, 0)
elif args.experiment_type == "leNet":
    model = convolutional.leNet_model_wide(0, l1, 0)
else:
    raise Exception("Invalid model!")

model.fit(x_train, y_train, epochs=50, batch_size=128)
preds = np.argmax(model.predict(x_test), axis=1)

kmodel = KerasModel(model=model, bounds=(min_, max_))

attack = None
if args.attack_type == 'l2':
    attack = CarliniWagnerL2Attack(kmodel, TargetClass(7))
elif args.attack_type == 'linf':
    attack = RandomPGD(kmodel, TargetClass(7))

x_sample = np.take(x_test, ones, axis=0)
# By default, exclude examples that the classifier does not predict as 1s.
true_ones = np.where(preds == 1)[0]
x_sample = np.take(x_sample, true_ones, axis=0)
y_sample = np.array([to_one_hot(1) for _ in x_sample])

adversarial = None
if args.attack_type == 'l2':
    adversarial = attack(x_sample, np.argmax(y_sample, axis=1),
dog_x = np.expand_dims(dog_img, axis=0)
cat_x = np.expand_dims(cat_img, axis=0)

# Build a foolbox model
fmodel = KerasModel(kmodel, bounds=(-1, 1))

# label of the target class
preds = kmodel.predict(dog_x)
dog_label = np.argmax(preds)

# label of the original class
preds = kmodel.predict(cat_x)
cat_label = np.argmax(preds)

criterion_1 = TopKMisclassification(k=5)
criterion_2 = TargetClass(dog_label)
criterion_3 = TargetClassProbability(dog_label, p=0.5)
criterion = criterion_1 & criterion_2 & criterion_3

attack = BoundaryAttack(model=fmodel, criterion=criterion)

iteration_size = 1000
global_iterations = 0

# Run the boundary attack to generate an adversarial example
adversarial = attack(cat_img, label=cat_label, unpack=False,
                     iterations=iteration_size, starting_point=dog_img,
                     log_every_n_steps=10, verbose=True)
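# Note on the `&` above: foolbox 1.x overloads `&` on criteria to build a
# CombinedCriteria, so the result only counts as adversarial once all three
# conditions hold at the same time (top-5 misclassification, predicted class
# equal to dog_label, and p(dog_label) > 0.5). A sketch of the equivalent
# explicit construction:
from foolbox.criteria import CombinedCriteria

criterion = CombinedCriteria(criterion_1, criterion_2, criterion_3)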
json_file.close()
cnn = model_from_json(loaded_model_json)
cnn.load_weights("model.h5")
print("Loaded model from disk")

# FOOLING
fmodel = foolbox.models.KerasModel(cnn, bounds=(0, 1))
for _ in range(10):
    indx = np.random.randint(0, x_test.shape[0])
    image, label = x_test[indx], y_test[indx]
    print("Label: ", np.argmax(label))
    print("Prediction: ", np.argmax(fmodel.predictions(image)))

    # Apply attack
    attack = LBFGSAttack(fmodel, criterion=TargetClass(3))
    adversarial = attack(image, np.argmax(label))
    if adversarial is None:
        break
    print("Adversarial Prediction: ", np.argmax(fmodel.predictions(adversarial)))
    print("Adversarial Probabilities: ", softmax(fmodel.predictions(adversarial)))

    # Plot the attack
    plt.figure()
    plt.subplot(1, 3, 1)
    plt.title("Original")
    plt.imshow(image.reshape((28, 28)), cmap="gray")
    plt.axis("off")
    plt.subplot(1, 3, 2)
    plt.title("Adversarial")
def validate(val_loader, model, epsilon, args):
    batch_time = AverageMeter('Time', ':6.3f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, top1], prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
    preprocessing = (mean, std)
    fmodel = PyTorchModel(model, bounds=(0, 1), num_classes=1000,
                          preprocessing=preprocessing)

    clean_labels = np.zeros(len(val_loader))
    target_labels = np.zeros(len(val_loader))
    clean_pred_labels = np.zeros(len(val_loader))
    adv_pred_labels = np.zeros(len(val_loader))

    end = time.time()
    # Batch processing is not supported in foolbox 1.8, so we feed images one by
    # one. Note that we are using a batch size of 2, which means we consider
    # every other image (due to computational costs).
    for i, (images, target) in enumerate(val_loader):
        image = images.cpu().numpy()[0]
        clean_label = target.cpu().numpy()[0]
        target_label = np.random.choice(np.setdiff1d(np.arange(1000), clean_label))

        attack = RandomStartProjectedGradientDescentAttack(
            model=fmodel, criterion=TargetClass(target_label), distance=Linfinity)
        adversarial = attack(image, clean_label, binary_search=False,
                             epsilon=epsilon, stepsize=2. / 255,
                             iterations=args.pgd_steps, random_start=True)
        if adversarial is None:  # attack failed; fall back to the clean image
            adversarial = image
            target_label = clean_label

        adv_pred_labels[i] = np.argmax(fmodel.predictions(adversarial))
        clean_labels[i] = clean_label
        target_labels[i] = target_label
        clean_pred_labels[i] = np.argmax(fmodel.predictions(image))
        print('Iter, Clean, Clean_pred, Adv, Adv_pred: ', i, clean_label,
              clean_pred_labels[i], target_label, adv_pred_labels[i])

        # measure accuracy and update average
        acc1 = 100. * np.mean(clean_label == adv_pred_labels[i])
        top1.update(acc1, 1)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

    print('* Acc@1 {top1.avg:.3f} '.format(top1=top1))
    return top1.avg
def random_targeted(attack_fn, class_start: int, class_end: int):
    return partial(
        attack_fn,
        criterion=TargetClass(target_class=random.randint(class_start, class_end)),
    )
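# Illustrative use of random_targeted (assumes foolbox 1.x and a wrapped model
# `fmodel`; not from the original source): the partial fixes a random target
# class once, before the attack object is constructed.
import foolbox

make_attack = random_targeted(foolbox.attacks.CarliniWagnerL2Attack, 0, 9)
attack = make_attack(fmodel)  # criterion is already bound to the random target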
def start_attack(foolmodel, image, label, threshold=90):
    advs = []
    for i in range(config.exp['label_num']):
        if i != label:  # targeted: try to make the model classify the image as i
            criterion = TargetClass(i)

            # Three mainstream attack families: decision-based, gradient-based, score-based.
            # Decision-based attacks
            attack1_1 = foolbox.attacks.AdditiveUniformNoiseAttack(foolmodel, criterion=criterion)
            attack1_2 = foolbox.attacks.AdditiveGaussianNoiseAttack(foolmodel, criterion=criterion)
            # Gradient-based attacks
            attack2 = foolbox.attacks.FGSM(foolmodel, criterion=criterion)
            attack3 = foolbox.attacks.SaliencyMapAttack(foolmodel, criterion=criterion)
            # Score-based attacks are not used: their results and scores were consistently poor.
            # attack4 = foolbox.attacks.LocalSearchAttack(foolmodel, criterion=criterion)

            # Run the first and second attacks
            for eps in range(100, 1000, 100):
                # 1. Decision-based attacks
                print(eps)
                adv1_1 = attack1_1(image, label, epsilons=eps)
                adv1_2 = attack1_2(image, label, epsilons=eps)
                if adv1_1 is not None and cal_score(image, adv1_1) >= threshold:
                    # keep samples scoring at or above the threshold
                    advs.append(adv1_1)
                if adv1_2 is not None and cal_score(image, adv1_2) >= threshold:
                    advs.append(adv1_2)

            # Run the third attack
            eps_set = [10, 100, 200, 300, 500, 800]
            temp_score = 0
            for eps in eps_set:
                print(eps)
                # 2. Gradient-based attack (FGSM)
                adv2 = attack2(image, label, epsilons=eps)
                if adv2 is not None:
                    score = cal_score(image, adv2)
                    if score >= threshold and temp_score != score:
                        advs.append(adv2)
                        temp_score = score

            # Run the fourth attack
            temp_score = 0
            for theta in np.arange(0.1, 1.1, 0.1):
                print(theta)
                # 3. Gradient-based attack (saliency map)
                adv3 = attack3(image, label, theta=theta)
                if adv3 is not None:
                    score = cal_score(image, adv3)
                    if score >= threshold and temp_score != score:
                        advs.append(adv3)
                        temp_score = score

    # Return the generated adversarial samples; the original and adversarial
    # labels could be recovered as below for saving.
    # labels = foolmodel.batch_predictions(np.array(advs))
    # labels = np.argmax(labels, axis=1)
    # original_labels = np.zeros(shape=(len(labels),))
    # original_labels[:] = label
    return np.array(advs)
def attack(algorithm, dataset, targeted, norm='l2', num=50, stopping_criteria=None,
           query_limit=40000, start_from=0, gpu=0):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    print("Attacking:")
    print("  Number of samples - {0}".format(num))
    print("  Dataset - {0}".format(dataset.upper()))
    print("  Targeted - {0}".format(targeted))
    print("  Norm - {0}".format(norm))
    print("  Query Limit - {0}".format(query_limit))
    print("  GPU - {0}".format(gpu))
    print()
    if stopping_criteria is not None:
        print("  Stopping criteria - {0}".format(stopping_criteria))
    if start_from > 0:
        print("  Start from {0}".format(start_from))

    if dataset == 'mnist':
        net = MNIST()
        net.cuda()
        net = torch.nn.DataParallel(net, device_ids=[0])
        load_model(net, '../mnist_gpu.pt')
        train_loader, test_loader, train_dataset, test_dataset = load_mnist_data()
    elif dataset == 'cifar':
        net = CIFAR10()
        net.cuda()
        net = torch.nn.DataParallel(net, device_ids=[0])
        load_model(net, '../cifar10_gpu.pt')
        train_loader, test_loader, train_dataset, test_dataset = load_cifar10_data()
    elif dataset == 'imagenet':
        net = models.__dict__["resnet50"](pretrained=True)
        net.cuda()
        net = torch.nn.DataParallel(net, device_ids=[0])
        train_loader, test_loader, train_dataset, test_dataset = load_imagenet_data()
    else:
        print("Invalid dataset")
        return

    net.eval()
    model = net.module if torch.cuda.is_available() else net
    #amodel = PytorchModel(model, bounds=[0,1], num_classes=10)
    fmodel = foolbox.models.PyTorchModel(model, bounds=[0, 1], num_classes=10)
    #if targeted:
    #    criterion = TargetClass(target)
    #    attack = foolbox.attacks.BoundaryAttack(fmodel, criterion)
    #else:
    #    attack = foolbox.attacks.BoundaryAttack(fmodel)
    print("using BoundaryAttack from foolbox")
    #print("Invalid algorithm")

    np.random.seed(0)
    seeds = np.random.randint(10000, size=[2 * num])
    count = 0
    for i, (xi, yi) in enumerate(test_loader):
        if i < start_from:
            continue
        if count == num:
            break
        image, label = xi[0].numpy(), yi.item()
        seed_index = i - start_from
        np.random.seed(seeds[seed_index])
        target = np.random.randint(10) * torch.ones(1, dtype=torch.long).cuda() if targeted else None
        print("Attacking Source: {0} Target: {1} Seed: {2} Number {3}".format(
            yi.item(), target, seeds[seed_index], i))

        if targeted:
            target = target.item()
            criterion = TargetClass(target)
            attack = foolbox.attacks.BoundaryAttack(fmodel, criterion)
            # Take the first training batch and use an image of the target class
            # as the starting point. (The inner loop variables are renamed so
            # they no longer shadow the outer i, xi, yi.)
            for j, (xj, yj) in enumerate(train_loader):
                if j == 1:
                    break
                index = (yj == target).nonzero()
                image_t = xj[index[0]][0]
                image_t = image_t.numpy()
                adv = attack(image, label, iterations=6000, verbose=False, unpack=False,
                             log_every_n_steps=100, starting_point=image_t)
        else:
            attack = foolbox.attacks.BoundaryAttack(fmodel)
            adv = attack(image, label, iterations=6000, verbose=False, unpack=False,
                         log_every_n_steps=100)

        if adv.image is None:
            continue
        dis = LA.norm(adv.image - image)
        print("adversarial example found successfully: distortion {} target {} queries {}".format(
            dis, np.argmax(fmodel.predictions(adv.image)), adv._total_prediction_calls))
        #adv, dist = attack(xi.cuda(), yi.cuda(), target=target,
        #                   seed=seeds[seed_index], query_limit=query_limit)
        #if dist > 1e-8 and dist != float('inf'):
        #    count += 1
        print()
                              global_threshold_lbp_linear)
models = [model_lbp_rbf, model_lbp_linear]
adv_models = [adv_model_lbp_rbf, adv_model_lbp_linear]
modelnames = ['model_lbp_rbf', 'model_lbp_linear']
thresholds = [global_threshold_lbp, global_threshold_lbp_linear]

genuine_idx, forgery_idx, skforgery_idx = selected_images[user]

# Attack genuine images
if genuine_idx != -1:
    selected_genuine = x_test[genuine_idx].squeeze()
    for original_m, adv_m, mname, t in zip(models, adv_models, modelnames, thresholds):
        assert original_m.predictions(selected_genuine)[1] == 1
        atk = BoundaryAttack(adv_m, TargetClass(0))
        print('Running Boundary attack on {}'.format(mname))
        boundary_result = atk(selected_genuine.astype(np.float32), 1,
                              iterations=1000, verbose=False)
        if boundary_result is not None:
            results_genuine.append(
                (user, mname, 'genuine', 'decision', genuine_idx, boundary_result,
                 rmse(boundary_result - selected_genuine),
                 original_m.predict_score(boundary_result),
                 original_m.predictions(boundary_result)[0]))
        else:
#p_gen = None
#p_gen = PerturbGenerator(preprocess=((0,1,2),mean,std))
#p_gen = PerturbGenerator()
#p_gen = BigGANGenerator()
#p_gen = UNet(n_channels=3)
#p_gen.load_state_dict(torch.load('unet.model', map_location='cpu'))
#p_gen = ResizeGenerator()
p_gen = DCTGenerator()
#rvs = p_gen.generate_ps(src_image, 10, level=3)
#print(rvs)
#print(rvs.shape)
#assert 0

#attack = foolbox.attacks.BoundaryAttackPlusPlus(fmodel)
#attack = foolbox.attacks.BoundaryAttackPlusPlus(fmodel, criterion=TargetClass(src_label))
attack = foolbox.attacks.BAPP_physical(fmodel, criterion=TargetClass(src_label))
adv = attack(tgt_image, tgt_label, starting_point=src_image, iterations=50,
             stepsize_search='geometric_progression', verbose=True, unpack=False,
             max_num_evals=100, initial_num_evals=100, internal_dtype=np.float32,
             rv_generator=p_gen, atk_level=args.atk_level, mask=mask)

# Final adversarial
adversarial = adv.perturbed
adv_label = np.argmax(fmodel.forward_one(adversarial))
ret1 = tgt_image
ret2 = adversarial
print("Total calls:", adv._total_prediction_calls)
print("Final MSE between Target and Adv:", MSE(ret1, ret2))
print("Source label: %d; Target label: %d; Adv label: %d" % (src_label, tgt_label, adv_label))
print(attack.logger)
with open('BAPP_result/attack_%s.log' % args.suffix, 'w') as outf:
    json.dump(attack.logger, outf)
for image, label in val_loader_foolbox:
    label = id2id[label.item()]
    prep_image = (image - imagenet_mean) / imagenet_std
    prep_image = prep_image.cuda()
    image = image[0].numpy()

    # PGD configuration
    wrapped_model = wrapper(model, prep_image, attack_size, (x, y), clip_fn, a, b)
    wrapped_model.eval()
    print('image {}, current location: {}'.format(img_idx, (x, y)))
    fmodel = foolbox.models.PyTorchModel(wrapped_model, bounds=(0, 1),
                                         num_classes=1000, preprocessing=(mean, std))
    criterion = TargetClass(targeted_class)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        attack = attack_alg(fmodel, criterion=criterion,
                            distance=foolbox.distances.Linfinity)
    subimg = get_subimg(image, (x, y), attack_size)
    adversarial = attack(subimg, label, iterations=max_iter, epsilon=1.,
                         stepsize=0.01, random_start=True, return_early=False,
                         binary_search=False)
def bn_targeted_criterion():
    label = bn_label()
    assert label in [0, 1]
    return TargetClass(1 - label)
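# Minimal sketch of using the criterion above (assumes a wrapped binary model
# `fmodel`; not from the original source): in a two-class task, "targeted at
# the other class" coincides with untargeted misclassification, but the
# targeted criterion makes the goal explicit.
attack = foolbox.attacks.BoundaryAttack(fmodel, criterion=bn_targeted_criterion())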
                                         x_train.shape[1:], dropout, dropout)
elif args.experiment_type == "six_layer_dnn":
    kmodel = neural_networks.asymmetric_six_layer_nn_foolbox(
        x_train.shape[1:], dropout, dropout)
elif args.experiment_type == "VGG":
    kmodel = convolutional.mini_VGG_foolbox(dropout, dropout, 0, "mnist")
elif args.experiment_type == "leNet5":
    kmodel = convolutional.leNet_cnn_foolbox(dropout, dropout, "mnist")

kmodel.fit(x_train, y_train, epochs=10, batch_size=128)
# kmodel.fit(x_train, y_train, epochs=50, batch_size=128)
preds = np.argmax(kmodel.predict(x_test), axis=1)

x_sample = np.take(x_test, ones, axis=0)[:5]
y_sample = np.array([1 for x in x_sample])
y_target = np.array([to_one_hot(7) for _ in x_sample])

attack = CarliniWagnerL2Attack(kmodel, TargetClass(7))
# attack = RandomPGD(kmodel, TargetClass(7))
adversarial = attack(x_sample, y_sample, binary_search_steps=5, max_iterations=600)
# adversarial = attack(x_sample, y_sample, iterations=30)
print(kmodel.predict(adversarial))

# Samples for which the L2 method does not produce an adversarial example
# within the attack parameters are excluded from the perturbation evaluation.