def test_krclassifier(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    # Build KerasClassifier
    krc = get_classifier_kr()

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Attack
    up = UniversalPerturbation(krc, max_iter=1, attacker="ead",
                               attacker_params={"max_iter": 5, "targeted": False})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(krc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())
def test_iris_pt(self):
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()

    attack_params = {"max_iter": 1, "attacker": "ead",
                     "attacker_params": {"max_iter": 5, "targeted": False}}
    attack = UniversalPerturbation(classifier)
    attack.set_params(**attack_params)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (acc * 100))
def test_tfclassifier(self):
    """
    First test with the TensorFlowClassifier.
    :return:
    """
    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf()

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Attack
    up = UniversalPerturbation(tfc, max_iter=1, attacker="newtonfool",
                               attacker_params={"max_iter": 5})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    # Attack
    up = UniversalPerturbation(ptc, max_iter=1, attacker="newtonfool",
                               attacker_params={"max_iter": 5})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())
def test_iris_k_unbounded(self):
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack_params = {"max_iter": 1, "attacker": "newtonfool",
                     "attacker_params": {"max_iter": 5}}
    attack = UniversalPerturbation(classifier)
    attack.set_params(**attack_params)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (acc * 100))
def test_tensorflow_mnist(self):
    """
    First test with the TensorFlowClassifier.
    :return:
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf()

    # Attack
    up = UniversalPerturbation(tfc, max_iter=1, attacker="newtonfool",
                               attacker_params={"max_iter": 5})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_keras_mnist(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()

    # Build KerasClassifier
    krc = get_classifier_kr()

    # Attack
    up = UniversalPerturbation(krc, max_iter=1, attacker="ead",
                               attacker_params={"max_iter": 5, "targeted": False})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(krc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_pytorch_iris(self):
    classifier = get_tabular_classifier_pt()

    attack_params = {"max_iter": 1, "attacker": "ead",
                     "attacker_params": {"max_iter": 5, "targeted": False}}
    attack = UniversalPerturbation(classifier)
    attack.set_params(**attack_params)
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
    self.assertTrue((x_test_iris_adv <= 1).all())
    self.assertTrue((x_test_iris_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (acc * 100))
def test_pytorch_mnist(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
    x_test_original = x_test.copy()

    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Attack
    up = UniversalPerturbation(ptc, max_iter=1, attacker="newtonfool",
                               attacker_params={"max_iter": 5})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def GetAttackers(classifier, x_test, attacker_name):
    """
    Build the requested ART attacker, generate adversarial samples for x_test,
    and return them together with the elapsed time.
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        # Note: `Attack` is the abstract base class and cannot craft samples itself
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier, targeted=False, epsilon=0.05, max_iter=20)
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier, norm=2, eps=1, eps_step=0.5)
    else:
        raise ValueError("Unknown attacker name: {}".format(attacker_name))
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
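# A minimal usage sketch for GetAttackers above. `classifier` and `x_test` are
# assumed to come from the surrounding script (e.g. an ART KerasClassifier and
# an MNIST test batch); they are not defined in this file.
x_test_adv, dt = GetAttackers(classifier, x_test[:100], "DeepFool")
print("Crafted {} adversarial samples in {:.1f}s".format(len(x_test_adv), dt))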
def test_classifier_type_check_fail_classifier(self):
    # Use a useless test classifier to test basic classifier properties
    class ClassifierNoAPI:
        pass

    classifier = ClassifierNoAPI
    with self.assertRaises(TypeError) as context:
        _ = UniversalPerturbation(classifier=classifier)

    self.assertIn('For `UniversalPerturbation` classifier must be an instance of '
                  '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
                  '(<class \'object\'>,).', str(context.exception))
def test_classifier_type_check_fail_gradients(self):
    # Use a test classifier not providing gradients required by white-box attack
    from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
    from sklearn.tree import DecisionTreeClassifier

    classifier = ScikitlearnDecisionTreeClassifier(model=DecisionTreeClassifier())
    with self.assertRaises(TypeError) as context:
        _ = UniversalPerturbation(classifier=classifier)

    self.assertIn('For `UniversalPerturbation` classifier must be an instance of '
                  '`art.classifiers.classifier.ClassifierNeuralNetwork` and '
                  '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
                  '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).', str(context.exception))
def get_adversarial(targeted, attack_name, classifier, xs, target_ys, batch_size,
                    dataset, fgsm_epsilon=0, cwl2_confidence=0):
    # Build the attack
    attack = None
    samples_range = xs.shape[0]

    if attack_name == 'FastGradientMethod':
        # Defaults: norm=np.inf, eps=.3, eps_step=0.1, targeted=False,
        # num_random_init=0, batch_size=1, minimal=False
        attack = FastGradientMethod(classifier=classifier, targeted=targeted,
                                    eps=fgsm_epsilon, batch_size=batch_size)
    elif attack_name == 'CarliniLInfMethod':
        # Defaults: confidence=0.0, targeted=False, learning_rate=0.01, max_iter=10,
        # max_halving=5, max_doubling=5, eps=0.3, batch_size=128
        attack = CarliniLInfMethod(classifier=classifier, max_iter=1000,
                                   targeted=targeted, batch_size=batch_size)
    elif attack_name == 'UniversalPerturbation':
        # Defaults: attacker='deepfool', attacker_params=None, delta=0.2,
        # max_iter=20, eps=10.0, norm=np.inf
        if targeted:
            print('UniversalPerturbation attack cannot be targeted.')
            exit()
        attack = UniversalPerturbation(classifier=classifier, max_iter=5)
    elif attack_name == 'ProjectedGradientDescent':
        # Defaults: norm=np.inf, eps=.3, eps_step=0.1, max_iter=100,
        # targeted=False, num_random_init=0, batch_size=1
        if dataset == 'mnist':
            attack = ProjectedGradientDescent(classifier=classifier, targeted=targeted,
                                              norm=1, eps=.3, eps_step=0.01,
                                              num_random_init=0, max_iter=40,
                                              batch_size=batch_size)
        else:
            attack = ProjectedGradientDescent(classifier=classifier, targeted=targeted,
                                              norm=1, eps=8.0, eps_step=2.0,
                                              num_random_init=0, max_iter=7,
                                              batch_size=batch_size)

    if targeted:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :], y=target_ys[0:batch_size])
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(xs[ii:ii + batch_size, :, :, :],
                                          y=target_ys[ii:ii + batch_size])
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii
        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(last_samples, y=target_ys[last_ii + batch_size:])
            adv = np.concatenate((adv, adv_samples), axis=0)
    else:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :])
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(xs[ii:ii + batch_size, :, :, :])
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii
        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(last_samples)
            adv = np.concatenate((adv, adv_samples), axis=0)

    adv = np.asarray(adv)
    return adv
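# A minimal sketch of driving get_adversarial above for an untargeted PGD run.
# `classifier` and `x_test` are assumed to exist in the caller (an ART
# classifier plus a 4-D NumPy image batch); target_ys is unused in the
# untargeted branch.
adv = get_adversarial(targeted=False, attack_name='ProjectedGradientDescent',
                      classifier=classifier, xs=x_test, target_ys=None,
                      batch_size=128, dataset='mnist')
print("Generated adversarial batch of shape", adv.shape)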
def my_gen_UAP(self):
    num_i = self.X_images.shape[0]
    num_m = len(self.X_materials_paths)
    imshape = self.X_images[0].shape

    # print("\n Generating UAP ...")
    if self.targeted > 0:
        print(" *** targeted attack *** \n")
        adv_crafter = UniversalPerturbation(
            self.classifier, attacker='fgsm', delta=0.000001,
            attacker_params={"targeted": True, "eps": self.fgsm_eps},
            max_iter=self.uap_iter, eps=self.norm_size, norm=self.norm_type)
    else:
        print(" *** non-targeted attack *** \n")
        adv_crafter = UniversalPerturbation(
            self.classifier, attacker='fgsm', delta=0.000001,
            attacker_params={"eps": self.fgsm_eps},
            max_iter=self.uap_iter, eps=self.norm_size, norm=self.norm_type)

    LOG = []
    X_materials_cnt = 0
    noise = np.zeros(imshape)
    noise = noise.astype('float32')
    for i, path in enumerate(self.X_materials_paths):
        X_materials = np.load(path)
        X_materials_cnt += X_materials.shape[0]
        # if X_materials.shape[-1] != 3:
        #     X_materials = self.my_gray_scale(images=X_materials)
        X_materials -= 128.0  # normalize to [-1, +1]
        X_materials /= 128.0

        # Generate the UAP
        if self.targeted >= 0:
            # Build the target label array for the targeted attack
            Y_materials_tar = self.my_target_labels(length=X_materials.shape[0])
            noise = adv_crafter.generate(X_materials, noise=noise, y=Y_materials_tar, targeted=True)
        else:
            noise = adv_crafter.generate(X_materials, noise=noise)

        # Fallback for the case where the noise was never updated
        if type(adv_crafter.noise[0, :]) == int:
            noise = np.zeros(imshape)
        else:
            noise = np.copy(adv_crafter.noise)
        noise = np.reshape(noise, imshape)

        # Generate a randomized noise (control)
        noise_random = self.my_randomized_noise(noise=noise)

        # Compute fooling ratios
        fr_i = self.my_calc_fooling_ratio(images=self.X_images, noise=noise)  # images + noise
        fr_m = self.my_calc_fooling_ratio(images=X_materials, noise=noise)  # materials + noise
        fr_i_r = self.my_calc_fooling_ratio(images=self.X_images, noise=noise_random)
        fr_m_r = self.my_calc_fooling_ratio(images=X_materials, noise=noise_random)

        # Compute the size of the generated UAP
        norm_2 = np.linalg.norm(noise)
        norm_inf = abs(noise).max()

        LOG.append([X_materials_cnt, norm_2, norm_inf, fr_i, fr_m, fr_i_r, fr_m_r])
        # np.save(self.save_path + '_noise_{}'.format(i), noise)
        print("LOG: {} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f}".format(
            X_materials_cnt, norm_2, norm_inf, fr_i, fr_m, fr_i_r, fr_m_r))
        del X_materials  # free memory

    np.save(self.save_path + '_noise', noise)
    np.save(self.save_path + '_LOG', np.array(LOG))
    return noise, np.array(LOG)
def main(args):
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset(str('cifar10'))
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
    x_train = x_train[:50]
    y_train = y_train[:50]

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    classifier = PyTorchClassifier(model=model, clip_values=(min_, max_), loss=criterion,
                                   optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    attack_params = {
        "attacker": "fgsm",
        # "attacker_params": {
        #     "max_iter": 1000,
        #     "epsilon": 0.02
        # },
        "delta": 0.01,
        "max_iter": 1000,
        "eps": 13.0 / 255.0,
        "norm": np.inf
    }

    # Craft attack on training examples
    adv_crafter = UniversalPerturbation(classifier, **attack_params)
    x_train_adv = adv_crafter.generate(x_train)

    # Fooling rate on train set
    adv_crafter.fooling_rate
    # Convergence
    adv_crafter.converged

    print('\nCraft attack train examples')

    # adv_crafter.noise: the universal perturbation
    # perturbation = adv_crafter.v[0, :]
    perturbation = adv_crafter.noise
    x_train_adv = x_train + perturbation

    # randomized perturbation (control)
    # perturbation_rand = np.random.permutation(perturbation.reshape(32 * 32 * 3)).reshape(3, 32, 32)
    # x_train_adv_rand = x_train + perturbation_rand

    preds = np.argmax(classifier.predict(x_train), axis=1)
    preds_adv = np.argmax(classifier.predict(x_train_adv), axis=1)
    acc = np.sum(preds != preds_adv) / y_train.shape[0]

    # Fooling rate on train set (universal perturbation)
    print("\nFooling rate: %.2f%%" % (acc * 100))
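# A small follow-up sketch, assuming classifier, adv_crafter, x_test and y_test
# from main() above: apply the universal noise to the test set and measure the
# test-set fooling rate the same way as for the training set.
x_test_adv = x_test + adv_crafter.noise
preds_test = np.argmax(classifier.predict(x_test), axis=1)
preds_test_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
print("Test fooling rate: %.2f%%" % (100 * np.mean(preds_test != preds_test_adv)))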
                  model_type=args.model, mode='inference')

# Generate adversarial examples
classifier, norm, eps = set_art(model=model, norm_str=args.norm, eps=args.eps,
                                mean_l2_train=mean_l2_train,
                                mean_linf_train=mean_linf_train)
adv_crafter = UniversalPerturbation(classifier, attacker='fgsm', delta=0.000001,
                                    attacker_params={'targeted': False, 'eps': 0.0024},
                                    max_iter=15, eps=eps, norm=norm)
_ = adv_crafter.generate(X_train)
noise = adv_crafter.noise[0, :].astype(np.float32)
base_f = 'nontargeted_{}_{}_eps{:.3f}'.format(args.model, args.norm, args.eps)
save_f_noise = 'result/{}/noise/{}'.format(args.dataset, base_f)
np.save(save_f_noise, noise)

# Evaluate the ART classifier on adversarial examples
preds_train = np.argmax(classifier.predict(X_train), axis=1)
preds_test = np.argmax(classifier.predict(X_test), axis=1)