def _test_backend_mnist(self, classifier):
    """
    Attack `classifier` with DeepFool on the MNIST split held in `self.mnist`.

    The attack (at most 5 iterations) is applied to the test samples first and to the training
    samples second. The test fails if an adversarial set equals its clean input everywhere, or if
    the labels derived from the predictions on adversarial data equal the true labels everywhere.
    The accuracy on both adversarial sets is logged.

    :param classifier: Classifier under attack; also used to predict on the adversarial samples.
    :return: None
    """
    (train_x, train_y), (test_x, test_y) = self.mnist

    # Craft adversarial samples: test set first, then training set
    deepfool = DeepFool(classifier, max_iter=5)
    adv_test = deepfool.generate(test_x)
    adv_train = deepfool.generate(train_x)

    # The attack has to change at least one value in each set
    self.assertFalse((train_x == adv_train).all())
    self.assertFalse((test_x == adv_test).all())

    adv_train_labels = get_labels_np_array(classifier.predict(adv_train))
    adv_test_labels = get_labels_np_array(classifier.predict(adv_test))

    # At least one adversarial sample has to receive a label different from the truth
    self.assertFalse((train_y == adv_train_labels).all())
    self.assertFalse((test_y == adv_test_labels).all())

    train_hits = np.argmax(adv_train_labels, axis=1) == np.argmax(train_y, axis=1)
    train_acc = np.sum(train_hits) / train_y.shape[0]
    logger.info('Accuracy on adversarial train examples: %.2f%%', (train_acc * 100))

    test_hits = np.argmax(adv_test_labels, axis=1) == np.argmax(test_y, axis=1)
    test_acc = np.sum(test_hits) / test_y.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', (test_acc * 100))
def test_mnist(self):
    """
    Train a CNN on a small MNIST subset and check that DeepFool changes the test samples.

    The first 1000 training samples are used to fit a ReLU CNN for one epoch. DeepFool is then
    configured with `clip_min=0.` / `clip_max=1.` and applied to the first 11 test samples. The
    test fails if the adversarial samples equal the clean ones everywhere, or if the raw
    predictions on them equal `Y_test` everywhere. Accuracy before and after the attack is printed.

    :return: None
    """
    sess = tf.Session()
    k.set_session(sess)

    # Subset sizes and training batch size
    n_train, n_test, fit_batch = 1000, 11, 100
    compile_args = {
        "loss": 'categorical_crossentropy',
        "optimizer": 'adam',
        "metrics": ['accuracy'],
    }

    # Load MNIST and keep only the leading samples
    (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
    X_train = X_train[:n_train]
    Y_train = Y_train[:n_train]
    X_test = X_test[:n_test]
    Y_test = Y_test[:n_test]
    input_shape = X_train[0].shape

    # Build and train the classifier
    cnn = CNN(input_shape, act="relu")
    cnn.compile(compile_args)
    cnn.fit(X_train, Y_train, epochs=1, batch_size=fit_batch, verbose=0)

    clean_scores = cnn.evaluate(X_test, Y_test)
    print("\naccuracy on test set: %.2f%%" % (clean_scores[1] * 100))

    # Attack the test samples
    deepfool = DeepFool(cnn, sess=sess)
    deepfool.set_params(clip_min=0., clip_max=1.)
    adv_test = deepfool.generate(X_test)
    self.assertFalse((X_test == adv_test).all())

    adv_pred = cnn.predict(adv_test)
    self.assertFalse((Y_test == adv_pred).all())

    adv_scores = cnn.evaluate(adv_test, Y_test)
    print('\naccuracy on adversarial examples: %.2f%%' % (adv_scores[1] * 100))
def _test_backend_mnist(self, classifier):
    """
    Attack `classifier` with DeepFool on the leading `NB_TRAIN` / `NB_TEST` MNIST samples.

    The attack (at most 5 iterations) is applied to the test samples first and to the training
    samples second. The test fails if an adversarial set equals its clean input everywhere, or if
    the labels derived from the predictions on adversarial data equal the true labels everywhere.
    The accuracy on both adversarial sets is printed.

    :param classifier: Classifier under attack; also used to predict on the adversarial samples.
    :return: None
    """
    # Load MNIST and keep only the leading samples
    (train_x, train_y), (test_x, test_y), _, _ = load_mnist()
    train_x = train_x[:NB_TRAIN]
    train_y = train_y[:NB_TRAIN]
    test_x = test_x[:NB_TEST]
    test_y = test_y[:NB_TEST]

    # Craft adversarial samples: test set first, then training set
    deepfool = DeepFool(classifier, max_iter=5)
    adv_test = deepfool.generate(test_x)
    adv_train = deepfool.generate(train_x)

    # The attack has to change at least one value in each set
    self.assertFalse((train_x == adv_train).all())
    self.assertFalse((test_x == adv_test).all())

    adv_train_labels = get_labels_np_array(classifier.predict(adv_train))
    adv_test_labels = get_labels_np_array(classifier.predict(adv_test))

    # At least one adversarial sample has to receive a label different from the truth
    self.assertFalse((train_y == adv_train_labels).all())
    self.assertFalse((test_y == adv_test_labels).all())

    train_hits = np.argmax(adv_train_labels, axis=1) == np.argmax(train_y, axis=1)
    train_acc = np.sum(train_hits) / train_y.shape[0]
    print('\nAccuracy on adversarial train examples: %.2f%%' % (train_acc * 100))

    test_hits = np.argmax(adv_test_labels, axis=1) == np.argmax(test_y, axis=1)
    test_acc = np.sum(test_hits) / test_y.shape[0]
    print('\nAccuracy on adversarial test examples: %.2f%%' % (test_acc * 100))
def test_multi_attack_mnist(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target
    models of the attack are two CNNs on MNIST trained for 5 epochs. FGSM and DeepFool both
    generate the attack images on the same source classifier. The test case checks if accuracy on
    adversarial samples increases after adversarially training the model.

    :return: None
    """
    session = tf.Session()
    k.set_session(session)

    # Load MNIST and keep only the leading samples
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
    im_shape = x_train[0].shape

    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    params = {'epochs': 5, 'batch_size': BATCH_SIZE}

    # Create and fit target classifier
    classifier_tgt = CNN(im_shape, dataset='mnist')
    classifier_tgt.compile(comp_params)
    classifier_tgt.fit(x_train, y_train, **params)

    # Create and fit source classifier. The attacks below are crafted on this model, so it has to
    # be trained itself (previously the target classifier was fitted a second time instead).
    classifier_src = CNN(im_shape, dataset='mnist')
    classifier_src.compile(comp_params)
    classifier_src.fit(x_train, y_train, **params)

    # Create FGSM and DeepFool attackers
    adv1 = FastGradientMethod(classifier_src, session)
    adv2 = DeepFool(classifier_src, session)

    # Stack the adversarial samples of both attacks; the labels are repeated accordingly
    x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))
    print(y_adv.shape)
    acc = classifier_tgt.evaluate(x_adv, y_adv)

    # Perform adversarial training
    adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
    adv_trainer.fit(x_train, y_train, **params)

    # Evaluate that accuracy on adversarial sample has improved
    # NOTE(review): if `evaluate` returns a [loss, accuracy] sequence rather than a scalar, this
    # comparison is lexicographic and dominated by the loss entry -- confirm the return type and
    # compare the accuracy entry explicitly if so.
    acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
    self.assertTrue(acc_adv_trained >= acc)
def test_iris_pt(self):
    """
    Attack the classifier returned by `get_iris_classifier_pt` with DeepFool on the Iris test set.

    The test fails if the adversarial samples equal the clean ones everywhere, if any adversarial
    value lies outside [0, 1], or if every adversarial sample is still classified as its true
    class. The accuracy on the adversarial samples is logged.

    :return: None
    """
    (_, _), (samples, onehot_labels) = self.iris
    model = get_iris_classifier_pt()

    deepfool = DeepFool(model, max_iter=5, batch_size=128)
    adv_samples = deepfool.generate(samples)

    # The attack has to change something while keeping every value inside [0, 1]
    self.assertFalse((samples == adv_samples).all())
    self.assertTrue((adv_samples <= 1).all())
    self.assertTrue((adv_samples >= 0).all())

    adv_classes = np.argmax(model.predict(adv_samples), axis=1)
    true_classes = np.argmax(onehot_labels, axis=1)
    self.assertFalse((true_classes == adv_classes).all())

    accuracy = np.sum(adv_classes == true_classes) / onehot_labels.shape[0]
    logger.info(
        'Accuracy on Iris with DeepFool adversarial examples: %.2f%%',
        (accuracy * 100))
def test_partial_grads(self):
    """
    Run DeepFool with `nb_grads=3` and `max_iter=2` against `self.classifier_k` on MNIST test data.

    The test fails if the adversarial samples equal the clean ones everywhere, or if the labels
    derived from the predictions on them equal the true labels everywhere. The accuracy on the
    adversarial samples is logged.

    :return: None
    """
    (_, _), (samples, onehot_labels) = self.mnist

    deepfool = DeepFool(self.classifier_k, max_iter=2, nb_grads=3)
    adv_samples = deepfool.generate(samples)
    self.assertFalse((samples == adv_samples).all())

    adv_labels = get_labels_np_array(self.classifier_k.predict(adv_samples))
    self.assertFalse((onehot_labels == adv_labels).all())

    hits = np.argmax(adv_labels, axis=1) == np.argmax(onehot_labels, axis=1)
    accuracy = np.sum(hits) / onehot_labels.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', (accuracy * 100))
def test_iris_k_unbounded(self):
    """
    Attack a Keras Iris classifier that is re-wrapped without clip values using DeepFool.

    The model of the classifier returned by `get_iris_classifier_kr` is wrapped in a fresh
    `KerasClassifier` created without clip values. The test fails if the adversarial samples equal
    the clean ones everywhere, or if every adversarial sample is still classified as its true
    class. The accuracy on the adversarial samples is logged.

    :return: None
    """
    (_, _), (samples, onehot_labels) = self.iris

    bounded, _ = get_iris_classifier_kr()
    # Recreate a classifier without clip values
    unbounded = KerasClassifier(model=bounded._model, use_logits=False, channel_index=1)

    deepfool = DeepFool(unbounded, max_iter=5, batch_size=128)
    adv_samples = deepfool.generate(samples)
    self.assertFalse((samples == adv_samples).all())

    adv_classes = np.argmax(unbounded.predict(adv_samples), axis=1)
    true_classes = np.argmax(onehot_labels, axis=1)
    self.assertFalse((true_classes == adv_classes).all())

    accuracy = np.sum(adv_classes == true_classes) / onehot_labels.shape[0]
    logger.info(
        'Accuracy on Iris with DeepFool adversarial examples: %.2f%%',
        (accuracy * 100))
def GetAttackers(classifier, x_test, attacker_name):
    """
    Build the attack selected by `attacker_name` and craft adversarial samples from `x_test`.

    :param classifier: Classifier handed to the attack constructor.
    :param x_test: Samples passed to the attack's `generate` method.
    :param attacker_name: Name of the attack; one of the keys of the dispatch table below.
    :return: Tuple `(test_adv, dt)` with the result of `generate` and the elapsed time in seconds.
        The measured time covers both constructing the attack and generating the samples.
    :raises ValueError: If `attacker_name` is not a supported attack name.
    """
    t_start = time.time()

    # Attack name -> zero-argument factory. The factories are lazy so that only the selected
    # attack class is looked up and instantiated.
    factories = {
        "FGSM": lambda: FastGradientMethod(classifier=classifier, eps=0.3),
        "Elastic": lambda: ElasticNet(classifier=classifier, confidence=0.5),
        "BasicIterativeMethod": lambda: BasicIterativeMethod(classifier=classifier, eps=0.3),
        "NewtonFool": lambda: NewtonFool(classifier=classifier, max_iter=20),
        "HopSkipJump": lambda: HopSkipJump(classifier=classifier, max_iter=20),
        "ZooAttack": lambda: ZooAttack(classifier=classifier, max_iter=20),
        "VirtualAdversarialMethod": lambda: VirtualAdversarialMethod(
            classifier=classifier, max_iter=20),
        "UniversalPerturbation": lambda: UniversalPerturbation(
            classifier=classifier, max_iter=20),
        "AdversarialPatch": lambda: AdversarialPatch(classifier=classifier, max_iter=20),
        "Attack": lambda: Attack(classifier=classifier),
        "BoundaryAttack": lambda: BoundaryAttack(
            classifier=classifier, targeted=False, epsilon=0.05, max_iter=20),
        "CarliniL2": lambda: CarliniL2Method(
            classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15),
        "CarliniLinf": lambda: CarliniLInfMethod(
            classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15),
        "DeepFool": lambda: DeepFool(classifier),
        "SMM": lambda: SaliencyMapMethod(classifier=classifier, theta=2),
        "PGD": lambda: ProjectedGradientDescent(
            classifier=classifier, norm=2, eps=1, eps_step=0.5),
    }

    try:
        make_attacker = factories.get(attacker_name)
    except TypeError:
        # An unhashable name cannot match any supported attack
        make_attacker = None
    if make_attacker is None:
        raise ValueError("Please get the right attacker's name for the input.")

    attacker = make_attacker()
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
classifier = KerasClassifier((0, 1), model, use_logits=False) return classifier # Get session session = tf.Session() k.set_session(session) # Read MNIST dataset (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist() # Construct and train a convolutional neural network on MNIST using Keras source = cnn_mnist_k(x_train.shape[1:]) source.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Craft adversarial samples with DeepFool adv_crafter = DeepFool(source) x_train_adv = adv_crafter.generate(x_train) x_test_adv = adv_crafter.generate(x_test) # Construct and train a convolutional neural network target = cnn_mnist_tf(x_train.shape[1:]) target.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Evaluate the CNN on the adversarial samples preds = target.predict(x_test_adv) acc = np.sum(np.equal(np.argmax(preds, axis=1), np.argmax( y_test, axis=1))) / y_test.shape[0] print("\nAccuracy on adversarial samples: %.2f%%" % (acc * 100))
The `ratio` determines how many of the clean samples in each batch are replaced with their adversarial counterpart. warning: Both successful and unsuccessful adversarial samples are used for training. In the case of unbounded attacks (e.g., DeepFool), this can result in invalid (very noisy) samples being included. """ if adv_train_attack == "FGM": attacks = FastGradientMethod(robust_classifier, eps=attack_par["epsilon"], norm=attack_par["norm_type"]) elif adv_train_attack == "BIM": attacks = BasicIterativeMethod(robust_classifier, norm=attack_par["norm_type"], eps=attack_par["epsilon"], eps_step=attack_par["epsilon_step"], max_iter=attack_par["max_iteration"]) elif adv_train_attack == "PGD": attacks = ProjectedGradientDescent(robust_classifier, norm=attack_par["norm_type"], eps=attack_par["epsilon"], eps_step=attack_par["epsilon_step"], max_iter=attack_par["max_iteration"]) elif adv_train_attack == "JSMA": attacks = SaliencyMapMethod(robust_classifier, theta=attack_par["theta"], gamma=attack_par["gamma"]) elif adv_train_attack == "DeepFool": attacks = DeepFool(robust_classifier, max_iter=attack_par["max_iteration"], epsilon=attack_par["epsilon"]) # ==================================2-2.开始对抗训练 =============================== # trainer = AdversarialTrainer(robust_classifier, attacks, ratio=ratio_value) trainer.fit(x_train, y_train, nb_epochs=adv_train_num, batch_size=128, verbose=2) robust_classifier_model.save("./model/adv_model") end_time = time.time() model = load_model("./model/adv_model") scores = model.evaluate(x_test, y_test, verbose=0) # print('Test loss:', scores[0]) # print('Test accuracy:', scores[1]) # print("adv_model generation's timecost: " + str(end_time - begin_time)) print("adversarial_training completed!") os.system("echo " + data_type + "_" + model_type + ": " + str(end_time - begin_time) + " > ../evaluation/adv_time.txt") os.system("rm -r ./model/origin_model")
# Craft adversarial samples from the first 100 test samples with the attack currently bound to
# `attacker_robust` (created outside this excerpt; the message below labels it as PGD) and count
# how many of them the robust classifier still assigns to `true_label`.
# NOTE(review): `true_label` is defined outside this excerpt -- presumably the class indices of
# the first 100 test samples; confirm.
x_test_adv_robust = attacker_robust.generate(x_test[:100])
x_test_adv_robust_pred = np.argmax(
    robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against PGD attack: {}".format(
    nb_correct_adv_robust_pred))

# CW: same count for an untargeted attack built with `cw`; this rebinds `attacker_robust` and
# overwrites the PGD results above
attacker_robust = cw(robust_classifier, targeted=False, batch_size=100)
x_test_adv_robust = attacker_robust.generate(x_test[:100])
x_test_adv_robust_pred = np.argmax(
    robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against CW attack: {}".format(
    nb_correct_adv_robust_pred))

# DeepFool: same count, kept in separate `*_df` variables
adv_crafter_df = DeepFool(robust_classifier)
img_adv_df = adv_crafter_df.generate(x_test[0:100])
x_test_adv_robust_pred_df = np.argmax(robust_classifier.predict(img_adv_df), axis=1)
nb_correct_adv_robust_pred_df = np.sum(x_test_adv_robust_pred_df == true_label)
print("Correctly classified against DeepFool attack: {}".format(
    nb_correct_adv_robust_pred_df))

# Normal images: load the original model and evaluate it on the full clean test set
original_model = load_model('saved_models/mnist_cnn_original.h5')  # original
classifier = KerasClassifier(clip_values=(0, 1), model=original_model, use_logits=False)
x_test_pred = np.argmax(classifier.predict(x_test), axis=1)
# Despite its name this is a percentage: correct predictions / number of samples * 100
nb_correct_pred = np.sum(
    x_test_pred == np.argmax(y_test, axis=1)) / y_test.shape[0] * 100
def attack(self, model=None, attack_str=""):
    """
    Return adversarial images for the configured attack, reusing stored ones when available.

    Images returned by `self._load_images` are used as-is when present. Otherwise the attack named
    by `self.__attack` ('FGSM', any name starting with 'CW', 'BIM' or 'DEEPFOOL') is run on
    `X[self.idx_adv][:self._length]`, where `X` is the test or the validation data depending on
    `self._test_or_val_dataset`, and the result is pickled into `self._attack_dir`.

    :param model: Model to attack. When `None`, `self.surrogate_model` is trained on the training
        data and attacked instead.
    :param attack_str: Passed through to `self._load_images`.
    :return: The loaded or freshly crafted adversarial images.
    :raises ValueError: If the configured attack is not supported, or if BIM is requested for a
        dataset other than 'MNIST' or 'CIFAR'.
    """
    imgs = self._load_images(attack_str, self._test_or_val_dataset)
    if imgs is not None:
        print('\n{0} adversarial examples using {1} attack loaded...\n'.
              format(self.__dataset, self.__attack))
        return imgs

    # Validate the configuration before the (potentially expensive) surrogate training, so that a
    # bad setting fails with a clear error instead of an unbound local variable later on.
    attack_name = self.__attack
    is_cw = attack_name.startswith("CW")
    if not is_cw and attack_name not in ('FGSM', 'BIM', 'DEEPFOOL'):
        raise ValueError("Unsupported attack: {!r}".format(attack_name))
    if attack_name == 'BIM' and self.__dataset not in ('MNIST', 'CIFAR'):
        raise ValueError(
            "BIM attack is not configured for dataset: {!r}".format(self.__dataset))

    if self._test_or_val_dataset == "_x_test_set_":
        X = self.__data.x_test
    else:
        X = self.__data.x_val

    if model is None:
        # No model supplied: train the surrogate and attack it
        self.surrogate_model.fit(self.__data.x_train,
                                 self.__data.y_train,
                                 verbose=1,
                                 epochs=self.__epochs,
                                 batch_size=128)
        model = self.surrogate_model
    wrap = KerasClassifier((0., 1.), model=model)

    x_clean = X[self.idx_adv][:self._length]

    if attack_name == 'FGSM':
        print('\nCrafting adversarial examples using FGSM attack...\n')
        fgsm = FastGradientMethod(wrap)
        # MNIST uses a larger perturbation budget than the other datasets
        eps = 0.2 if self.__data.dataset_name == 'MNIST' else 0.025
        x_adv_images = fgsm.generate(x=x_clean, eps=eps)
        file_suffix = "fgsm.pkl"
    elif is_cw:
        print('\nCrafting adversarial examples using CW attack...\n')
        cw = CarliniL2Method(wrap,
                             confidence=0.0,
                             targeted=False,
                             binary_search_steps=1,
                             learning_rate=0.2,
                             initial_const=10,
                             max_iter=100)
        x_adv_images = cw.generate(x_clean)
        file_suffix = "cw.pkl"
    elif attack_name == 'BIM':
        print('\nCrafting adversarial examples using BIM attack...\n')
        if self.__dataset == 'MNIST':
            bim = BasicIterativeMethod(wrap,
                                       eps=0.25,
                                       eps_step=0.2,
                                       max_iter=100,
                                       norm=np.inf)
        else:  # 'CIFAR', guaranteed by the validation above
            bim = BasicIterativeMethod(wrap,
                                       eps=0.025,
                                       eps_step=0.01,
                                       max_iter=1000,
                                       norm=np.inf)
        x_adv_images = bim.generate(x=x_clean)
        file_suffix = "bim.pkl"
    else:  # 'DEEPFOOL', guaranteed by the validation above
        print('\nCrafting adversarial examples using DeepFool attack...\n')
        deepfool = DeepFool(wrap)
        x_adv_images = deepfool.generate(x=x_clean)
        file_suffix = "deepfool.pkl"

    # Store the crafted images as <dataset><split><attack>.pkl
    path = os.path.join(
        self._attack_dir,
        self.__dataset.lower() + self._test_or_val_dataset + file_suffix)
    helpers.save_pkl(x_adv_images, path)
    return x_adv_images
'loss': 'categorical_crossentropy', 'optimizer': 'adam', 'metrics': ['accuracy'] } classifier = CNN(im_shape, act='relu', dataset='cifar10') classifier.compile(comp_params) classifier.fit(x_train, y_train, validation_split=.1, epochs=10, batch_size=128) # Craft adversarial samples with DeepFool print('Create DeepFool attack') epsilon = .1 # Maximum perturbation adv_crafter = DeepFool(classifier, sess=session) print('Craft training examples') x_train_adv = adv_crafter.generate(x_val=x_train, eps=epsilon, clip_min=min_, clip_max=max_) print('Craft test examples') x_test_adv = adv_crafter.generate(x_val=x_test, eps=epsilon, clip_min=min_, clip_max=max_) # Evaluate the classifier on the adversarial samples scores = classifier.evaluate(x_test, y_test) print("\nClassifier before adversarial training") print(
# FGSM adv_crafter_fgsm = FastGradientMethod(cifar_classifier, eps=epsilon, eps_step=0.01, batch_size=batch_size) x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array) # Test the classifier on adversarial exmaples predictions = cifar_classifier.predict(x_test_adv) accuracy = np.sum( np.argmax(predictions, axis=1) == test_label_dataset_array) / len( test_label_dataset_array) print('Accuracy after FGSM attack: {}%'.format(accuracy * 100)) # Deepfool adv_crafter_deepfool = DeepFool(cifar_classifier, batch_size=batch_size) x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array) predictions = cifar_classifier.predict(x_test_adv) accuracy = np.sum( np.argmax(predictions, axis=1) == test_label_dataset_array) / len( test_label_dataset_array) print('Accuracy after DeepFool attack: {}%'.format(accuracy * 100)) # C&W adv_crafter_cwinf = CarliniLInfMethod(cifar_classifier, eps=epsilon, batch_size=batch_size) x_test_adv = adv_crafter_cwinf.generate(x=test_dataset_array)
# Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy: %.2f%%" % (acc * 100)) # Craft adversarial samples with FGSM epsilon = .1 # Maximum perturbation adv_crafter = FastGradientMethod(classifier) x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon) print(x_test_adv.shape) local_path = "C:\\Users\\alonh\Documents\\Thesis\\MNIST-adversarial-images\\" np.save(local_path + "adv_img_list_FGSM.npy", x_test_adv) adv_crafter = DeepFool(classifier) x_test_adv_DeepFool = adv_crafter.generate(x_test) np.save(local_path + "adv_img_list_DeepFool.npy", x_test_adv_DeepFool) # Evaluate the classifier on the adversarial examples preds = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100)) # reset and restore old variables local_path = "C:\\Users\\alonh\Documents\\Thesis\\MNIST-adversarial-images\\original-nn-data.ckpt" model_yaml = model.to_yaml() with open( "C:\\Users\\alonh\\Documents\\Thesis\\MNIST-adversarial-images\\model.yaml", "w") as yaml_file:
clip_max=max_) X_test_adv = adv_crafter.generate(x_val=x_test, eps=e, clip_min=min_, clip_max=max_) if args.save: np.save(os.path.join(SAVE_ADV, "eps%.2f_train.npy" % eps), X_train_adv) np.save(os.path.join(SAVE_ADV, "eps%.2f_test.npy" % eps), X_test_adv) else: if args.adv_method == 'deepfool': adv_crafter = DeepFool(classifier, session, clip_min=min_, clip_max=max_) elif args.adv_method == 'jsma': adv_crafter = SaliencyMapMethod(classifier, sess=session, clip_min=min_, clip_max=max_, gamma=1, theta=max_) elif args.adv_method == 'carlini': adv_crafter = CarliniL2Method(classifier, sess=session, targeted=False, confidence=10) else: adv_crafter = UniversalPerturbation(classifier,
def evaluation(x_test, y_test, classify_idx_lst, model, test_acc, ws,
               current_line, attack_name, flag, column_i):
    """
    Sweep one attack over a fixed parameter grid and record the model accuracy in a worksheet.

    For every parameter combination of the selected attack, adversarial samples are crafted from
    ``x_test[classify_idx_lst]``, ``model`` is evaluated on them and one row is written to ``ws``:
    the attack name (column 0), the parameter tuple as text (column 1), then ``test_acc`` and the
    accuracy on the adversarial samples. The two CW attacks are run as targeted attacks once for
    each of the class indices 0..9, and the mean of the ten accuracies is recorded.

    :param x_test: Test samples; indexed with ``classify_idx_lst``.
    :param y_test: Test labels; indexed with ``classify_idx_lst``.
    :param classify_idx_lst: Index selecting the samples to attack.
    :param model: Model that is wrapped in a ``KerasClassifier`` for the attacks and evaluated
        directly through ``model.evaluate``.
    :param test_acc: Value written next to every adversarial accuracy.
    :param ws: Worksheet-like object exposing ``write(row, column, value)``.
    :param current_line: Index of the first row to write.
    :param attack_name: One of "FGM", "BIM", "JSMA", "DeepFool", "CW-L2" or "CW-Linf"; for any
        other value no row is written.
    :param flag: "ori" or "adv" places the two accuracies in columns 3 and 4; any other value
        places them in columns ``3 * column_i`` and ``3 * column_i + 1``.
    :param column_i: Column group index used when ``flag`` is neither "ori" nor "adv".
    :return: Tuple ``(ws, current_line)``; ``current_line`` has been advanced by the number of
        written rows plus one.
    """
    classifier = KerasClassifier((0., 1.), model=model)

    def _adv_accuracy(x_adv):
        # Entry 1 of the evaluation result is recorded as the accuracy
        return model.evaluate(x_adv, y_test[classify_idx_lst], verbose=0)[1]

    def _write_row(row, params_text, adv_acc):
        # One result row: attack name, parameters, then the clean/adversarial accuracy pair
        ws.write(row, 0, attack_name)
        ws.write(row, 1, params_text)
        first_col = 3 if flag in ("ori", "adv") else 3 * column_i
        ws.write(row, first_col, test_acc)
        ws.write(row, first_col + 1, adv_acc)

    def _targeted_cw_sweep(attack_cls, row):
        # Shared by CW-L2 and CW-Linf: (confidence, maximum number of iterations) combinations
        for confidence_value, max_iter_value in [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)]:
            crafter = attack_cls(classifier)
            acc_total = 0
            for target in range(0, 10):
                one_hot = [0] * 10
                one_hot[target] = 1
                x_adv = crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot] * x_test[classify_idx_lst].shape[0]))
                acc_total += _adv_accuracy(x_adv)
            _write_row(
                row,
                "({}, {})".format(round(confidence_value, 4), max_iter_value),
                acc_total / 10)
            row += 1
        return row

    if attack_name == "FGM":
        # (maximum perturbation, order of the norm)
        grid = [(eps, 1) for eps in range(10, 101, 10)]
        grid += [(eps, 2) for eps in range(1, 11)]
        grid += [(eps, np.inf)
                 for eps in (0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50)]
        for epsilon, norm_type in grid:
            crafter = FastGradientMethod(classifier)
            x_adv = crafter.generate(x=x_test[classify_idx_lst], eps=epsilon, norm=norm_type)
            _write_row(current_line,
                       "({}, {})".format(round(epsilon, 4), norm_type),
                       _adv_accuracy(x_adv))
            current_line += 1

    elif attack_name == "BIM":
        # (order of the norm, maximum perturbation, step size per iteration, maximum iterations);
        # for each norm the step sizes are swept with 10 iterations first, then with 50
        per_norm = [
            (1, 20.0, (2.0, 4.0, 6.0, 8.0, 10.0)),
            (2, 2.0, (0.2, 0.4, 0.6, 0.8, 1.0)),
            (np.inf, 0.1, (0.002, 0.004, 0.006, 0.008, 0.010)),
        ]
        grid = [(norm, eps, step, iters)
                for norm, eps, steps in per_norm
                for iters in (10, 50)
                for step in steps]
        for norm_type, epsilon, epsilon_step, max_iteration in grid:
            crafter = BasicIterativeMethod(classifier)
            x_adv = crafter.generate(x=x_test[classify_idx_lst],
                                     norm=norm_type,
                                     eps=epsilon,
                                     eps_step=epsilon_step,
                                     max_iter=max_iteration)
            _write_row(current_line,
                       "({}, {}, {}, {})".format(norm_type, round(epsilon, 4),
                                                 round(epsilon_step, 4), max_iteration),
                       _adv_accuracy(x_adv))
            current_line += 1

    elif attack_name == "JSMA":
        # (perturbation per modified feature and step, maximum fraction of perturbed features)
        thetas = (0.5, 0.4, 0.3, 0.2, 0.1, -0.1, -0.2, -0.3, -0.4, -0.5)
        for theta, gamma in [(value, 0.5) for value in thetas]:
            crafter = SaliencyMapMethod(classifier)
            x_adv = crafter.generate(x=x_test[classify_idx_lst], theta=theta, gamma=gamma)
            _write_row(current_line,
                       "({}, {})".format(round(theta, 4), round(gamma, 4)),
                       _adv_accuracy(x_adv))
            current_line += 1

    elif attack_name == "DeepFool":
        # (maximum number of iterations, overshoot parameter)
        for max_iteration, epsilon in [(iters, 0.10) for iters in range(2, 21, 2)]:
            crafter = DeepFool(classifier)
            x_adv = crafter.generate(x=x_test[classify_idx_lst],
                                     max_iter=max_iteration,
                                     epsilon=epsilon)
            _write_row(current_line,
                       "({}, {})".format(max_iteration, round(epsilon, 4)),
                       _adv_accuracy(x_adv))
            current_line += 1

    elif attack_name == "CW-L2":
        current_line = _targeted_cw_sweep(CarliniL2Method, current_line)

    elif attack_name == "CW-Linf":
        current_line = _targeted_cw_sweep(CarliniLInfMethod, current_line)

    # Skip one extra row after the rows of this attack
    current_line += 1
    return ws, current_line
# Read MNIST dataset (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist() # Construct and train a convolutional neural network on MNIST using Keras source = cnn_mnist_k() source.compile(loss=keras.losses.categorical_crossentropy, optimizer=Adam(lr=0.01), metrics=['accuracy']) source = KerasClassifier(clip_values=(min_, max_), model=source, use_logits=False) source.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Craft adversarial samples with DeepFool adv_crafter = DeepFool(source) x_test_adv = adv_crafter.generate(x_test) # Compare with existing Adversarial Training (from ART) robust_classifier = load_model('saved_models/mnist_cnn_robust.h5') robust_classifier = KerasClassifier(clip_values=(0, 1), model=robust_classifier, use_logits=False) print('compare_transfer.py for mnist dataset v2') print('based on inf norm') # Normal images original_model = load_model('saved_models/mnist_cnn_original.h5') # original classifier = KerasClassifier(clip_values=(0, 1), model=original_model, use_logits=False)
if (preds_pgd_random[i] != preds_pgd[i]): TP_comb_pgd = TP_comb_pgd + 1 else: TP_comb_pgd = TP_comb_pgd + Tpgd TPR_pgd_random = TP_pgd_random / adv_sample_cw print("\nTPR for PGD when random noise is added: %.3f%%" % (TPR_pgd_random * 100)) TPR_pgd_comb = TP_comb_pgd / adv_sample_cw TPR_pgd = TP_pgd / adv_sample_cw print("\nTPR for PGD: %.3f%%" % (TPR_pgd * 100)) print("\nTPR for PGD when combining: %.3f%%" % (TPR_pgd_comb * 100)) # ============================================================================= # # Craft adversarial samples using DeepFool # check # ============================================================================= attack_DeepFool = DeepFool(classifier) x_test_adv_df = attack_DeepFool.generate( x=x_test[3 * adv_sample:3 * adv_sample + adv_sample_cw]) # Evaluate the classifier on the adversarial examples # add test image noise x_test_adv_df_random = x_test_adv_df + np.random.normal( mean, 0.01, x_test_adv_df.shape) preds_df_random = np.argmax(classifier.predict(x_test_adv_df_random), axis=1) preds_df = np.argmax(classifier.predict(x_test_adv_df), axis=1) y_adv = y_test[3 * adv_sample:3 * adv_sample + adv_sample_cw] TP_df_random = 0 TP_df = 0 TP_comb_df = 0 for i in np.arange(adv_sample_cw): diff_random = x_test[i + 3 * adv_sample] - x_test_adv_df_random[i]