def _test_backend_mnist(self, classifier):
    # Get MNIST
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Test DeepFool
    attack = DeepFool(classifier, max_iter=5)
    x_test_adv = attack.generate(x_test)
    x_train_adv = attack.generate(x_train)
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    print('\nAccuracy on adversarial train examples: %.2f%%' % (acc * 100))

    acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    print('\nAccuracy on adversarial test examples: %.2f%%' % (acc * 100))
def _test_backend_mnist(self, classifier):
    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Test DeepFool
    attack = DeepFool(classifier, max_iter=5)
    x_test_adv = attack.generate(x_test)
    x_train_adv = attack.generate(x_train)
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info('Accuracy on adversarial train examples: %.2f%%', acc * 100)

    acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', acc * 100)
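# The two _test_backend_mnist helpers above rely on module-level fixtures that
# are not shown in this excerpt. A minimal sketch of what they assume (the
# DeepFool import path and the NB_TRAIN/NB_TEST values are assumptions and may
# differ across ART versions):
import logging

import numpy as np

from art.attacks import DeepFool
from art.utils import load_mnist, get_labels_np_array

logger = logging.getLogger(__name__)

BATCH_SIZE = 100  # illustrative value
NB_TRAIN = 100    # illustrative value
NB_TEST = 11      # illustrative value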
def test_mnist(self):
    session = tf.Session()
    k.set_session(session)

    comp_params = {"loss": 'categorical_crossentropy',
                   "optimizer": 'adam',
                   "metrics": ['accuracy']}

    # get MNIST
    batch_size, nb_train, nb_test = 100, 1000, 11
    (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
    X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
    X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
    im_shape = X_train[0].shape

    # get classifier
    classifier = CNN(im_shape, act="relu")
    classifier.compile(comp_params)
    classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
    scores = classifier.evaluate(X_test, Y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    df = DeepFool(classifier, sess=session)
    df.set_params(clip_min=0., clip_max=1.)
    x_test_adv = df.generate(X_test)
    self.assertFalse((X_test == x_test_adv).all())

    y_pred = classifier.predict(x_test_adv)
    self.assertFalse((Y_test == y_pred).all())

    scores = classifier.evaluate(x_test_adv, Y_test)
    print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
def test_multi_attack_mnist(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the
    attack are two CNNs on MNIST trained for 5 epochs. FGSM and DeepFool both generate the attack images on the
    same source classifier. The test checks whether accuracy on adversarial samples increases after adversarially
    training the model.

    :return: None
    """
    session = tf.Session()
    k.set_session(session)

    # Load MNIST
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
    im_shape = x_train[0].shape

    # Create and fit target classifier
    comp_params = {'loss': 'categorical_crossentropy', 'optimizer': 'adam', 'metrics': ['accuracy']}
    params = {'epochs': 5, 'batch_size': BATCH_SIZE}
    classifier_tgt = CNN(im_shape, dataset='mnist')
    classifier_tgt.compile(comp_params)
    classifier_tgt.fit(x_train, y_train, **params)

    # Create and fit source classifier
    classifier_src = CNN(im_shape, dataset='mnist')
    classifier_src.compile(comp_params)
    classifier_src.fit(x_train, y_train, **params)

    # Create FGSM and DeepFool attackers
    adv1 = FastGradientMethod(classifier_src, session)
    adv2 = DeepFool(classifier_src, session)
    x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))
    acc = classifier_tgt.evaluate(x_adv, y_adv)

    # Perform adversarial training
    adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
    adv_trainer.fit(x_train, y_train, **params)

    # Check that accuracy on adversarial samples has improved
    acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
    self.assertTrue(acc_adv_trained >= acc)
def test_iris_pt(self):
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()

    attack = DeepFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', acc * 100)
def test_partial_grads(self):
    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    attack = DeepFool(self.classifier_k, max_iter=2, nb_grads=3)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    test_y_pred = get_labels_np_array(self.classifier_k.predict(x_test_adv))
    self.assertFalse((y_test == test_y_pred).all())
    acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', acc * 100)
def test_iris_k_unbounded(self):
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack = DeepFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', acc * 100)
x_test_adv_robust_pred = np.argmax(robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against PGD attack: {}".format(nb_correct_adv_robust_pred))

# CW
attacker_robust = cw(robust_classifier, targeted=False, batch_size=100)
x_test_adv_robust = attacker_robust.generate(x_test[:100])
x_test_adv_robust_pred = np.argmax(robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against CW attack: {}".format(nb_correct_adv_robust_pred))

# DeepFool
adv_crafter_df = DeepFool(robust_classifier)
img_adv_df = adv_crafter_df.generate(x_test[0:100])
x_test_adv_robust_pred_df = np.argmax(robust_classifier.predict(img_adv_df), axis=1)
nb_correct_adv_robust_pred_df = np.sum(x_test_adv_robust_pred_df == true_label)
print("Correctly classified against DeepFool attack: {}".format(nb_correct_adv_robust_pred_df))

# Normal images
original_model = load_model('saved_models/mnist_cnn_original.h5')  # original
classifier = KerasClassifier(clip_values=(0, 1), model=original_model, use_logits=False)
x_test_pred = np.argmax(classifier.predict(x_test), axis=1)
nb_correct_pred = np.sum(x_test_pred == np.argmax(y_test, axis=1)) / y_test.shape[0] * 100
print("Test accuracy for normal instances: %.1f%%" % nb_correct_pred)
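# The script above uses `cw` as a callable attack constructor; presumably it is
# an alias for one of ART's Carlini & Wagner attack classes. A minimal sketch
# of the imports this excerpt appears to assume (the alias and the module paths
# are assumptions, matching pre-1.0 ART layouts):
from keras.models import load_model

from art.attacks import CarliniL2Method as cw
from art.attacks import DeepFool
from art.classifiers import KerasClassifier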
def attack(self, model=None, attack_str=""):
    imgs = self._load_images(attack_str, self._test_or_val_dataset)

    if self._test_or_val_dataset == "_x_test_set_":
        X = self.__data.x_test
        Y = self.__data.y_test
    else:
        X = self.__data.x_val
        Y = self.__data.y_val

    if imgs is not None:
        print('\n{0} adversarial examples using {1} attack loaded...\n'.format(self.__dataset, self.__attack))
        return imgs

    if model is None:
        self.surrogate_model.fit(self.__data.x_train, self.__data.y_train,
                                 verbose=1, epochs=self.__epochs, batch_size=128)
        wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
    else:
        wrap = KerasClassifier((0., 1.), model=model)

    if self.__attack == 'FGSM':
        print('\nCrafting adversarial examples using FGSM attack...\n')
        fgsm = FastGradientMethod(wrap)
        if self.__data.dataset_name == 'MNIST':
            x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length], eps=0.2)
        else:
            x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length], eps=0.025)
        path = os.path.join(self._attack_dir,
                            self.__dataset.lower() + self._test_or_val_dataset + "fgsm.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack.startswith("CW"):
        print('\nCrafting adversarial examples using CW attack...\n')
        cw = CarliniL2Method(wrap, confidence=0.0, targeted=False, binary_search_steps=1,
                             learning_rate=0.2, initial_const=10, max_iter=100)
        x_adv_images = cw.generate(X[self.idx_adv][:self._length])
        path = os.path.join(self._attack_dir,
                            self.__dataset.lower() + self._test_or_val_dataset + "cw.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack == 'BIM':
        print('\nCrafting adversarial examples using BIM attack...\n')
        if self.__dataset == 'MNIST':
            bim = BasicIterativeMethod(wrap, eps=0.25, eps_step=0.2, max_iter=100, norm=np.inf)
        if self.__dataset == 'CIFAR':
            bim = BasicIterativeMethod(wrap, eps=0.025, eps_step=0.01, max_iter=1000, norm=np.inf)
        x_adv_images = bim.generate(x=X[self.idx_adv][:self._length])
        path = os.path.join(self._attack_dir,
                            self.__dataset.lower() + self._test_or_val_dataset + "bim.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack == 'DEEPFOOL':
        print('\nCrafting adversarial examples using DeepFool attack...\n')
        deepfool = DeepFool(wrap)
        x_adv_images = deepfool.generate(x=X[self.idx_adv][:self._length])
        path = os.path.join(self._attack_dir,
                            self.__dataset.lower() + self._test_or_val_dataset + "deepfool.pkl")
        helpers.save_pkl(x_adv_images, path)

    return x_adv_images
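# `helpers.save_pkl` is a project-specific utility, not part of ART. A minimal
# sketch of what it might look like, assuming the save_pkl(obj, path) call
# signature used above (hypothetical implementation):
import os
import pickle


def save_pkl(obj, path):
    # Serialize `obj` to `path` with pickle, creating parent directories if needed.
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)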
}
classifier = CNN(im_shape, act='relu', dataset='cifar10')
classifier.compile(comp_params)
classifier.fit(x_train, y_train, validation_split=.1, epochs=10, batch_size=128)

# Craft adversarial samples with DeepFool
print('Create DeepFool attack')
epsilon = .1  # Maximum perturbation
adv_crafter = DeepFool(classifier, sess=session)
print('Craft training examples')
x_train_adv = adv_crafter.generate(x_val=x_train, eps=epsilon, clip_min=min_, clip_max=max_)
print('Craft test examples')
x_test_adv = adv_crafter.generate(x_val=x_test, eps=epsilon, clip_min=min_, clip_max=max_)

# Evaluate the classifier on the adversarial samples
scores = classifier.evaluate(x_test_adv, y_test)
print("\nClassifier before adversarial training")
print("\nLoss on adversarial samples: %.2f%%\nAccuracy on adversarial samples: %.2f%%"
      % (scores[0], scores[1] * 100))

# Data augmentation: expand the training set with the adversarial samples
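# Note: the generate() keywords used above (x_val, eps, clip_min, clip_max)
# come from a very early ART release. Against later ART versions the equivalent
# call only passes the input array, with clipping taken from the classifier's
# clip_values and the overshoot controlled by the attack's own epsilon
# parameter (a sketch; exact signatures depend on the installed ART version):
adv_crafter = DeepFool(classifier, epsilon=1e-6)
x_train_adv = adv_crafter.generate(x=x_train)
x_test_adv = adv_crafter.generate(x=x_test)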
adv_crafter_fgsm = FastGradientMethod(cifar_classifier, eps=epsilon, eps_step=0.01, batch_size=batch_size)
x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

# Test the classifier on adversarial examples
predictions = cifar_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
print('Accuracy after FGSM attack: {}%'.format(accuracy * 100))

# DeepFool
adv_crafter_deepfool = DeepFool(cifar_classifier, batch_size=batch_size)
x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array)
predictions = cifar_classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array)
print('Accuracy after DeepFool attack: {}%'.format(accuracy * 100))

# C&W
adv_crafter_cwinf = CarliniLInfMethod(cifar_classifier, eps=epsilon, batch_size=batch_size)
x_test_adv = adv_crafter_cwinf.generate(x=test_dataset_array)
predictions = cifar_classifier.predict(x_test_adv)
# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
source = cnn_mnist_k()
source.compile(loss=keras.losses.categorical_crossentropy, optimizer=Adam(lr=0.01), metrics=['accuracy'])
source = KerasClassifier(clip_values=(min_, max_), model=source, use_logits=False)
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
adv_crafter = DeepFool(source)
x_test_adv = adv_crafter.generate(x_test)

# Compare with existing Adversarial Training (from ART)
robust_classifier = load_model('saved_models/mnist_cnn_robust.h5')
robust_classifier = KerasClassifier(clip_values=(0, 1), model=robust_classifier, use_logits=False)

print('compare_transfer.py for mnist dataset v2')
print('based on inf norm')

# Normal images
original_model = load_model('saved_models/mnist_cnn_original.h5')  # original
classifier = KerasClassifier(clip_values=(0, 1), model=original_model, use_logits=False)
    classifier = KerasClassifier((0, 1), model, use_logits=False)
    return classifier


# Get session
session = tf.Session()
k.set_session(session)

# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
source = cnn_mnist_k(x_train.shape[1:])
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
adv_crafter = DeepFool(source)
x_train_adv = adv_crafter.generate(x_train)
x_test_adv = adv_crafter.generate(x_test)

# Construct and train a convolutional neural network
target = cnn_mnist_tf(x_train.shape[1:])
target.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the CNN on the adversarial samples
preds = target.predict(x_test_adv)
acc = np.sum(np.equal(np.argmax(preds, axis=1), np.argmax(y_test, axis=1))) / y_test.shape[0]
print("\nAccuracy on adversarial samples: %.2f%%" % (acc * 100))
        TP_comb_pgd = TP_comb_pgd + 1
    else:
        TP_comb_pgd = TP_comb_pgd + Tpgd

TPR_pgd_random = TP_pgd_random / adv_sample_cw
print("\nTPR for PGD when random noise is added: %.3f%%" % (TPR_pgd_random * 100))
TPR_pgd_comb = TP_comb_pgd / adv_sample_cw
TPR_pgd = TP_pgd / adv_sample_cw
print("\nTPR for PGD: %.3f%%" % (TPR_pgd * 100))
print("\nTPR for PGD when combining: %.3f%%" % (TPR_pgd_comb * 100))

# =============================================================================
# Craft adversarial samples using DeepFool
# check
# =============================================================================
attack_DeepFool = DeepFool(classifier)
x_test_adv_df = attack_DeepFool.generate(x=x_test[3 * adv_sample:3 * adv_sample + adv_sample_cw])

# Evaluate the classifier on the adversarial examples
# Add test image noise
x_test_adv_df_random = x_test_adv_df + np.random.normal(mean, 0.01, x_test_adv_df.shape)
preds_df_random = np.argmax(classifier.predict(x_test_adv_df_random), axis=1)
preds_df = np.argmax(classifier.predict(x_test_adv_df), axis=1)
y_adv = y_test[3 * adv_sample:3 * adv_sample + adv_sample_cw]

TP_df_random = 0
TP_df = 0
TP_comb_df = 0
for i in np.arange(adv_sample_cw):
    diff_random = x_test[i + 3 * adv_sample] - x_test_adv_df_random[i]
    diff_random = diff_random.reshape((28, 28))
    perturbation_random = norm(diff_random) / 28