def test_two_attacks(self):
    """
    Adversarially train with FGSM and DeepFool and check accuracy on FGSM test samples.

    The FGSM samples are crafted once, before training. They are scored by
    ``self.classifier`` before training and through the trainer afterwards.
    Both accuracies are compared against fixed expected values.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_snapshot = x_test.copy()

    fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
    deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

    # Baseline: accuracy on the FGSM samples before adversarial training.
    x_test_fgsm = fgsm.generate(x_test)
    labels_before = np.argmax(self.classifier.predict(x_test_fgsm), axis=1)
    hits_before = labels_before == np.argmax(y_test, axis=1)
    accuracy_before = np.sum(hits_before) / NB_TEST

    trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
    trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)

    # Same samples, scored through the trainer after training.
    labels_after = np.argmax(trainer.predict(x_test_fgsm), axis=1)
    hits_after = labels_after == np.argmax(y_test, axis=1)
    accuracy_after = np.sum(hits_after) / NB_TEST

    self.assertEqual(accuracy_after, 0.36)
    self.assertEqual(accuracy_before, 0.13)

    # The attack and the classifier must leave x_test untouched.
    largest_change = float(np.max(np.abs(x_test_snapshot - x_test)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_two_attacks_with_generator(self):
    """
    Adversarially train ``self.classifier_k`` from a batch generator with two attacks.

    FGSM is built on ``self.classifier_k`` and DeepFool on ``self.classifier_tf``.
    Accuracy on FGSM test samples is measured before and after training, and the
    training array is checked to be unchanged at the end.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train_snapshot = x_train.copy()

    class MyDataGenerator(DataGenerator):
        """Serve random batches, sampled without replacement, from in-memory arrays."""

        def __init__(self, x, y, size, batch_size):
            self.x = x
            self.y = y
            self.size = size
            self.batch_size = batch_size

        def get_batch(self):
            # Never request more samples than the data set holds.
            nb_samples = min(self.size, self.batch_size)
            chosen = np.random.choice(self.size, size=nb_samples, replace=False)
            return self.x[chosen], self.y[chosen]

    batch_source = MyDataGenerator(x_train, y_train, x_train.shape[0], 128)

    fgsm = FastGradientMethod(self.classifier_k)
    deepfool = DeepFool(self.classifier_tf)

    # Baseline accuracy on FGSM samples before adversarial training.
    x_test_fgsm = fgsm.generate(x_test)
    labels_before = np.argmax(self.classifier_k.predict(x_test_fgsm), axis=1)
    acc_before = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

    trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
    trainer.fit_generator(batch_source, nb_epochs=5)

    labels_after = np.argmax(trainer.predict(x_test_fgsm), axis=1)
    acc_after = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

    # Accuracy is not required to improve; it only has to stay above the
    # ACCURACY_DROP fraction of the baseline.
    self.assertGreaterEqual(acc_after, acc_before * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', acc_before * 100)
    logger.info('\nAccuracy after adversarial training: %.2f%%', acc_after * 100)

    # The original training data must not have been changed.
    self.assertTrue(np.all(x_train == x_train_snapshot))
def test_two_attacks_with_generator(self):
    """
    Adversarially train ``self.classifier`` from a batch generator with FGSM and DeepFool.

    Accuracy on FGSM test samples (crafted before training) is compared against
    fixed expected values before and after training. Both the training and the
    test arrays are checked to be unchanged at the end.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train_snapshot = x_train.copy()
    x_test_snapshot = x_test.copy()

    class MyDataGenerator(DataGenerator):
        """Serve random batches, sampled without replacement, from in-memory arrays."""

        def __init__(self, x, y, size, batch_size):
            super().__init__(size=size, batch_size=batch_size)
            self.x = x
            self.y = y
            self._size = size
            self._batch_size = batch_size

        def get_batch(self):
            # Never request more samples than the data set holds.
            nb_samples = min(self.size, self.batch_size)
            chosen = np.random.choice(self.size, size=nb_samples, replace=False)
            return self.x[chosen], self.y[chosen]

    batch_source = MyDataGenerator(x_train, y_train, size=x_train.shape[0], batch_size=16)

    fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
    deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

    # Baseline accuracy on FGSM samples before adversarial training.
    x_test_fgsm = fgsm.generate(x_test)
    labels_before = np.argmax(self.classifier.predict(x_test_fgsm), axis=1)
    hits_before = labels_before == np.argmax(y_test, axis=1)
    accuracy_before = np.sum(hits_before) / NB_TEST

    trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
    trainer.fit_generator(batch_source, nb_epochs=3)

    labels_after = np.argmax(trainer.predict(x_test_fgsm), axis=1)
    hits_after = labels_after == np.argmax(y_test, axis=1)
    accuracy_after = np.sum(hits_after) / NB_TEST

    self.assertAlmostEqual(accuracy_after, 0.25, delta=0.02)
    self.assertAlmostEqual(accuracy_before, 0.11, delta=0.0)

    # The attack and the classifier must leave x_train and x_test untouched.
    train_change = float(np.max(np.abs(x_train_snapshot - x_train)))
    self.assertAlmostEqual(train_change, 0.0, delta=0.00001)
    test_change = float(np.max(np.abs(x_test_snapshot - x_test)))
    self.assertAlmostEqual(test_change, 0.0, delta=0.00001)
def test_classifier_match(self):
    """
    Check that a trainer built from a single attack holds exactly one attack whose
    classifier equals the trainer's own classifier.

    :return: None
    """
    fgsm = FastGradientMethod(self.classifier_k)
    trainer = AdversarialTrainer(self.classifier_k, fgsm)

    registered = trainer.attacks
    self.assertEqual(len(registered), 1)
    self.assertEqual(registered[0].classifier, trainer.classifier)
def test_transfer(self):
    """
    Train ``self.classifier_k`` against DeepFool samples crafted on ``self.classifier_tf``.

    The adversarial test samples are generated once and scored by
    ``self.classifier_k`` before training and through the trainer afterwards.
    The later accuracy must be at least ``ACCURACY_DROP`` times the earlier one.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    deepfool = DeepFool(self.classifier_tf)
    x_test_deepfool = deepfool.generate(x_test)

    # Baseline accuracy before adversarial training.
    labels_before = np.argmax(self.classifier_k.predict(x_test_deepfool), axis=1)
    acc_before = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

    trainer = AdversarialTrainer(self.classifier_k, deepfool)
    trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

    labels_after = np.argmax(trainer.predict(x_test_deepfool), axis=1)
    acc_after = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

    self.assertGreaterEqual(acc_after, acc_before * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', acc_before * 100)
    logger.info('Accuracy after adversarial training: %.2f%%', acc_after * 100)
def test_fit_predict(self):
    """
    Adversarially train ``self.classifier_k`` with FGSM and compare accuracy on FGSM
    test samples before and after training.

    The samples are crafted once, before training. Both accuracies are printed.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    fgsm = FastGradientMethod(self.classifier_k)
    x_test_fgsm = fgsm.generate(x_test)

    # Baseline accuracy before adversarial training.
    labels_before = np.argmax(self.classifier_k.predict(x_test_fgsm), axis=1)
    acc_before = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

    trainer = AdversarialTrainer(self.classifier_k, fgsm)
    trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

    labels_after = np.argmax(trainer.predict(x_test_fgsm), axis=1)
    acc_after = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

    # NOTE(review): other tests compare against ACCURACY_DROP and report through
    # `logger`; confirm lowercase `accuracy_drop` is defined at module level.
    self.assertGreaterEqual(acc_after, acc_before * accuracy_drop)

    print('\nAccuracy before adversarial training: %.2f%%' % (acc_before * 100))
    print('\nAccuracy after adversarial training: %.2f%%' % (acc_after * 100))
def test_two_attacks(self):
    """
    Adversarially train ``self.classifier_k`` with two attacks at once.

    FGSM is built on ``self.classifier_k`` and DeepFool on ``self.classifier_tf``.
    Accuracy on FGSM test samples (crafted before training) is measured before
    and after training and both values are logged.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    fgsm = FastGradientMethod(self.classifier_k)
    deepfool = DeepFool(self.classifier_tf)

    # Baseline accuracy on FGSM samples before adversarial training.
    x_test_fgsm = fgsm.generate(x_test)
    labels_before = np.argmax(self.classifier_k.predict(x_test_fgsm), axis=1)
    acc_before = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

    trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
    trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

    labels_after = np.argmax(trainer.predict(x_test_fgsm), axis=1)
    acc_after = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

    # Accuracy is not required to improve; it only has to stay above the
    # ACCURACY_DROP fraction of the baseline.
    self.assertGreaterEqual(acc_after, acc_before * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', acc_before * 100)
    logger.info('\nAccuracy after adversarial training: %.2f%%', acc_after * 100)
def test_transfer(self):
    """
    Train ``self.classifier_k`` against DeepFool samples crafted on ``self.classifier_tf``.

    The adversarial test samples are generated once and scored by
    ``self.classifier_k`` before training and through the trainer afterwards.
    The later accuracy must be at least ``ACCURACY_DROP`` times the earlier one,
    and ``x_test`` must be unchanged at the end.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_snapshot = x_test.copy()

    deepfool = DeepFool(self.classifier_tf)
    x_test_deepfool = deepfool.generate(x_test)

    # Baseline accuracy before adversarial training.
    labels_before = np.argmax(self.classifier_k.predict(x_test_deepfool), axis=1)
    acc_before = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

    trainer = AdversarialTrainer(self.classifier_k, deepfool)
    trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

    labels_after = np.argmax(trainer.predict(x_test_deepfool), axis=1)
    acc_after = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

    self.assertGreaterEqual(acc_after, acc_before * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', acc_before * 100)
    logger.info('Accuracy after adversarial training: %.2f%%', acc_after * 100)

    # The attack and the classifier must leave x_test untouched.
    largest_change = float(np.max(np.abs(x_test_snapshot - x_test)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_targeted_attack_error(self):
    """
    Check that fitting the adversarial trainer with a targeted attack raises
    ``NotImplementedError``.

    :return: None
    """
    (x_train, y_train), _ = self.mnist
    fit_kwargs = {"nb_epochs": 2, "batch_size": BATCH_SIZE}

    targeted_fgsm = FastGradientMethod(self.classifier, targeted=True)
    trainer = AdversarialTrainer(self.classifier, attacks=targeted_fgsm)

    with self.assertRaises(NotImplementedError):
        trainer.fit(x_train, y_train, **fit_kwargs)
def test_two_attacks(self):
    """
    Adversarially train ``self.classifier_k`` with two attacks at once.

    FGSM is built on ``self.classifier_k`` and DeepFool on ``self.classifier_tf``.
    Accuracy on FGSM test samples (crafted before training) is measured before
    and after training, both values are logged, and ``x_test`` is checked to be
    unchanged at the end.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_snapshot = x_test.copy()

    fgsm = FastGradientMethod(self.classifier_k)
    deepfool = DeepFool(self.classifier_tf)

    # Baseline accuracy on FGSM samples before adversarial training.
    x_test_fgsm = fgsm.generate(x_test)
    labels_before = np.argmax(self.classifier_k.predict(x_test_fgsm), axis=1)
    acc_before = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

    trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
    trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

    labels_after = np.argmax(trainer.predict(x_test_fgsm), axis=1)
    acc_after = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

    # Accuracy is not required to improve; it only has to stay above the
    # ACCURACY_DROP fraction of the baseline.
    self.assertGreaterEqual(acc_after, acc_before * ACCURACY_DROP)

    logger.info('Accuracy before adversarial training: %.2f%%', acc_before * 100)
    logger.info('\nAccuracy after adversarial training: %.2f%%', acc_after * 100)

    # The attack and the classifier must leave x_test untouched.
    largest_change = float(np.max(np.abs(x_test_snapshot - x_test)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
use_logits=False)
# NOTE(review): the line above is the tail of a call that starts outside this
# excerpt; presumably it builds `classifier` -- confirm against the full script.

# PGD attack on `classifier`; the test labels are passed explicitly to generate().
attack = ProjectedGradientDescent(classifier, eps=8 / 255, eps_step=1 / 255, max_iter=20, batch_size=512)
x_test_pgd = attack.generate(x_test, y_test)
# np.save('./data/' + dataset + '_data/model/' + model_name + '_y_' + attack_name + '.npy', x_test_pgd)

# Evaluate the model on the adversarial test set before adversarial training.
# NOTE(review): `labels_true` is read here, but its only visible assignment is
# further down; confirm it is also defined earlier in the script, otherwise
# this print raises NameError.
labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
print('Accuracy on original PGD adversarial samples: %.2f%%' % (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))

# Adversarial training with the same PGD attack, then save the classifier.
trainer = AdversarialTrainer(classifier, attack, ratio=1.0)
trainer.fit(x_train, y_train, nb_epochs=60, batch_size=1024)
classifier.save(filename='adv_' + model_name + '.h5', path='../data/' + dataset + '_data/model/')

# Evaluate the adversarially trained model on the clean test set.
labels_true = np.argmax(y_test, axis=1)
labels_test = np.argmax(classifier.predict(x_test), axis=1)
print('Accuracy test set: %.2f%%' % (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))

# Evaluate the adversarially trained model on the adversarial samples that were
# generated before training (x_test_pgd is reused, not regenerated).
labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
print('Accuracy on original PGD adversarial samples: %.2f%%' % (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))
classifier = KerasClassifier(build_model(), clip_values=(0, 1), use_logits=False) # Create attack for adversarial trainer; here, we use 2 attacks, both crafting adv examples on the target model pgd = ProjectedGradientDescent(classifier, eps=8, eps_step=2, max_iter=10, num_random_init=20) # Create some adversarial samples for evaluation x_test_pgd = pgd.generate(x_test) # Create adversarial trainer and perform adversarial training adv_trainer = AdversarialTrainer(classifier, attacks=pgd, ratio=1.0) adv_trainer.fit_generator(art_datagen, nb_epochs=83) # Evaluate the adversarially trained model on clean test set labels_true = np.argmax(y_test, axis=1) labels_test = np.argmax(classifier.predict(x_test), axis=1) print("Accuracy test set: %.2f%%" % (np.sum(labels_test == labels_true) / x_test.shape[0] * 100)) # Evaluate the adversarially trained model on original adversarial samples labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1) print("Accuracy on original PGD adversarial samples: %.2f%%" % (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100)) # Evaluate the adversarially trained model on fresh adversarial samples produced on the adversarially trained model x_test_pgd = pgd.generate(x_test)
from art.defences import AdversarialTrainer # get a new untrained model and warp it new_model = mnist_cnn_model(x_train, y_train, x_test, y_test, epochs=0) defended_model = KerasClassifier(clip_values=(0, 1), model=new_model) # define the attack we are using fgsm = FastGradientMethod(defended_model) # Create the `AdversarialTrainer` instance. # Train the model and evaluate it on the test data. # In[ ]: # define the adversarial trainer and train the new network adversarial_tranier = AdversarialTrainer(defended_model, fgsm) adversarial_tranier.fit(x_train, y_train, batch_size=100, nb_epochs=2) # evaluate how good our model is defended_model._model.evaluate(x_test, y_test) # Calculate the `empirical robustness` for our now hopfully more robust model # In[ ]: # calculate the empiracal robustness print('robustness of the defended model', empirical_robustness(defended_model, x_test[0:], 'fgsm', {})) x_adv = fgsm.generate(x_test[0].reshape((1, 28, 28, 1))) print('class prediction for the adversarial sample:', clf.predict(x_adv.reshape((1, 28, 28, 1))))