def test_shared_model_mnist(self):
    """
    Test the adversarial trainer using one FGSM attacker, where the attack is crafted on the very same
    classifier that is subsequently adversarially trained.

    Accuracy on the FGSM test samples is measured before and after two epochs of adversarial training.
    Both values are only logged; the test does not assert that the accuracy actually improved.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # The same classifier acts as both source and target of the attack
    classifier = self.classifier_k
    params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}

    # Create FGSM attacker and craft the adversarial test samples once, before any adversarial training
    adv = FastGradientMethod(classifier)
    x_adv = adv.generate(x_test)

    # Labels are compared through their argmax over axis 1
    labels = np.argmax(y_test, axis=1)

    # np.mean over the boolean match vector yields a float fraction regardless of division semantics
    preds = classifier.predict(x_adv)
    acc = np.mean(np.argmax(preds, axis=1) == labels)

    # Perform adversarial training
    adv_trainer = StaticAdversarialTrainer(classifier, adv)
    adv_trainer.fit(x_train, y_train, **params)

    # Re-evaluate on the same, previously generated adversarial samples
    preds_adv_trained = adv_trainer.classifier.predict(x_adv)
    acc_adv_trained = np.mean(np.argmax(preds_adv_trained, axis=1) == labels)

    # Report through the module logger, like the other tests in this file
    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
def test_one_attack_mnist(self):
    """
    Test the adversarial trainer using one FGSM attacker, where the attack is crafted on one classifier
    (`self.classifier_k`) and a different classifier (`self.classifier_tf`) is adversarially trained.

    Accuracy of the target classifier on the FGSM test samples is measured before and after one epoch of
    adversarial training. Both values are only logged; the test does not assert that accuracy improved.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Get source and target classifiers
    classifier_src = self.classifier_k
    classifier_tgt = self.classifier_tf

    # Create FGSM attacker on the source model and craft the adversarial test samples once
    adv = FastGradientMethod(classifier_src)
    x_adv = adv.generate(x_test)

    # Labels are compared through their argmax over axis 1
    labels = np.argmax(y_test, axis=1)

    # np.mean over the boolean match vector yields a float fraction regardless of division semantics
    preds = classifier_tgt.predict(x_adv)
    acc = np.mean(np.argmax(preds, axis=1) == labels)

    # Perform adversarial training of the target model
    adv_trainer = StaticAdversarialTrainer(classifier_tgt, adv)
    adv_trainer.fit(x_train, y_train, nb_epochs=1)

    # Re-evaluate on the same, previously generated adversarial samples
    preds_adv_trained = adv_trainer.classifier.predict(x_adv)
    acc_adv_trained = np.mean(np.argmax(preds_adv_trained, axis=1) == labels)

    # Report through the module logger, like the other tests in this file
    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
def test_multi_attack_mnist(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool.

    Both attacks are built on the same source classifier (`self.classifier_tf`); the target classifier
    (`self.classifier_k`) is adversarially trained for 2 epochs. Accuracy of the target on the stacked
    FGSM + DeepFool test samples is computed before and after training and written to the log; no
    assertion is made on the two values.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Target is the model being hardened, source is the model the attacks are crafted on
    classifier_tgt = self.classifier_k
    classifier_src = self.classifier_tf

    # Both attackers share the same source classifier
    adv1 = FastGradientMethod(classifier_src)
    adv2 = DeepFool(classifier_src)

    # Stack the two adversarial test sets; labels are duplicated so rows stay aligned
    x_fgsm = adv1.generate(x_test)
    x_deepfool = adv2.generate(x_test)
    x_adv = np.vstack((x_fgsm, x_deepfool))
    y_adv = np.vstack((y_test, y_test))
    nb_samples = y_adv.shape[0]

    # Accuracy of the target before adversarial training
    preds = classifier_tgt.predict(x_adv)
    hits = np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)
    acc = np.sum(hits) / nb_samples

    # Perform adversarial training with both attacks
    adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
    params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
    adv_trainer.fit(x_train, y_train, **params)

    # Accuracy of the trained model on the same adversarial samples
    preds_adv_trained = adv_trainer.classifier.predict(x_adv)
    hits_adv_trained = np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)
    acc_adv_trained = np.sum(hits_adv_trained) / nb_samples

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
def test_multi_attack_mnist_with_generator(self):
    """
    Test the adversarial trainer using two attackers (FGSM and DeepFool) with a data generator as the
    source of training data.

    Both attacks are built on the same source classifier (`self.classifier_tf`); the target classifier
    (`self.classifier_k`) is adversarially trained for 2 epochs through `fit_generator`. Accuracy of the
    target on the stacked FGSM + DeepFool test samples is computed before and after training and written
    to the log. The only assertion is that the training data is left unmodified by the training run.

    :return: None
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Snapshot taken up front so that any in-place modification of x_train can be detected at the end
    x_train_original = x_train.copy()

    class MyDataGenerator(DataGenerator):
        """
        Minimal generator that serves random batches drawn without replacement from in-memory arrays.
        """

        def __init__(self, x, y, size, batch_size):
            self.x, self.y = x, y
            self.size = size
            self.batch_size = batch_size

        def get_batch(self):
            # Never request more samples than are available
            nb_draws = min(self.size, self.batch_size)
            batch_idx = np.random.choice(self.size, size=nb_draws, replace=False)
            return (self.x[batch_idx], self.y[batch_idx])

    generator = MyDataGenerator(x_train, y_train, x_train.shape[0], BATCH_SIZE)

    # Target is the model being hardened, source is the model the attacks are crafted on
    classifier_tgt = self.classifier_k
    classifier_src = self.classifier_tf

    # Both attackers share the same source classifier
    adv1 = FastGradientMethod(classifier_src)
    adv2 = DeepFool(classifier_src)

    # Stack the two adversarial test sets; labels are duplicated so rows stay aligned
    x_fgsm = adv1.generate(x_test)
    x_deepfool = adv2.generate(x_test)
    x_adv = np.vstack((x_fgsm, x_deepfool))
    y_adv = np.vstack((y_test, y_test))
    nb_samples = y_adv.shape[0]

    # Accuracy of the target before adversarial training
    preds = classifier_tgt.predict(x_adv)
    hits = np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)
    acc = np.sum(hits) / nb_samples

    # Perform adversarial training, pulling the batches from the generator
    adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
    params = {'nb_epochs': 2}
    adv_trainer.fit_generator(generator, **params)

    # Accuracy of the trained model on the same adversarial samples
    preds_adv_trained = adv_trainer.classifier.predict(x_adv)
    hits_adv_trained = np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)
    acc_adv_trained = np.sum(hits_adv_trained) / nb_samples

    logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
    logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))

    # The training data must come out of the run exactly as it went in
    self.assertTrue((x_train == x_train_original).all())
def test_targeted_attack_error(self):
    """
    Test the adversarial trainer with a targeted FGSM attack and check that fitting raises a
    `NotImplementedError`.

    :return: None
    """
    (x_train, y_train), (_, _) = self.mnist
    fit_kwargs = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}

    # Attack and trainer are built on the same classifier; only the `targeted` flag matters here
    classifier = self.classifier_k
    targeted_attack = FastGradientMethod(classifier, targeted=True)
    adv_trainer = StaticAdversarialTrainer(classifier, attacks=targeted_attack)

    # Construction succeeds; the error is expected only when training is attempted
    with self.assertRaises(NotImplementedError):
        adv_trainer.fit(x_train, y_train, **fit_kwargs)