    def test_one_attack_mnist(self):
        """
        Test the adversarial trainer using a single FGSM attacker. The source and target models of the
        attack are two CNNs on MNIST (TensorFlow and Keras backends). The test checks whether accuracy
        on adversarial samples increases after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get source and target classifiers
        classifier_src = self.classifier_k
        classifier_tgt = self.classifier_tf

        # Create FGSM attacker
        adv = FastGradientMethod(classifier_src)
        x_adv = adv.generate(x_test)
        preds = classifier_tgt.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier_tgt, adv)
        adv_trainer.fit(x_train, y_train, nb_epochs=1)

        # Evaluate whether accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(np.argmax(preds_adv_trained, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]

        logger.info('Accuracy before adversarial training: %.2f%%', acc * 100)
        logger.info('Accuracy after adversarial training: %.2f%%', acc_adv_trained * 100)
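
    # The argmax-accuracy computation above recurs in every test in this class.
    # A hypothetical helper like the one below (not part of the original module,
    # shown only to sketch how the repetition could be factored out) computes
    # the same quantity:
    @staticmethod
    def _accuracy(preds, y):
        """Fraction of samples whose predicted class matches the one-hot label."""
        return np.sum(np.argmax(preds, axis=1) == np.argmax(y, axis=1)) / y.shape[0]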
    def test_shared_model_mnist(self):
        """
        Test the adversarial trainer using a single FGSM attacker. The source and target models of the
        attack are the same CNN on MNIST, trained for 2 epochs. The test checks whether accuracy on
        adversarial samples increases after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get the shared classifier
        classifier = self.classifier_k

        # Create FGSM attacker
        adv = FastGradientMethod(classifier)
        x_adv = adv.generate(x_test)
        preds = classifier.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]

        # Perform adversarial training
        params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
        adv_trainer = StaticAdversarialTrainer(classifier, adv)
        adv_trainer.fit(x_train, y_train, **params)

        # Evaluate whether accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(np.argmax(preds_adv_trained, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]

        logger.info('Accuracy before adversarial training: %.2f%%', acc * 100)
        logger.info('Accuracy after adversarial training: %.2f%%', acc_adv_trained * 100)
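
    # Note that the tests in this class log the before/after accuracies rather
    # than asserting on them, so they never fail when robustness does not
    # improve. If a hard check were wanted, a tolerance-based assertion along
    # these lines could be appended to each test (illustrative only; the 0.05
    # margin is an assumption, since one or two epochs of adversarial training
    # do not guarantee a strict improvement):
    #
    #     self.assertGreaterEqual(acc_adv_trained, acc - 0.05)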
    def test_multi_attack_mnist(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models
        of the attack are two CNNs on MNIST, trained for 2 epochs. FGSM and DeepFool both generate the
        attack images on the same source classifier. The test checks whether accuracy on adversarial
        samples increases after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get source and target classifiers
        classifier_tgt = self.classifier_k
        classifier_src = self.classifier_tf

        # Create FGSM and DeepFool attackers
        adv1 = FastGradientMethod(classifier_src)
        adv2 = DeepFool(classifier_src)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        y_adv = np.vstack((y_test, y_test))
        preds = classifier_tgt.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

        # Perform adversarial training with both attackers
        adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
        params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
        adv_trainer.fit(x_train, y_train, **params)

        # Evaluate whether accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

        logger.info('Accuracy before adversarial training: %.2f%%', acc * 100)
        logger.info('Accuracy after adversarial training: %.2f%%', acc_adv_trained * 100)
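
# ---------------------------------------------------------------------------
# The tests above depend on fixtures defined elsewhere in this module:
# ``self.mnist``, ``self.classifier_k``, ``self.classifier_tf``, ``BATCH_SIZE``
# and ``logger``. The helper below is a minimal sketch of what that scaffolding
# might look like, assuming ART's ``load_mnist`` utility and ``KerasClassifier``
# wrapper; names, sizes and training settings are illustrative assumptions, not
# the module's actual set-up code. For brevity it wraps two small Keras CNNs,
# whereas the original module builds the second classifier directly on the
# TensorFlow backend.

def _build_mnist_fixtures_sketch(nb_train=1000, nb_test=100):
    """Hypothetical helper: load a small MNIST subset and train two wrapped CNNs."""
    from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
    from keras.models import Sequential

    from art.classifiers import KerasClassifier
    from art.utils import load_mnist

    # Load MNIST and keep small train/test subsets so the tests stay fast.
    (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    def make_cnn():
        # Small CNN: one convolutional block followed by a dense softmax head.
        model = Sequential([
            Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=x_train.shape[1:]),
            MaxPooling2D(pool_size=(2, 2)),
            Flatten(),
            Dense(10, activation='softmax'),
        ])
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return KerasClassifier(model=model, clip_values=(min_, max_))

    classifier_a, classifier_b = make_cnn(), make_cnn()
    classifier_a.fit(x_train, y_train, nb_epochs=2, batch_size=100)
    classifier_b.fit(x_train, y_train, nb_epochs=2, batch_size=100)
    return ((x_train, y_train), (x_test, y_test)), classifier_a, classifier_b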