Example #1
    def test_one_attack_mnist(self):
        """
        Test the adversarial trainer using one FGSM attacker. The source and target models of the attack
        are two CNNs on MNIST (Keras and TensorFlow backends, respectively). The test checks whether accuracy
        on adversarial samples increases after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get source and target classifiers
        classifier_src = self.classifier_k
        classifier_tgt = self.classifier_tf

        # Create FGSM attacker
        adv = FastGradientMethod(classifier_src)
        x_adv = adv.generate(x_test)
        preds = classifier_tgt.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / x_adv.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier_tgt, adv)
        adv_trainer.fit(x_train, y_train, nb_epochs=1)

        # Check whether accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(
            np.argmax(preds_adv_trained, axis=1) == np.argmax(
                y_test, axis=1)) / x_adv.shape[0]
        print('\nAccuracy before adversarial training: %.2f%%' % (acc * 100))
        print('\nAccuracy after adversarial training: %.2f%%' %
              (acc_adv_trained * 100))
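
These snippets are methods of a test class and rely on fixtures defined elsewhere (self.mnist, self.classifier_k, self.classifier_tf, BATCH_SIZE) and on imports from the Adversarial Robustness Toolbox (ART). Below is a minimal, hypothetical setup sketch: the import paths follow recent ART releases, and the StaticAdversarialTrainer alias, the BATCH_SIZE value and the tiny CNN are assumptions that may differ from the version these tests were written against.

import numpy as np
import tensorflow as tf
from tensorflow import keras

from art.utils import load_mnist
from art.estimators.classification import KerasClassifier
from art.attacks.evasion import FastGradientMethod, DeepFool
# The snippets call the trainer StaticAdversarialTrainer; assumed here to be an
# alias for ART's AdversarialTrainer.
from art.defences.trainer import AdversarialTrainer as StaticAdversarialTrainer

BATCH_SIZE = 128  # assumed value

# Depending on the TensorFlow/ART versions, the Keras wrapper may require graph mode.
tf.compat.v1.disable_eager_execution()

# MNIST in the (x_train, y_train), (x_test, y_test) layout used by the examples.
(x_train, y_train), (x_test, y_test), min_pixel, max_pixel = load_mnist()

# Small Keras CNN standing in for self.classifier_k.
model = keras.Sequential([
    keras.layers.Conv2D(4, kernel_size=5, activation='relu', input_shape=x_train.shape[1:]),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(10, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

classifier_k = KerasClassifier(model=model, clip_values=(min_pixel, max_pixel))
classifier_k.fit(x_train, y_train, nb_epochs=2, batch_size=BATCH_SIZE)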
Example #2
    def test_shared_model_mnist(self):
        """
        Test the adversarial trainer using one FGSM attacker. The source and target models of the attack are the same
        CNN on MNIST trained for 2 epochs. The test checks whether accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get classifier and define training parameters
        params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
        classifier = self.classifier_k

        # Create FGSM attacker
        adv = FastGradientMethod(classifier)
        x_adv = adv.generate(x_test)
        preds = classifier.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier, adv)
        adv_trainer.fit(x_train, y_train, **params)

        # Check whether accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(
            np.argmax(preds_adv_trained, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        print('\nAccuracy before adversarial training: %.2f%%' % (acc * 100))
        print('\nAccuracy after adversarial training: %.2f%%' %
              (acc_adv_trained * 100))
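
The adversarial-accuracy computation is repeated in every example; a small illustrative helper (not part of the original tests) makes the pattern explicit:

def adversarial_accuracy(classifier, x_adv, y_true):
    """Fraction of adversarial samples the classifier still labels correctly."""
    preds = classifier.predict(x_adv)
    return np.sum(np.argmax(preds, axis=1) == np.argmax(y_true, axis=1)) / y_true.shape[0]

# Usage mirroring the snippets above:
# acc = adversarial_accuracy(classifier, x_adv, y_test)
# acc_adv_trained = adversarial_accuracy(adv_trainer.classifier, x_adv, y_test)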
Example #3
    def test_multi_attack_mnist(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the attack
        are two CNNs on MNIST trained for 2 epochs. FGSM and DeepFool both generate the attack images on the same
        source classifier. The test checks whether accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get source and target classifiers
        classifier_tgt = self.classifier_k
        classifier_src = self.classifier_tf

        # Create FGSM and DeepFool attackers
        adv1 = FastGradientMethod(classifier_src)
        adv2 = DeepFool(classifier_src)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        y_adv = np.vstack((y_test, y_test))
        preds = classifier_tgt.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
        params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
        adv_trainer.fit(x_train, y_train, **params)

        # Check whether accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]
        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
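
Although the docstrings describe checking that adversarial accuracy improves, the snippets only log the before/after values. A hedged sketch of an explicit unittest assertion that could close each test follows; whether a strict improvement is guaranteed after so few training epochs is not established by these examples.

        # Hypothetical closing check for a unittest.TestCase method; not in the original snippets.
        self.assertGreaterEqual(
            acc_adv_trained,
            acc,
            msg='Adversarial training did not improve accuracy on adversarial samples.',
        )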