Example #1
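The snippets in these examples are methods of a test class, so they rely on fixtures such as self.mnist, self.classifier_k (a Keras CNN) and self.classifier_tf (a TensorFlow CNN), plus module-level names that are not shown here. A rough sketch of the context they appear to assume follows; the exact import paths, the StaticAdversarialTrainer name and the BATCH_SIZE value vary between releases of the Adversarial Robustness Toolbox (ART), so treat this as an assumption rather than the test file's actual header.

# Assumed module-level context for the snippets below (illustrative only;
# import paths differ across ART releases, and 'StaticAdversarialTrainer'
# is assumed here to be an alias for ART's AdversarialTrainer).
import logging

import numpy as np

from art.attacks import FastGradientMethod, DeepFool
from art.defences import AdversarialTrainer as StaticAdversarialTrainer
from art.data_generators import DataGenerator

logger = logging.getLogger(__name__)
BATCH_SIZE = 100  # assumed value; the tests only need some positive batch size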
    def test_shared_model_mnist(self):
        """
        Test the adversarial trainer using one FGSM attacker. The source and target models of the attack are the same
        CNN on MNIST trained for 2 epochs. The test case checks whether accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Set training parameters and get the classifier
        params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
        classifier = self.classifier_k

        # Create FGSM attacker
        adv = FastGradientMethod(classifier)
        x_adv = adv.generate(x_test)
        preds = classifier.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / y_test.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier, adv)
        adv_trainer.fit(x_train, y_train, **params)

        # Check that accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(
            np.argmax(preds_adv_trained, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        print('\nAccuracy before adversarial training: %.2f%%' % (acc * 100))
        print('\nAccuracy after adversarial training: %.2f%%' %
              (acc_adv_trained * 100))
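The fit call on the trainer is where the defence happens: the attack handed to StaticAdversarialTrainer is used to craft adversarial versions of the training data, and the classifier is retrained on the augmented set. The following is only a conceptual sketch of that idea, not ART's implementation (which also handles attack ratios, batching and multiple attacks); it assumes the same fit/predict/generate interfaces used in the test above.

# Conceptual sketch of static adversarial training (not ART's code).
def static_adversarial_fit(classifier, attack, x_train, y_train, **fit_kwargs):
    # Craft adversarial counterparts of the training samples with the given attack.
    x_adv = attack.generate(x_train)
    # Train on the union of clean and adversarial samples, reusing the clean labels.
    x_aug = np.vstack((x_train, x_adv))
    y_aug = np.vstack((y_train, y_train))
    classifier.fit(x_aug, y_aug, **fit_kwargs)
    return classifier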
Example #2
    def test_one_attack_mnist(self):
        """
        Test the adversarial trainer using one FGSM attacker. The source and target models of the attack
        are two CNNs on MNIST (TensorFlow and Keras backends). The test case checks whether accuracy on adversarial samples
        increases after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get source and target classifiers
        classifier_src = self.classifier_k
        classifier_tgt = self.classifier_tf

        # Create FGSM attacker
        adv = FastGradientMethod(classifier_src)
        x_adv = adv.generate(x_test)
        preds = classifier_tgt.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_test, axis=1)) / x_adv.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier_tgt, adv)
        adv_trainer.fit(x_train, y_train, nb_epochs=1)

        # Check that accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(
            np.argmax(preds_adv_trained, axis=1) == np.argmax(
                y_test, axis=1)) / x_adv.shape[0]
        print('\nAccuracy before adversarial training: %.2f%%' % (acc * 100))
        print('\nAccuracy after adversarial training: %.2f%%' %
              (acc_adv_trained * 100))
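This test also illustrates the transfer setting: the adversarial samples are generated against the Keras source model but evaluated on, and used to harden, the TensorFlow target model. The argmax-accuracy computation is repeated verbatim in every test; a small helper (hypothetical, not part of the test class) would factor it out:

# Hypothetical helper replacing the repeated accuracy computation.
def classification_accuracy(classifier, x, y_one_hot):
    preds = classifier.predict(x)
    return np.sum(np.argmax(preds, axis=1) == np.argmax(y_one_hot, axis=1)) / y_one_hot.shape[0]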
Example #3
    def test_multi_attack_mnist(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the attack
        are two CNNs on MNIST trained for 2 epochs. FGSM and DeepFool both generate the attack images on the same
        source classifier. The test case checks whether accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Get source and target classifiers
        classifier_tgt = self.classifier_k
        classifier_src = self.classifier_tf

        # Create FGSM and DeepFool attackers
        adv1 = FastGradientMethod(classifier_src)
        adv2 = DeepFool(classifier_src)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        y_adv = np.vstack((y_test, y_test))
        preds = classifier_tgt.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
        params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}
        adv_trainer.fit(x_train, y_train, **params)

        # Check that accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)) / y_adv.shape[0]
        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
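Compared with the earlier tests, the trainer here receives a list of attacks ([adv1, adv2]), so both FGSM and DeepFool can contribute adversarial samples during training; accordingly, the evaluation stacks the test-time samples of both attacks so the reported accuracy covers both.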
Example #4
    def test_multi_attack_mnist_with_generator(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the attack
        are two CNNs on MNIST trained for 2 epochs. FGSM and DeepFool both generate the attack images on the same
        source classifier. The test case checks whether accuracy on adversarial samples increases
        after adversarially training the model. Here a generator is used to yield the data for adversarial training.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_original = x_train.copy()

        class MyDataGenerator(DataGenerator):
            def __init__(self, x, y, size, batch_size):
                self.x = x
                self.y = y
                self.size = size
                self.batch_size = batch_size

            def get_batch(self):
                ids = np.random.choice(self.size,
                                       size=min(self.size, self.batch_size),
                                       replace=False)
                return (self.x[ids], self.y[ids])

        generator = MyDataGenerator(x_train, y_train, x_train.shape[0],
                                    BATCH_SIZE)

        # Get source and target classifiers
        classifier_tgt = self.classifier_k
        classifier_src = self.classifier_tf

        # Create FGSM and DeepFool attackers
        adv1 = FastGradientMethod(classifier_src)
        adv2 = DeepFool(classifier_src)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        y_adv = np.vstack((y_test, y_test))
        preds = classifier_tgt.predict(x_adv)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(
            y_adv, axis=1)) / y_adv.shape[0]

        # Perform adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier_tgt, [adv1, adv2])
        params = {'nb_epochs': 2}
        adv_trainer.fit_generator(generator, **params)

        # Check that accuracy on adversarial samples has improved
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        acc_adv_trained = np.sum(
            np.argmax(preds_adv_trained, axis=1) == np.argmax(
                y_adv, axis=1)) / y_adv.shape[0]
        logger.info('Accuracy before adversarial training: %.2f%%',
                    (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%',
                    (acc_adv_trained * 100))

        # Finally assert that the original training data hasn't changed:
        self.assertTrue((x_train == x_train_original).all())
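In this generator-based variant, fit_generator pulls batches from the user-supplied DataGenerator (here sampled without replacement within each batch) instead of taking the full arrays up front, and the trainer injects adversarial samples on a per-batch basis; the exact mix depends on the trainer's configuration. The final assertion simply confirms that adversarial training did not modify the original training array in place.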
Example #5
    def test_targeted_attack_error(self):
        """
        Test the adversarial trainer using a targeted attack, which currently results in a
        NotImplementedError.

        :return: None
        """
        (x_train, y_train), (_, _) = self.mnist
        params = {'nb_epochs': 2, 'batch_size': BATCH_SIZE}

        classifier = self.classifier_k
        adv = FastGradientMethod(classifier, targeted=True)
        adv_trainer = StaticAdversarialTrainer(classifier, attacks=adv)
        self.assertRaises(NotImplementedError, adv_trainer.fit, x_train, y_train, **params)