Ejemplo n.º 1
0
    def test_two_attacks(self):
        """
        Adversarially train with FGSM and DeepFool and check accuracy on FGSM samples.

        The adversarial test samples are crafted once, before adversarial training, and are
        scored both before and after training. The clean test inputs must stay unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        pristine_x_test = x_test.copy()
        true_labels = np.argmax(y_test, axis=1)

        fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
        deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

        # Accuracy of the classifier on FGSM samples before adversarial training.
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
        trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)

        # Accuracy on the very same adversarial samples after adversarial training.
        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(labels_after == true_labels) / NB_TEST

        self.assertEqual(accuracy_new, 0.36)
        self.assertEqual(accuracy, 0.13)

        # Neither the attack nor the classifier may have altered x_test in place.
        max_deviation = float(np.max(np.abs(pristine_x_test - x_test)))
        self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)
Ejemplo n.º 2
0
    def test_two_attacks_with_generator(self):
        """
        Adversarially train from a data generator using FGSM and DeepFool.

        Accuracy on FGSM samples (crafted before training) is measured before and after
        adversarial training; the training array must be left unchanged.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_snapshot = x_train.copy()
        true_labels = np.argmax(y_test, axis=1)

        class MyDataGenerator(DataGenerator):
            # Serves random batches, sampled without replacement, from in-memory arrays.
            def __init__(self, x, y, size, batch_size):
                self.x, self.y = x, y
                self.size = size
                self.batch_size = batch_size

            def get_batch(self):
                # Never request more samples than the dataset holds.
                n_draw = min(self.size, self.batch_size)
                picked = np.random.choice(self.size, size=n_draw, replace=False)
                return self.x[picked], self.y[picked]

        generator = MyDataGenerator(x_train, y_train, x_train.shape[0], 128)

        fgsm = FastGradientMethod(self.classifier_k)
        deepfool = DeepFool(self.classifier_tf)
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
        trainer.fit_generator(generator, nb_epochs=5)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == true_labels) / NB_TEST
        # The new accuracy is not required to be higher; a bounded drop is tolerated.
        lower_bound = acc * ACCURACY_DROP
        self.assertGreaterEqual(acc_new, lower_bound)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))

        # The original training data must be unchanged.
        unchanged = (x_train == x_train_snapshot).all()
        self.assertTrue(unchanged)
Ejemplo n.º 3
0
    def test_two_attacks_with_generator(self):
        """
        Adversarially train from a data generator with FGSM and DeepFool on one classifier.

        Accuracy on FGSM samples (crafted before training) is compared against fixed expected
        values, and both the training and the test inputs must remain unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        pristine_x_train = x_train.copy()
        pristine_x_test = x_test.copy()
        true_labels = np.argmax(y_test, axis=1)

        class MyDataGenerator(DataGenerator):
            # Serves random batches, sampled without replacement, from in-memory arrays.
            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x, self.y = x, y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                # Never request more samples than the dataset holds.
                n_draw = min(self.size, self.batch_size)
                picked = np.random.choice(self.size, size=n_draw, replace=False)
                return self.x[picked], self.y[picked]

        generator = MyDataGenerator(x_train, y_train, size=x_train.shape[0], batch_size=16)

        fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
        deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

        # Accuracy on FGSM samples before adversarial training.
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
        trainer.fit_generator(generator, nb_epochs=3)

        # Accuracy on the same adversarial samples after adversarial training.
        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(labels_after == true_labels) / NB_TEST

        self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
        self.assertAlmostEqual(accuracy, 0.11, delta=0.0)

        # Neither x_train nor x_test may have been altered by the attack or the classifier.
        train_deviation = float(np.max(np.abs(pristine_x_train - x_train)))
        self.assertAlmostEqual(train_deviation, 0.0, delta=0.00001)
        test_deviation = float(np.max(np.abs(pristine_x_test - x_test)))
        self.assertAlmostEqual(test_deviation, 0.0, delta=0.00001)
Ejemplo n.º 4
0
    def test_classifier_match(self):
        """
        Check that a single attack passed to the trainer is stored as a one-element list and
        that this attack's classifier equals the trainer's classifier.

        :return: None
        """
        fgsm = FastGradientMethod(self.classifier_k)
        trainer = AdversarialTrainer(self.classifier_k, fgsm)

        registered = trainer.attacks
        self.assertEqual(len(registered), 1)
        self.assertEqual(registered[0].classifier, trainer.classifier)
Ejemplo n.º 5
0
    def test_transfer(self):
        """
        Adversarially train `classifier_k` with DeepFool samples crafted on `classifier_tf`.

        Accuracy of `classifier_k` on those samples is measured before and after training and
        may only drop by the factor `ACCURACY_DROP`.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        true_labels = np.argmax(y_test, axis=1)

        # The attack targets one classifier while the other one is evaluated and trained.
        deepfool = DeepFool(self.classifier_tf)
        x_test_adv = deepfool.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, deepfool)
        trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == true_labels) / NB_TEST
        lower_bound = acc * ACCURACY_DROP
        self.assertGreaterEqual(acc_new, lower_bound)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_new * 100))
Ejemplo n.º 6
0
    def test_fit_predict(self):
        """
        Adversarially train `classifier_k` with FGSM and compare accuracy on FGSM samples
        (crafted before training) before and after; a drop bounded by `accuracy_drop` is allowed.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        true_labels = np.argmax(y_test, axis=1)

        fgsm = FastGradientMethod(self.classifier_k)
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, fgsm)
        trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == true_labels) / NB_TEST
        lower_bound = acc * accuracy_drop
        self.assertGreaterEqual(acc_new, lower_bound)

        # Report both accuracies as percentages.
        pct_before = acc * 100
        pct_after = acc_new * 100
        print('\nAccuracy before adversarial training: %.2f%%' % (pct_before))
        print('\nAccuracy after adversarial training: %.2f%%' % (pct_after))
Ejemplo n.º 7
0
    def test_two_attacks(self):
        """
        Adversarially train `classifier_k` with FGSM (on `classifier_k`) and DeepFool
        (on `classifier_tf`), then compare accuracy on FGSM samples before and after training.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        true_labels = np.argmax(y_test, axis=1)

        fgsm = FastGradientMethod(self.classifier_k)
        deepfool = DeepFool(self.classifier_tf)
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
        trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == true_labels) / NB_TEST
        # The new accuracy is not required to be higher; a bounded drop is tolerated.
        lower_bound = acc * ACCURACY_DROP
        self.assertGreaterEqual(acc_new, lower_bound)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))
Ejemplo n.º 8
0
    def test_transfer(self):
        """
        Adversarially train `classifier_k` with DeepFool samples crafted on `classifier_tf`.

        Accuracy of `classifier_k` on those samples may only drop by the factor
        `ACCURACY_DROP`, and the clean test inputs must stay unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        pristine_x_test = x_test.copy()
        true_labels = np.argmax(y_test, axis=1)

        # The attack targets one classifier while the other one is evaluated and trained.
        deepfool = DeepFool(self.classifier_tf)
        x_test_adv = deepfool.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, deepfool)
        trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == true_labels) / NB_TEST
        lower_bound = acc * ACCURACY_DROP
        self.assertGreaterEqual(acc_new, lower_bound)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_new * 100))

        # Neither the attack nor the classifier may have altered x_test in place.
        max_deviation = float(np.max(np.abs(pristine_x_test - x_test)))
        self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)
Ejemplo n.º 9
0
    def test_targeted_attack_error(self):
        """
        Check that fitting the adversarial trainer with a targeted attack raises
        NotImplementedError.

        :return: None
        """
        (x_train, y_train), (_, _) = self.mnist
        fit_kwargs = {"nb_epochs": 2, "batch_size": BATCH_SIZE}

        targeted_fgsm = FastGradientMethod(self.classifier, targeted=True)
        trainer = AdversarialTrainer(self.classifier, attacks=targeted_fgsm)

        # Only the call to fit is expected to raise; construction above must succeed.
        with self.assertRaises(NotImplementedError):
            trainer.fit(x_train, y_train, **fit_kwargs)
Ejemplo n.º 10
0
    def test_two_attacks(self):
        """
        Adversarially train `classifier_k` with FGSM (on `classifier_k`) and DeepFool
        (on `classifier_tf`), compare accuracy on FGSM samples before and after training,
        and verify that the clean test inputs stay unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        pristine_x_test = x_test.copy()
        true_labels = np.argmax(y_test, axis=1)

        fgsm = FastGradientMethod(self.classifier_k)
        deepfool = DeepFool(self.classifier_tf)
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
        trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == true_labels) / NB_TEST
        # The new accuracy is not required to be higher; a bounded drop is tolerated.
        lower_bound = acc * ACCURACY_DROP
        self.assertGreaterEqual(acc_new, lower_bound)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))

        # Neither the attack nor the classifier may have altered x_test in place.
        max_deviation = float(np.max(np.abs(pristine_x_test - x_test)))
        self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)
Ejemplo n.º 11
0
                                 use_logits=False)
    # PGD attack on the classifier: perturbation budget 8/255 with 1/255 steps, 20 iterations.
    attack = ProjectedGradientDescent(classifier,
                                      eps=8 / 255,
                                      eps_step=1 / 255,
                                      max_iter=20,
                                      batch_size=512)

    # Adversarial test samples crafted against the model *before* adversarial training;
    # they are reused after training to measure the change.
    x_test_pgd = attack.generate(x_test, y_test)
    # np.save('./data/' + dataset + '_data/model/' + model_name + '_y_' + attack_name + '.npy', x_test_pgd)

    # Ground-truth class indices. Computed here, ahead of the first accuracy report, because
    # every accuracy figure below compares against them (previously the only visible
    # assignment came after the first use).
    labels_true = np.argmax(y_test, axis=1)

    # Evaluate the benign trained model on adv test set
    labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
    print('Accuracy on original PGD adversarial samples: %.2f%%' %
          (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))

    # Adversarial training with ratio=1.0 and the PGD attack defined above.
    trainer = AdversarialTrainer(classifier, attack, ratio=1.0)
    trainer.fit(x_train, y_train, nb_epochs=60, batch_size=1024)

    # Persist the adversarially trained model next to the dataset's other models.
    classifier.save(filename='adv_' + model_name + '.h5',
                    path='../data/' + dataset + '_data/model/')

    # Evaluate the adversarially trained model on clean test set
    labels_test = np.argmax(classifier.predict(x_test), axis=1)
    print('Accuracy test set: %.2f%%' %
          (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))

    # Evaluate the adversarially trained model on original adversarial samples
    labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
    print('Accuracy on original PGD adversarial samples: %.2f%%' %
          (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))
Ejemplo n.º 12
0
# Wrap the model returned by build_model() as an ART Keras classifier with inputs clipped
# to the range (0, 1).
# NOTE(review): use_logits=False presumably means the model outputs probabilities — confirm
# against the ART version in use.
classifier = KerasClassifier(build_model(),
                             clip_values=(0, 1),
                             use_logits=False)

# Create the attack for the adversarial trainer; a single PGD attack crafting adversarial
# examples on the target model is used here.
# NOTE(review): eps=8 and eps_step=2 look large relative to clip_values=(0, 1) — confirm the
# scale of the input data.
pgd = ProjectedGradientDescent(classifier,
                               eps=8,
                               eps_step=2,
                               max_iter=10,
                               num_random_init=20)

# Create some adversarial samples for evaluation (crafted before adversarial training)
x_test_pgd = pgd.generate(x_test)

# Create adversarial trainer and perform adversarial training from the data generator
# `art_datagen`, which is defined outside this fragment
adv_trainer = AdversarialTrainer(classifier, attacks=pgd, ratio=1.0)
adv_trainer.fit_generator(art_datagen, nb_epochs=83)

# Evaluate the adversarially trained model on clean test set
labels_true = np.argmax(y_test, axis=1)
labels_test = np.argmax(classifier.predict(x_test), axis=1)
print("Accuracy test set: %.2f%%" %
      (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))

# Evaluate the adversarially trained model on original adversarial samples
labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
print("Accuracy on original PGD adversarial samples: %.2f%%" %
      (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))

# Evaluate the adversarially trained model on fresh adversarial samples produced on the adversarially trained model
# (x_test_pgd is overwritten here; the pre-training samples are no longer available afterwards)
x_test_pgd = pgd.generate(x_test)
Ejemplo n.º 13
0
from art.defences import AdversarialTrainer

# get a new model built with epochs=0 (presumably untrained) and wrap it as an ART classifier
new_model = mnist_cnn_model(x_train, y_train, x_test, y_test, epochs=0)
defended_model = KerasClassifier(clip_values=(0, 1), model=new_model)
# define the attack we are using
fgsm = FastGradientMethod(defended_model)

# Create the `AdversarialTrainer` instance.
# Train the model and evaluate it on the test data.

# In[ ]:

# define the adversarial trainer and train the new network
adversarial_tranier = AdversarialTrainer(defended_model, fgsm)
adversarial_tranier.fit(x_train, y_train, batch_size=100, nb_epochs=2)

# evaluate how good our model is
# NOTE(review): this reaches into the wrapper's private `_model` attribute to call evaluate
# on the wrapped model directly.
defended_model._model.evaluate(x_test, y_test)

# Calculate the `empirical robustness` for our now hopefully more robust model

# In[ ]:

# calculate the empirical robustness over the whole test set (x_test[0:] is every sample)
print('robustness of the defended model',
      empirical_robustness(defended_model, x_test[0:], 'fgsm', {}))
# craft an adversarial sample from the first test image, reshaped to a batch of one
x_adv = fgsm.generate(x_test[0].reshape((1, 28, 28, 1)))
# NOTE(review): the prediction below uses `clf`, which is defined outside this fragment, not
# `defended_model` — confirm this is the intended classifier.
print('class prediction for the adversarial sample:',
      clf.predict(x_adv.reshape((1, 28, 28, 1))))