Example #1
0
    def test_multi_attack_mnist(self):
        """
        Adversarially train the target classifier against FGSM and DeepFool samples.

        Both attacks are built on the source classifier (`self.classifier_tf`). The target classifier
        (`self.classifier_k`) is scored on the stacked adversarial test samples before and after static
        adversarial training. The two accuracies are logged; no assertion is made on them.

        :return: None
        """
        (train_x, train_y), (test_x, test_y) = self.mnist

        # Target is the model being trained, source is the model the attacks are crafted on
        target = self.classifier_k
        source = self.classifier_tf

        # Build both attackers and stack their adversarial versions of the test set
        fgsm = FastGradientMethod(source)
        deepfool = DeepFool(source)
        adv_x = np.vstack((fgsm.generate(test_x), deepfool.generate(test_x)))
        adv_y = np.vstack((test_y, test_y))
        true_labels = np.argmax(adv_y, axis=1)
        nb_samples = adv_y.shape[0]

        # Accuracy of the target classifier on the adversarial samples before training
        baseline_preds = target.predict(adv_x)
        acc = np.sum(np.argmax(baseline_preds, axis=1) == true_labels) / nb_samples

        # Adversarial training with both attacks
        trainer = StaticAdversarialTrainer(target, [fgsm, deepfool])
        trainer.fit(train_x, train_y, nb_epochs=2, batch_size=BATCH_SIZE)

        # Accuracy on the same adversarial samples after training
        trained_preds = trainer.classifier.predict(adv_x)
        acc_adv_trained = np.sum(np.argmax(trained_preds, axis=1) == true_labels) / nb_samples

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
    def test_multi_attack_mnist_with_generator(self):
        """
        Adversarially train the target classifier against FGSM and DeepFool, feeding data through a generator.

        Both attacks are built on the source classifier (`self.classifier_tf`). The target classifier
        (`self.classifier_k`) is scored on the stacked adversarial test samples before and after training with
        `fit_generator`; the accuracies are logged. The only assertion is that the training inputs are left
        unchanged.

        :return: None
        """
        (train_x, train_y), (test_x, test_y) = self.mnist
        train_x_snapshot = train_x.copy()

        class MyDataGenerator(DataGenerator):
            # Serves random batches, drawn without replacement, from in-memory arrays
            def __init__(self, x, y, size, batch_size):
                self.x, self.y = x, y
                self.size, self.batch_size = size, batch_size

            def get_batch(self):
                nb_drawn = min(self.size, self.batch_size)
                picked = np.random.choice(self.size, size=nb_drawn, replace=False)
                return self.x[picked], self.y[picked]

        generator = MyDataGenerator(train_x, train_y, train_x.shape[0], BATCH_SIZE)

        # Target is the model being trained, source is the model the attacks are crafted on
        target = self.classifier_k
        source = self.classifier_tf

        # Build both attackers and stack their adversarial versions of the test set
        fgsm = FastGradientMethod(source)
        deepfool = DeepFool(source)
        adv_x = np.vstack((fgsm.generate(test_x), deepfool.generate(test_x)))
        adv_y = np.vstack((test_y, test_y))
        true_labels = np.argmax(adv_y, axis=1)
        nb_samples = adv_y.shape[0]

        # Accuracy of the target classifier on the adversarial samples before training
        baseline_preds = target.predict(adv_x)
        acc = np.sum(np.argmax(baseline_preds, axis=1) == true_labels) / nb_samples

        # Adversarial training driven by the generator
        trainer = StaticAdversarialTrainer(target, [fgsm, deepfool])
        trainer.fit_generator(generator, nb_epochs=2)

        # Accuracy on the same adversarial samples after training
        trained_preds = trainer.classifier.predict(adv_x)
        acc_adv_trained = np.sum(np.argmax(trained_preds, axis=1) == true_labels) / nb_samples

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))

        # The training inputs must come out of the whole procedure untouched
        self.assertTrue((train_x == train_x_snapshot).all())
Example #3
0
def deep_fool(x_test, model, max_iter, epsilon, nb_grads, batch_size):
    """
    Craft a DeepFool adversarial version of `x_test` against a Keras model.

    The model is wrapped in a `KerasClassifier` with clip values (0, 1). The attack output is reshaped to
    (32, 32, 3), so it must hold exactly 32 * 32 * 3 values.

    :param x_test: Input handed to `DeepFool.generate`.
    :param model: Keras model to wrap and attack.
    :param max_iter: Forwarded to `DeepFool` as `max_iter`.
    :param epsilon: Forwarded to `DeepFool` as `epsilon`.
    :param nb_grads: Forwarded to `DeepFool` as `nb_grads`.
    :param batch_size: Forwarded to `DeepFool` as `batch_size`.
    :return: The adversarial sample reshaped to (32, 32, 3).
    """
    wrapped_model = KerasClassifier(model=model, clip_values=(0, 1))
    attack_settings = {
        'max_iter': max_iter,
        'epsilon': epsilon,
        'nb_grads': nb_grads,
        'batch_size': batch_size,
    }
    deepfool_attack = DeepFool(classifier=wrapped_model, **attack_settings)
    adversarial = deepfool_attack.generate(x_test)
    target_shape = (32, 32, 3)
    return np.reshape(adversarial, target_shape)
Example #4
0
def atk_DeepFool(x_train, x_test, y_train, y_test, classifier):
    """
    Run DeepFool (`max_iter=20`) on the train and test inputs and report the result through `evaluate`.

    :param x_train: Training inputs to attack.
    :param x_test: Test inputs to attack.
    :param y_train: Training labels, forwarded to `evaluate`.
    :param y_test: Test labels, forwarded to `evaluate`.
    :param classifier: Classifier the attack is built on; also forwarded to `evaluate`.
    :return: Tuple `(x_test_adv, x_train_adv)` - note that the test samples come first.
    """
    attack = DeepFool(classifier, max_iter=20)
    # Train set is attacked first, then the test set
    adv_train, adv_test = (attack.generate(inputs) for inputs in (x_train, x_test))

    print("After DeepFool Attack \n")
    evaluate(x_train, x_test, y_train, y_test, adv_train, adv_test, classifier)
    return adv_test, adv_train
    def test_pytorch_mnist(self):
        """
        Run DeepFool (`max_iter=5`, `batch_size=11`) against the PyTorch MNIST classifier.

        Asserts that the attack changes at least one train and one test input, that the predicted labels on the
        adversarial samples differ from the true labels somewhere, and that `x_test` itself is left unmodified.
        Clean and adversarial accuracies are logged.
        """
        def to_nchw(images):
            # Reshape to (n, 1, 28, 28) and cast to float32
            return np.reshape(images, (images.shape[0], 1, 28, 28)).astype(np.float32)

        def hit_ratio(one_hot_preds, one_hot_truth):
            # Fraction of rows whose argmax agrees
            hits = np.sum(np.argmax(one_hot_preds, axis=1) == np.argmax(one_hot_truth, axis=1))
            return hits / one_hot_truth.shape[0]

        x_train = to_nchw(self.x_train_mnist)
        x_test = to_nchw(self.x_test_mnist)
        x_test_snapshot = x_test.copy()

        # Basic PyTorch model
        classifier = get_image_classifier_pt(from_logits=True)

        clean_train_labels = get_labels_np_array(classifier.predict(x_train))
        accuracy = hit_ratio(clean_train_labels, self.y_train_mnist)
        logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

        clean_test_labels = get_labels_np_array(classifier.predict(x_test))
        accuracy = hit_ratio(clean_test_labels, self.y_test_mnist)
        logger.info("[PyTorch, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

        deepfool = DeepFool(classifier, max_iter=5, batch_size=11)
        x_train_adv = deepfool.generate(x_train)
        x_test_adv = deepfool.generate(x_test)

        # The attack must have altered at least one value in each set
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        adv_train_labels = get_labels_np_array(classifier.predict(x_train_adv))
        adv_test_labels = get_labels_np_array(classifier.predict(x_test_adv))

        # Predictions on adversarial samples must not match the ground truth everywhere
        self.assertFalse((self.y_train_mnist == adv_train_labels).all())
        self.assertFalse((self.y_test_mnist == adv_test_labels).all())

        accuracy = hit_ratio(adv_train_labels, self.y_train_mnist)
        logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

        accuracy = hit_ratio(adv_test_labels, self.y_test_mnist)
        logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

        # x_test must not have been modified by the attack or the classifier
        largest_change = float(np.max(np.abs(x_test_snapshot - x_test)))
        self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
Example #6
0
    def test_partial_grads(self):
        """
        Run DeepFool with `max_iter=2` and `nb_grads=3` on `self.classifier_k`.

        Asserts that at least one input value changed and that the predicted labels differ from the true
        labels somewhere; the adversarial accuracy is logged.
        """
        classifier = self.classifier_k
        deepfool = DeepFool(classifier, max_iter=2, nb_grads=3)

        adversarial = deepfool.generate(self.x_test)
        self.assertFalse((self.x_test == adversarial).all())

        adv_labels = get_labels_np_array(classifier.predict(adversarial))
        self.assertFalse((self.y_test == adv_labels).all())

        matches = np.argmax(adv_labels, axis=1) == np.argmax(self.y_test, axis=1)
        accuracy = np.sum(matches) / self.y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%', (accuracy * 100))
    def test_tensorflow_mnist(self):
        """
        Run DeepFool (`max_iter=5`, `batch_size=11`) against the TensorFlow MNIST classifier.

        Asserts that the attack changes at least one train and one test input, that the predicted labels on the
        adversarial samples differ from the true labels somewhere, and that `self.x_test_mnist` is left
        unmodified. Clean and adversarial accuracies are logged.
        """
        def hit_ratio(one_hot_preds, one_hot_truth):
            # Fraction of rows whose argmax agrees
            hits = np.sum(np.argmax(one_hot_preds, axis=1) == np.argmax(one_hot_truth, axis=1))
            return hits / one_hot_truth.shape[0]

        x_test_snapshot = self.x_test_mnist.copy()

        # Basic CNN on MNIST using TensorFlow; the second returned value is not needed here
        classifier, _ = get_image_classifier_tf(from_logits=True)

        clean_train_labels = get_labels_np_array(classifier.predict(self.x_train_mnist))
        accuracy = hit_ratio(clean_train_labels, self.y_train_mnist)
        logger.info("[TF, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

        clean_test_labels = get_labels_np_array(classifier.predict(self.x_test_mnist))
        accuracy = hit_ratio(clean_test_labels, self.y_test_mnist)
        logger.info("[TF, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

        deepfool = DeepFool(classifier, max_iter=5, batch_size=11)
        x_train_adv = deepfool.generate(self.x_train_mnist)
        x_test_adv = deepfool.generate(self.x_test_mnist)

        # The attack must have altered at least one value in each set
        self.assertFalse((self.x_train_mnist == x_train_adv).all())
        self.assertFalse((self.x_test_mnist == x_test_adv).all())

        adv_train_labels = get_labels_np_array(classifier.predict(x_train_adv))
        adv_test_labels = get_labels_np_array(classifier.predict(x_test_adv))

        # Predictions on adversarial samples must not match the ground truth everywhere
        self.assertFalse((self.y_train_mnist == adv_train_labels).all())
        self.assertFalse((self.y_test_mnist == adv_test_labels).all())

        accuracy = hit_ratio(adv_train_labels, self.y_train_mnist)
        logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

        accuracy = hit_ratio(adv_test_labels, self.y_test_mnist)
        logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

        # x_test must not have been modified by the attack or the classifier
        largest_change = float(np.max(np.abs(x_test_snapshot - self.x_test_mnist)))
        self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
Example #8
0
    def test_iris_k_unbounded(self):
        """
        Run DeepFool (`max_iter=5`, `batch_size=128`) on a Keras Iris classifier built without clip values.

        Asserts that at least one input value changed and that at least one adversarial prediction differs from
        the true class; the adversarial accuracy is logged.
        """
        base_classifier, _ = get_iris_classifier_kr()

        # Re-wrap the underlying model, this time without passing clip values
        classifier = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)
        deepfool = DeepFool(classifier, max_iter=5, batch_size=128)

        adversarial = deepfool.generate(self.x_test)
        self.assertFalse((self.x_test == adversarial).all())

        adv_classes = np.argmax(classifier.predict(adversarial), axis=1)
        true_classes = np.argmax(self.y_test, axis=1)
        self.assertFalse((true_classes == adv_classes).all())

        accuracy = np.sum(adv_classes == true_classes) / self.y_test.shape[0]
        logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (accuracy * 100))
Example #9
0
    def test_iris_pt(self):
        """
        Run DeepFool (`max_iter=5`, `batch_size=128`) on the PyTorch Iris classifier.

        Asserts that at least one input value changed, that all adversarial values lie within [0, 1], and that
        at least one adversarial prediction differs from the true class; the adversarial accuracy is logged.
        """
        classifier = get_iris_classifier_pt()
        deepfool = DeepFool(classifier, max_iter=5, batch_size=128)

        adversarial = deepfool.generate(self.x_test)
        self.assertFalse((self.x_test == adversarial).all())

        # Adversarial samples must stay inside the [0, 1] range
        self.assertLessEqual(np.amax(adversarial), 1.0)
        self.assertGreaterEqual(np.amin(adversarial), 0.0)

        adv_classes = np.argmax(classifier.predict(adversarial), axis=1)
        true_classes = np.argmax(self.y_test, axis=1)
        self.assertFalse((true_classes == adv_classes).all())

        accuracy = np.sum(adv_classes == true_classes) / self.y_test.shape[0]
        logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (accuracy * 100))
    def test_kera_mnist_partial_grads(self):
        """
        Run DeepFool with `max_iter=2` and `nb_grads=3` on the Keras MNIST classifier.

        Asserts that at least one test input value changed and that the predicted labels differ from the true
        labels somewhere; the adversarial accuracy is logged.
        """
        classifier = get_image_classifier_kr(from_logits=True)
        deepfool = DeepFool(classifier, max_iter=2, nb_grads=3)

        adversarial = deepfool.generate(self.x_test_mnist)
        self.assertFalse((self.x_test_mnist == adversarial).all())

        adv_labels = get_labels_np_array(classifier.predict(adversarial))
        self.assertFalse((self.y_test_mnist == adv_labels).all())

        matches = np.argmax(adv_labels, axis=1) == np.argmax(self.y_test_mnist, axis=1)
        accuracy = np.sum(matches) / self.y_test_mnist.shape[0]
        logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))
Example #11
0
 def __init__(self, model, overshoot=1e-6, max_iterations=100, n_candidates=10, batch_size=16):
     """
     Store the attack settings and build the underlying `DeepFool` attack.

     :param model: Forwarded to the parent constructor; `self.model` is then used as the DeepFool classifier.
     :param overshoot: Stored as `_overshoot` and passed to `DeepFool` as `epsilon`.
     :param max_iterations: Stored as `_max_iterations` and passed to `DeepFool` as `max_iter`.
     :param n_candidates: Stored as `_n_candidates` and passed to `DeepFool` as `nb_grads`.
     :param batch_size: Passed to `DeepFool` as `batch_size`; not stored on the instance.
     """
     super().__init__(model=model)
     self._overshoot, self._max_iterations, self._n_candidates = overshoot, max_iterations, n_candidates

     deepfool_settings = {
         'epsilon': self._overshoot,
         'max_iter': self._max_iterations,
         'nb_grads': self._n_candidates,
         'batch_size': batch_size,
     }
     self._method = DeepFool(classifier=self.model, **deepfool_settings)
Example #12
0
    def test_two_attacks(self):
        """
        Adversarially train `self.classifier` with FGSM and DeepFool and pin the resulting accuracies.

        Accuracy is measured on FGSM samples of the test set before and after training and compared against
        fixed expected values. Finally `x_test` is checked to be unmodified.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_snapshot = x_test.copy()

        fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
        deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

        # Accuracy on FGSM samples before adversarial training
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

        trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
        trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)

        # Accuracy on the same samples after adversarial training
        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertEqual(accuracy_new, 0.36)
        self.assertEqual(accuracy, 0.13)

        # x_test must not have been modified by the attack or the classifier
        largest_change = float(np.max(np.abs(x_test_snapshot - x_test)))
        self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
Example #13
0
    def test_two_attacks_with_generator(self):
        """
        Adversarially train `self.classifier_k` with FGSM and DeepFool, feeding data through a generator.

        FGSM is built on `self.classifier_k`, DeepFool on `self.classifier_tf`. Accuracy on FGSM samples of the
        test set after training must be at least `ACCURACY_DROP` times the accuracy before training, and the
        training inputs must be left unchanged.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_snapshot = x_train.copy()

        class MyDataGenerator(DataGenerator):
            # Serves random batches, drawn without replacement, from in-memory arrays
            def __init__(self, x, y, size, batch_size):
                self.x, self.y = x, y
                self.size, self.batch_size = size, batch_size

            def get_batch(self):
                nb_drawn = min(self.size, self.batch_size)
                picked = np.random.choice(self.size, size=nb_drawn, replace=False)
                return self.x[picked], self.y[picked]

        generator = MyDataGenerator(x_train, y_train, x_train.shape[0], 128)

        fgsm = FastGradientMethod(self.classifier_k)
        deepfool = DeepFool(self.classifier_tf)

        # Accuracy on FGSM samples before adversarial training
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
        trainer.fit_generator(generator, nb_epochs=5)

        # Accuracy on the same samples after adversarial training
        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

        # The new accuracy is not required to be higher; a slight drop is tolerated
        self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))

        # The training inputs must come out of the whole procedure untouched
        self.assertTrue((x_train == x_train_snapshot).all())
    def test_keras_mnist(self):
        """
        Run DeepFool (`max_iter=5`, `batch_size=11`) against the Keras MNIST classifier.

        Asserts that the attack changes at least one train and one test input, that the predicted labels on the
        adversarial samples differ from the true labels somewhere, and that `self.x_test_mnist` is left
        unmodified. Clean and adversarial accuracies are logged.
        """
        def hit_ratio(one_hot_preds, one_hot_truth):
            # Fraction of rows whose argmax agrees
            hits = np.sum(np.argmax(one_hot_preds, axis=1) == np.argmax(one_hot_truth, axis=1))
            return hits / one_hot_truth.shape[0]

        x_test_snapshot = self.x_test_mnist.copy()

        # Keras classifier
        classifier = get_image_classifier_kr(from_logits=True)

        # The second entry of the evaluation result is logged as the accuracy
        train_scores = classifier._model.evaluate(self.x_train_mnist, self.y_train_mnist)
        logger.info("[Keras, MNIST] Accuracy on training set: %.2f%%", (train_scores[1] * 100))
        test_scores = classifier._model.evaluate(self.x_test_mnist, self.y_test_mnist)
        logger.info("[Keras, MNIST] Accuracy on test set: %.2f%%", (test_scores[1] * 100))

        deepfool = DeepFool(classifier, max_iter=5, batch_size=11)
        x_train_adv = deepfool.generate(self.x_train_mnist)
        x_test_adv = deepfool.generate(self.x_test_mnist)

        # The attack must have altered at least one value in each set
        self.assertFalse((self.x_train_mnist == x_train_adv).all())
        self.assertFalse((self.x_test_mnist == x_test_adv).all())

        adv_train_labels = get_labels_np_array(classifier.predict(x_train_adv))
        adv_test_labels = get_labels_np_array(classifier.predict(x_test_adv))

        # Predictions on adversarial samples must not match the ground truth everywhere
        self.assertFalse((self.y_train_mnist == adv_train_labels).all())
        self.assertFalse((self.y_test_mnist == adv_test_labels).all())

        accuracy = hit_ratio(adv_train_labels, self.y_train_mnist)
        logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

        accuracy = hit_ratio(adv_test_labels, self.y_test_mnist)
        logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

        # x_test must not have been modified by the attack or the classifier
        largest_change = float(np.max(np.abs(x_test_snapshot - self.x_test_mnist)))
        self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
    def test_keras_iris_clipped(self):
        """
        Run DeepFool (`max_iter=5`) on the Keras Iris classifier.

        Asserts that at least one input value changed, that all adversarial values lie within [0, 1], and that
        at least one adversarial prediction differs from the true class; the adversarial accuracy is logged.
        """
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()
        deepfool = DeepFool(classifier, max_iter=5)

        adversarial = deepfool.generate(x_test)
        self.assertFalse((x_test == adversarial).all())

        # Adversarial samples must stay inside the [0, 1] range
        self.assertLessEqual(np.amax(adversarial), 1.0)
        self.assertGreaterEqual(np.amin(adversarial), 0.0)

        adv_classes = np.argmax(classifier.predict(adversarial), axis=1)
        true_classes = np.argmax(y_test, axis=1)
        self.assertFalse((true_classes == adv_classes).all())

        accuracy = np.sum(adv_classes == true_classes) / y_test.shape[0]
        logger.info('Accuracy on Iris with DeepFool adversarial examples: %.2f%%', (accuracy * 100))
Example #16
0
    def test_transfer(self):
        """
        Adversarially train `self.classifier_k` against DeepFool samples crafted on `self.classifier_tf`.

        Accuracy on the adversarial test samples after training must be at least `ACCURACY_DROP` times the
        accuracy before training; both accuracies are logged.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Samples are crafted on one classifier and evaluated on the other
        deepfool = DeepFool(self.classifier_tf)
        x_test_adv = deepfool.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, deepfool)
        trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST
        self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_new * 100))
def general_test_v2(model,
                    optimizer,
                    input_shape,
                    nb_classes,
                    test_loader,
                    method,
                    conf,
                    btrain=False,
                    model_file='last_model_92_sgd.pkl'):
    """
    Evaluate `model` on adversarial examples crafted with the selected attack.

    The model is wrapped in a `PyTorchClassifier` with clip values (0, 1), an adversarial dataset is built from
    `test_loader` through `adv_generalization`, and the accuracy of `model` on that dataset is printed and
    returned. Batches are moved to the GPU with `.cuda()`.

    :param model: PyTorch module to evaluate; it is switched to eval mode.
    :param optimizer: Forwarded to `PyTorchClassifier`.
    :param input_shape: Forwarded to `PyTorchClassifier`.
    :param nb_classes: Forwarded to `PyTorchClassifier`.
    :param test_loader: Source data handed to `adv_generalization`.
    :param method: Attack name: 'Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI' or 'FGSM'.
    :param conf: Forwarded unchanged to `adv_generalization`.
    :param btrain: When false, weights are first loaded from `model_file`.
    :param model_file: Checkpoint path; its 'state_dict' entry is loaded into `model`.
    :return: `correct / total` accumulated over the adversarial batches.
    :raises NotImplementedError: If `method` is not one of the supported attack names.
    """
    # Single table of supported attacks. The lambdas defer construction until the wrapped classifier exists.
    attack_factories = {
        'Deepfool': lambda clf: DeepFool(clf),
        'BIM': lambda clf: BasicIterativeMethod(clf, batch_size=32),
        'JSMA': lambda clf: SaliencyMapMethod(clf, batch_size=32),
        'CW2': lambda clf: CarliniL2Method(clf, batch_size=32),
        'CWI': lambda clf: CarliniLInfMethod(clf, batch_size=32),
        'FGSM': lambda clf: FastGradientMethod(clf, batch_size=32),
    }
    # Reject an unknown attack before loading weights or building the wrapper; previously this surfaced
    # later as an unbound `adv_crafter`.
    if method not in attack_factories:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    adv_crafter = attack_factories[method](warped_model)

    correct, total = 0, 0

    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    # drop_last=True: a trailing batch smaller than 32 samples is not evaluated
    temp_loader = DataLoader(dataset=adv_dataset,
                             batch_size=32,
                             shuffle=False,
                             drop_last=True)

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return correct / total
Example #18
0
    def _test_backend_mnist(self, classifier):
        """
        Run DeepFool (`max_iter=5`, `batch_size=11`) against `classifier` on the train and test sets.

        Asserts that the attack changes at least one train and one test input and that the predicted labels on
        the adversarial samples differ from the true labels somewhere; adversarial accuracies are logged.

        :param classifier: Classifier to attack and to predict with.
        """
        def hit_ratio(one_hot_preds, one_hot_truth):
            # Fraction of rows whose argmax agrees
            hits = np.sum(np.argmax(one_hot_preds, axis=1) == np.argmax(one_hot_truth, axis=1))
            return hits / one_hot_truth.shape[0]

        deepfool = DeepFool(classifier, max_iter=5, batch_size=11)
        adv_train = deepfool.generate(self.x_train)
        adv_test = deepfool.generate(self.x_test)

        # The attack must have altered at least one value in each set
        self.assertFalse((self.x_train == adv_train).all())
        self.assertFalse((self.x_test == adv_test).all())

        adv_train_labels = get_labels_np_array(classifier.predict(adv_train))
        adv_test_labels = get_labels_np_array(classifier.predict(adv_test))

        # Predictions on adversarial samples must not match the ground truth everywhere
        self.assertFalse((self.y_train == adv_train_labels).all())
        self.assertFalse((self.y_test == adv_test_labels).all())

        accuracy = hit_ratio(adv_train_labels, self.y_train)
        logger.info('Accuracy on adversarial train examples: %.2f%%', (accuracy * 100))

        accuracy = hit_ratio(adv_test_labels, self.y_test)
        logger.info('Accuracy on adversarial test examples: %.2f%%', (accuracy * 100))
Example #19
0
    def test_classifier_type_check_fail_classifier(self):
        """
        `DeepFool` must raise a `TypeError` with the expected message when given a classifier that has no
        classifier API.
        """
        # Empty stand-in used to exercise the basic classifier type check
        class ClassifierNoAPI:
            pass

        with self.assertRaises(TypeError) as context:
            _ = DeepFool(classifier=ClassifierNoAPI)

        expected_message = ('For `DeepFool` classifier must be an instance of '
                            '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
                            '(<class \'object\'>,).')
        self.assertIn(expected_message, str(context.exception))
Example #20
0
    def test_transfer(self):
        """
        Adversarially train `self.classifier_k` against DeepFool samples crafted on `self.classifier_tf`.

        Accuracy on the adversarial test samples after training must be at least `ACCURACY_DROP` times the
        accuracy before training; both accuracies are logged. Finally `x_test` is checked to be unmodified.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_snapshot = x_test.copy()

        # Samples are crafted on one classifier and evaluated on the other
        deepfool = DeepFool(self.classifier_tf)
        x_test_adv = deepfool.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, deepfool)
        trainer.fit(x_train, y_train, nb_epochs=2, batch_size=6)

        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST
        self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_new * 100))

        # x_test must not have been modified by the attack or the classifier
        largest_change = float(np.max(np.abs(x_test_snapshot - x_test)))
        self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
Example #21
0
    def test_classifier_type_check_fail_gradients(self):
        """
        `DeepFool` must raise a `TypeError` with the expected message when given a scikit-learn decision tree
        classifier.
        """
        from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
        from sklearn.tree import DecisionTreeClassifier

        # Decision-tree wrapper used as a classifier the white-box attack should reject
        tree_classifier = ScikitlearnDecisionTreeClassifier(model=DecisionTreeClassifier())
        with self.assertRaises(TypeError) as context:
            _ = DeepFool(classifier=tree_classifier)

        expected_message = ('For `DeepFool` classifier must be an instance of '
                            '`art.classifiers.classifier.ClassifierNeuralNetwork` and '
                            '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
                            '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).')
        self.assertIn(expected_message, str(context.exception))
Example #22
0
    def test_two_attacks_with_generator(self):
        """
        Adversarially train `self.classifier` with FGSM and DeepFool through a data generator and pin the
        resulting accuracies.

        Accuracy is measured on FGSM samples of the test set before and after training and compared against
        fixed expected values. Finally `x_train` and `x_test` are checked to be unmodified.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_snapshot = x_train.copy()
        x_test_snapshot = x_test.copy()

        class MyDataGenerator(DataGenerator):
            # Serves random batches, drawn without replacement, from in-memory arrays
            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x, self.y = x, y
                self._size, self._batch_size = size, batch_size

            def get_batch(self):
                nb_drawn = min(self.size, self.batch_size)
                picked = np.random.choice(self.size, size=nb_drawn, replace=False)
                return self.x[picked], self.y[picked]

        generator = MyDataGenerator(x_train, y_train, size=x_train.shape[0], batch_size=16)

        fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
        deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

        # Accuracy on FGSM samples before adversarial training
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

        trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
        trainer.fit_generator(generator, nb_epochs=3)

        # Accuracy on the same samples after adversarial training
        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
        self.assertAlmostEqual(accuracy, 0.11, delta=0.0)

        # x_train and x_test must not have been modified by the attack or the classifier
        largest_train_change = float(np.max(np.abs(x_train_snapshot - x_train)))
        self.assertAlmostEqual(largest_train_change, 0.0, delta=0.00001)
        largest_test_change = float(np.max(np.abs(x_test_snapshot - x_test)))
        self.assertAlmostEqual(largest_test_change, 0.0, delta=0.00001)
def general_test(model,
                 optimizer,
                 input_shape,
                 nb_classes,
                 test_loader,
                 method,
                 btrain=False,
                 model_file='last_model_92_sgd.pkl'):
    """
    Evaluate `model` on adversarial examples crafted batch by batch with the selected attack.

    The model is wrapped in a `PyTorchClassifier` with clip values (0, 1). Every batch of `test_loader` is
    attacked, moved to the GPU with `.cuda()` and classified; overall and per-class accuracies are printed.
    Per-class statistics are kept for 10 classes, named through the module-level `_classes`.

    :param model: PyTorch module to evaluate; it is switched to eval mode.
    :param optimizer: Forwarded to `PyTorchClassifier`.
    :param input_shape: Forwarded to `PyTorchClassifier`.
    :param nb_classes: Forwarded to `PyTorchClassifier`.
    :param test_loader: Iterable of `(images, labels)` batches.
    :param method: Attack name: 'Deepfool', 'BIM', 'JSMA', 'CW2' or 'CWI'.
    :param btrain: When false, weights are first loaded from `model_file`.
    :param model_file: Checkpoint path passed to `torch.load`; the result is loaded as the state dict.
    :return: `correct / total` accumulated over all batches.
    :raises NotImplementedError: If `method` is not one of the supported attack names.
    """
    # Single table of supported attacks. The lambdas defer construction until the wrapped classifier exists.
    attack_factories = {
        'Deepfool': lambda clf: DeepFool(clf),
        'BIM': lambda clf: BasicIterativeMethod(clf, batch_size=20),
        'JSMA': lambda clf: SaliencyMapMethod(clf, batch_size=20),
        'CW2': lambda clf: CarliniL2Method(clf, batch_size=20),
        'CWI': lambda clf: CarliniLInfMethod(clf, batch_size=20),
    }
    # Reject an unknown attack before loading weights or building the wrapper; previously this surfaced
    # later as an unbound `adv_crafter`.
    if method not in attack_factories:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    if not btrain:
        model.load_state_dict(torch.load(model_file))
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    adv_crafter = attack_factories[method](warped_model)

    correct, total = 0, 0
    class_correct = [0.] * 10
    class_total = [0.] * 10

    for images, labels in test_loader:
        images = adv_crafter.generate(images.numpy())

        images = Variable(torch.from_numpy(images).cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        hits = predicted == labels.data
        batch_len = labels.size(0)
        total += batch_len
        correct += hits.sum()

        # Flatten instead of squeeze so a one-sample batch still indexes as a 1-D vector, and walk the
        # actual batch length rather than a fixed 20 so batches of any size are counted correctly.
        flat_hits = hits.view(-1)
        for i in range(batch_len):
            label = int(labels.data[i])
            class_correct[label] += float(flat_hits[i])
            class_total[label] += 1

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    for i in range(10):
        if class_total[i] == 0:
            # No sample of this class was seen; avoid dividing by zero
            print('Accuracy of %5s : n/a (no samples)' % (_classes[i],))
            continue
        print('Accuracy of %5s : %2d %%' %
              (_classes[i], 100 * class_correct[i] / class_total[i]))
    return correct / total
Example #24
0
    def test_two_attacks(self):
        """
        Adversarially train `self.classifier_k` with FGSM and DeepFool.

        FGSM is built on `self.classifier_k`, DeepFool on `self.classifier_tf`. Accuracy on FGSM samples of the
        test set after training must be at least `ACCURACY_DROP` times the accuracy before training; both
        accuracies are logged.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        fgsm = FastGradientMethod(self.classifier_k)
        deepfool = DeepFool(self.classifier_tf)

        # Accuracy on FGSM samples before adversarial training
        x_test_adv = fgsm.generate(x_test)
        labels_before = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        acc = np.sum(labels_before == np.argmax(y_test, axis=1)) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
        trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        # Accuracy on the same samples after adversarial training
        labels_after = np.argmax(trainer.predict(x_test_adv), axis=1)
        acc_new = np.sum(labels_after == np.argmax(y_test, axis=1)) / NB_TEST

        # The new accuracy is not required to be higher; a slight drop is tolerated
        self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))
Example #25
0
class DeepFoolAttack(AdversarialAttack):
    """Adapter that exposes a DeepFool attack object through ``attack_method``.

    NOTE(review): relies on the base-class constructor to make the wrapped
    model available as ``self.model`` -- confirm against AdversarialAttack.
    """

    def __init__(self,
                 model,
                 overshoot=1e-6,
                 max_iterations=100,
                 n_candidates=10,
                 batch_size=16):
        """Store the attack settings and build the underlying DeepFool object.

        :param model: forwarded to the base-class constructor as ``model``.
        :param overshoot: handed to DeepFool as ``epsilon``.
        :param max_iterations: handed to DeepFool as ``max_iter``.
        :param n_candidates: handed to DeepFool as ``nb_grads``.
        :param batch_size: handed to DeepFool as ``batch_size``; it is not
            kept as an attribute on this object.
        """
        super().__init__(model=model)
        self._overshoot = overshoot
        self._max_iterations = max_iterations
        self._n_candidates = n_candidates

        # Translate this wrapper's parameter names into DeepFool's keywords.
        deepfool_settings = {
            'classifier': self.model,
            'epsilon': self._overshoot,
            'max_iter': self._max_iterations,
            'nb_grads': self._n_candidates,
            'batch_size': batch_size,
        }
        self._method = DeepFool(**deepfool_settings)

    def attack_method(self, x, y=None):
        """Return the result of running the DeepFool attack on ``x``.

        :param x: inputs passed to ``DeepFool.generate``.
        :param y: accepted for interface compatibility but never used.
        :return: whatever ``DeepFool.generate(x=x)`` returns.
        """
        adversarial = self._method.generate(x=x)
        return adversarial
Example #26
0
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    """
    Wrap a model in a PyTorchClassifier and build the requested attack on top of it.

    `model.eval()` is called before wrapping, so the caller's model is left in evaluation mode.

    :param model: model to attack; must provide `eval()` and is passed to `PyTorchClassifier`.
    :param optimizer: optimizer passed to `PyTorchClassifier`.
    :param loss: loss passed to `PyTorchClassifier`.
    :param input_shape: input shape passed to `PyTorchClassifier`.
    :param nb_class: number of classes passed to `PyTorchClassifier`.
    :param method: attack name, one of 'deepfool', 'bim', 'jsma', 'cw2', 'cwi', 'fgsm', 'pgd'.
    :param batch_size: batch size forwarded to the attack, for every supported method.
    :param pgd_eps: `eps` value, only used when `method == 'pgd'`.
    :return: the constructed attack object.
    :raises NotImplementedError: if `method` is not one of the supported names.
    """
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    if method == 'deepfool':
        # batch_size used to be dropped for DeepFool only, so the caller's value was silently ignored
        adv_crafter = DeepFool(wmodel, batch_size=batch_size)
    elif method == 'bim':
        adv_crafter = BasicIterativeMethod(wmodel, batch_size=batch_size)
    elif method == 'jsma':
        adv_crafter = SaliencyMapMethod(wmodel, batch_size=batch_size)
    elif method == 'cw2':
        adv_crafter = CarliniL2Method(wmodel, batch_size=batch_size)
    elif method == 'cwi':
        adv_crafter = CarliniLInfMethod(wmodel, batch_size=batch_size)
    elif method == 'fgsm':
        adv_crafter = FastGradientMethod(wmodel, batch_size=batch_size)
    elif method == 'pgd':
        adv_crafter = ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps)
    else:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    return adv_crafter
Example #27
0
    def test_two_attacks(self):
        """
        Check adversarial training of `self.classifier_k` with two attacks, and that the test inputs stay untouched.

        FGSM is built on `self.classifier_k` and DeepFool on `self.classifier_tf`. Accuracy on the FGSM-perturbed
        test set after training must be at least `ACCURACY_DROP` times the accuracy before training. Finally the
        test images are compared against a copy taken at the start of the test.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        # Snapshot used at the end to detect in-place modification of the inputs
        x_test_original = x_test.copy()

        fgsm = FastGradientMethod(self.classifier_k)
        deepfool = DeepFool(self.classifier_tf)

        # Baseline accuracy on FGSM samples, measured before any adversarial training
        x_test_adv = fgsm.generate(x_test)
        scores_before = self.classifier_k.predict(x_test_adv)
        hits_before = np.argmax(scores_before, axis=1) == np.argmax(y_test, axis=1)
        acc = np.sum(hits_before) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
        trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        # Same adversarial samples, now scored through the trainer
        scores_after = trainer.predict(x_test_adv)
        hits_after = np.argmax(scores_after, axis=1) == np.argmax(y_test, axis=1)
        acc_new = np.sum(hits_after) / NB_TEST

        # No reason to assert the newer accuracy is higher. It might go down slightly
        self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))

        # Check that x_test has not been modified by attack and classifier
        largest_change = float(np.max(np.abs(x_test_original - x_test)))
        self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
Example #28
0
def _gzsl_run_split(dataloader, classifier, attack, defense, batch_size,
                    clean_results):
    """Collect labels and predicted classes for one dataloader (seen or unseen).

    For every sample the attack is run and the perturbed input is classified.
    When ``defense`` is not None the defended perturbed input is classified as
    well. When ``clean_results`` is truthy the unperturbed input (and its
    defended version, if a defence is given) is classified too.

    :param dataloader: iterable of samples; ``sample[0]`` and ``sample[1]``
        must provide ``.numpy()`` (inputs and labels respectively).
    :param classifier: object exposing ``predict(x, batch_size=...)``.
    :param attack: object exposing ``generate(x=...)``.
    :param defense: callable returning a 2-tuple whose first item is the
        defended input, or None to skip all defended predictions.
    :param batch_size: batch size forwarded to ``classifier.predict``.
    :param clean_results: whether to also predict on unperturbed inputs.
    :return: tuple of numpy arrays ``(labels, preds, preds_defended,
        adv_preds, adv_preds_defended)``; arrays for skipped evaluations
        are empty.
    """
    labels = []
    preds = []
    preds_defended = []
    adv_preds = []
    adv_preds_defended = []

    for index, sample in enumerate(dataloader):
        img = sample[0].numpy()
        label = sample[1].numpy()

        if clean_results:
            if defense is not None:
                img_def, _ = defense(img)
                predictions_defended = classifier.predict(
                    img_def, batch_size=batch_size)
                preds_defended.extend(
                    np.argmax(predictions_defended, axis=1))
            predictions = classifier.predict(img, batch_size=batch_size)
            preds.extend(np.argmax(predictions, axis=1))

        img_perturbed = attack.generate(x=img)
        if defense is not None:
            img_perturbed_defended, _ = defense(img_perturbed)
            predictions_adv_defended = classifier.predict(
                img_perturbed_defended, batch_size=batch_size)
            adv_preds_defended.extend(
                np.argmax(predictions_adv_defended, axis=1))

        predictions_adv = classifier.predict(img_perturbed,
                                             batch_size=batch_size)
        adv_preds.extend(np.argmax(predictions_adv, axis=1))
        labels.extend(label)

        # Coarse progress report
        if index % 1000 == 0:
            print(index, len(dataloader))

    return (np.array(labels), np.array(preds), np.array(preds_defended),
            np.array(adv_preds), np.array(adv_preds_defended))


def gzsl_launch(dataloader_seen, dataloader_unseen, all_vectors, criterion,
                params):
    """Attack a GZSL model on seen and unseen data and print the scores.

    Builds a ResNet-101 feature extractor plus a loaded model inside a
    ``FullGraph``, wraps it in a ``PyTorchClassifier``, runs the configured
    attack (and optional input defence) over both dataloaders, and prints the
    (seen, unseen, h) triples returned by ``harmonic_score_gzsl`` together
    with the elapsed time. Nothing is returned.

    :param dataloader_seen: dataloader for the seen split.
    :param dataloader_unseen: dataloader for the unseen split.
    :param all_vectors: passed to ``FullGraph``.
    :param criterion: loss passed to ``PyTorchClassifier``.
    :param params: configuration dict. Keys read here: ``"dataset"``
        ("CUB", "AWA2" or "SUN"), ``"hasDefense"``, ``"defense"``
        ("label_smooth", "spatial_smooth" or "totalvar"), ``"attack"``
        ("fgsm", "deepfool" or "carlini_wagner"), ``"clean_results"``, and
        the per-attack / per-defence sub-dicts ``"fgsm_params"``,
        ``"deepfool_params"``, ``"carliniwagner_params"``, ``"ss_params"``,
        ``"totalvar_params"``; for Carlini-Wagner also ``"batch_size"`` and
        ``"custom_collate"``.
    :raises ValueError: if the dataset, attack or defence name is not one of
        the supported values listed above.
    """
    dataset = params["dataset"]
    if dataset == "CUB":
        from configs.config_CUB import MODEL_PATH, SMOOTHED_MODEL_PATH
        no_classes = 200
    elif dataset == "AWA2":
        from configs.config_AWA2 import MODEL_PATH, SMOOTHED_MODEL_PATH
        no_classes = 50
    elif dataset == "SUN":
        from configs.config_SUN import MODEL_PATH, SMOOTHED_MODEL_PATH
        no_classes = 717
    else:
        # Previously fell through and failed later on an unbound MODEL_PATH
        raise ValueError("Unsupported dataset: {}".format(dataset))

    resnet = torchvision.models.resnet101(pretrained=True).cuda()
    # Every child module of the ResNet except the last one
    feature_extractor = nn.Sequential(*list(resnet.children())[:-1])

    # Label smoothing is handled by loading a different checkpoint, not by
    # transforming inputs
    if params["hasDefense"] and params["defense"] == "label_smooth":
        model_ale = torch.load(SMOOTHED_MODEL_PATH).cuda()
    else:
        model_ale = torch.load(MODEL_PATH).cuda()

    full_graph = FullGraph(feature_extractor, model_ale, all_vectors).cuda()
    full_graph.eval()
    optimizer = optim.SGD(full_graph.parameters(), lr=0.01, momentum=0.5)

    classifier = PyTorchClassifier(model=full_graph,
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(1, 150, 150),
                                   nb_classes=no_classes)

    if params["attack"] == "fgsm":
        batch_size = 1
        attack = FastGradientMethod(classifier=classifier,
                                    eps=params["fgsm_params"]["epsilon"],
                                    batch_size=batch_size)

    elif params["attack"] == "deepfool":
        batch_size = 1
        attack = DeepFool(classifier,
                          max_iter=params["deepfool_params"]["max_iter"],
                          epsilon=params["deepfool_params"]["epsilon"],
                          nb_grads=params["deepfool_params"]["nb_grads_gzsl"],
                          batch_size=batch_size)

    elif params["attack"] == "carlini_wagner":
        batch_size = params["batch_size"] if params["custom_collate"] else 1
        cw_params = params["carliniwagner_params"]
        attack = CarliniL2Method(
            classifier,
            confidence=cw_params["confidence"],
            learning_rate=cw_params["learning_rate"],
            binary_search_steps=cw_params["binary_search_steps"],
            max_iter=cw_params["max_iter"],
            initial_const=cw_params["initial_const"],
            max_halving=cw_params["max_halving"],
            max_doubling=cw_params["max_doubling"],
            batch_size=batch_size)

    else:
        # Previously left `attack` and `batch_size` unbound
        raise ValueError("Unsupported attack: {}".format(params["attack"]))

    start = time.time()

    # An input-transforming defence is used for every defence except
    # label smoothing; None means no defended predictions are made.
    defense = None
    if params["hasDefense"] and params["defense"] != "label_smooth":
        if params["defense"] == "spatial_smooth":
            defense = SpatialSmoothing(
                window_size=params["ss_params"]["window_size"])
        elif params["defense"] == "totalvar":
            defense = TotalVarMin(
                max_iter=params["totalvar_params"]["max_iter"])
        else:
            # Previously left `defense` unbound
            raise ValueError(
                "Unsupported defense: {}".format(params["defense"]))

    clean_results = params["clean_results"]

    (labels_seen_, preds_seen, preds_seen_defended, adv_preds_seen,
     adv_preds_seen_defended) = _gzsl_run_split(dataloader_seen, classifier,
                                                attack, defense, batch_size,
                                                clean_results)
    (labels_unseen_, preds_unseen, preds_unseen_defended, adv_preds_unseen,
     adv_preds_unseen_defended) = _gzsl_run_split(dataloader_unseen,
                                                  classifier, attack, defense,
                                                  batch_size, clean_results)

    end = time.time()

    uniq_labels_seen = np.unique(labels_seen_)
    uniq_labels_unseen = np.unique(labels_unseen_)
    combined_labels = np.concatenate((labels_seen_, labels_unseen_))

    if clean_results:
        combined_preds = np.concatenate((preds_seen, preds_unseen))
        seen, unseen, h = harmonic_score_gzsl(combined_preds, combined_labels,
                                              uniq_labels_seen,
                                              uniq_labels_unseen)
        print("GZSL Clean (s/u/h):", seen, unseen, h)

        if defense is not None:
            combined_preds_defended = np.concatenate(
                (preds_seen_defended, preds_unseen_defended))
            seen, unseen, h = harmonic_score_gzsl(combined_preds_defended,
                                                  combined_labels,
                                                  uniq_labels_seen,
                                                  uniq_labels_unseen)
            print("GZSL Clean + defended (s/u/h):", seen, unseen, h)

    combined_preds_adv = np.concatenate((adv_preds_seen, adv_preds_unseen))
    seen, unseen, h = harmonic_score_gzsl(combined_preds_adv, combined_labels,
                                          uniq_labels_seen, uniq_labels_unseen)
    print("GZSL Attacked (s/u/h):", seen, unseen, h)

    if defense is not None:
        combined_preds_adv_defended = np.concatenate(
            (adv_preds_seen_defended, adv_preds_unseen_defended))
        seen, unseen, h = harmonic_score_gzsl(combined_preds_adv_defended,
                                              combined_labels,
                                              uniq_labels_seen,
                                              uniq_labels_unseen)
        print("GZSL Attacked + defended (s/u/h):", seen, unseen, h)

    print(end - start, "seconds passed for GZSL.")
Example #29
0
# NOTE(review): `model`, `x_train`, `y_train`, `x_test`, `y_test`, `min_` and
# `max_` are defined earlier in the script, outside this excerpt.
# Last layers of the network: ReLU, dropout (0.5), then a 10-unit softmax output
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Create classifier wrapper around the compiled model and train it
classifier = KerasClassifier(model=model, clip_values=(min_, max_))
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

# Craft adversarial samples with DeepFool (for both the training and test sets)
logger.info('Create DeepFool attack')
adv_crafter = DeepFool(classifier)
logger.info('Craft attack on training examples')
x_train_adv = adv_crafter.generate(x_train)
logger.info('Craft attack test examples')
x_test_adv = adv_crafter.generate(x_test)

# Evaluate the classifier on the adversarial samples
# (labels are compared via argmax over axis 1 of y_test)
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Classifier before adversarial training')
logger.info('Accuracy on adversarial samples: %.2f%%', (acc * 100))

# Data augmentation: expand the training set with the adversarial samples
# y_train is appended to itself so each adversarial sample reuses the label of
# the clean sample it was crafted from (x_train_adv follows x_train's order)
x_train = np.append(x_train, x_train_adv, axis=0)
y_train = np.append(y_train, y_train, axis=0)
    classifier = KerasClassifier(model=model, clip_values=(0, 1))
    return classifier


# Get session
# Create a session and register it via `k.set_session`
# (presumably `k` is the Keras backend module -- confirm against the imports)
session = tf.Session()
k.set_session(session)

# Read MNIST dataset
# (load_mnist also returns min_ and max_, which are not used in this excerpt)
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
# This is the source model the adversarial samples are crafted against
source = cnn_mnist_k(x_train.shape[1:])
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
adv_crafter = DeepFool(source)
x_train_adv = adv_crafter.generate(x_train)
x_test_adv = adv_crafter.generate(x_test)

# Construct and train a convolutional neural network
# This is the target model, built by cnn_mnist_tf and trained on the clean training data only
target = cnn_mnist_tf(x_train.shape[1:])
target.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the CNN on the adversarial samples
# The samples were crafted on `source` but are scored on `target`
preds = target.predict(x_test_adv)
acc = np.sum(np.equal(np.argmax(preds, axis=1), np.argmax(
    y_test, axis=1))) / y_test.shape[0]
print("\nAccuracy on adversarial samples: %.2f%%" % (acc * 100))