Esempio n. 1
0
    def test_iris_pt(self):
        """Exercise untargeted and targeted BIM on the PyTorch Iris classifier.

        For both attacks the adversarial test set must differ from the clean
        one and stay inside [0, 1]. The untargeted attack must leave at least
        one sample misclassified; the targeted attack must drive at least one
        sample to its target class.
        """
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()
        true_labels = np.argmax(y_test, axis=1)
        nb_samples = y_test.shape[0]

        # --- Untargeted attack ---
        untargeted = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
        x_adv = untargeted.generate(x_test)
        self.assertFalse((x_test == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        acc = np.sum(adv_labels == true_labels) / nb_samples
        logger.info('Accuracy on Iris with BIM adversarial examples: %.2f%%',
                    (acc * 100))

        # --- Targeted attack ---
        targets = random_targets(y_test, nb_classes=3)
        target_labels = np.argmax(targets, axis=1)
        targeted = BasicIterativeMethod(classifier,
                                        targeted=True,
                                        eps=1,
                                        eps_step=0.1,
                                        batch_size=128)
        x_adv = targeted.generate(x_test, y=targets)
        self.assertFalse((x_test == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertTrue((target_labels == adv_labels).any())
        acc = np.sum(adv_labels == target_labels) / nb_samples
        logger.info('Success rate of targeted BIM on Iris: %.2f%%',
                    (acc * 100))
Esempio n. 2
0
    def _test_backend_mnist(self, classifier):
        """Attack both MNIST splits with BIM and check the attack has an effect.

        The adversarial samples must differ from the clean ones, and the
        labels predicted on them must not all match the true labels. The
        resulting accuracies are logged for information only.

        :param classifier: Classifier under attack; also used to label the
            adversarial samples.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        bim = BasicIterativeMethod(classifier,
                                   eps=1,
                                   eps_step=0.1,
                                   batch_size=128)
        adv_train = bim.generate(x_train)
        adv_test = bim.generate(x_test)

        # The attack must actually perturb the inputs
        self.assertFalse((x_train == adv_train).all())
        self.assertFalse((x_test == adv_test).all())

        train_y_pred = get_labels_np_array(classifier.predict(adv_train))
        test_y_pred = get_labels_np_array(classifier.predict(adv_test))

        # ... and flip at least one predicted label per split
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        reports = (
            ('Accuracy on adversarial train examples: %.2f%%',
             train_y_pred, y_train),
            ('Accuracy on adversarial test examples: %.2f%%',
             test_y_pred, y_test),
        )
        for message, predicted, expected in reports:
            hits = np.argmax(predicted, axis=1) == np.argmax(expected, axis=1)
            acc = np.sum(hits) / expected.shape[0]
            logger.info(message, (acc * 100))
def ifgsm(clf, x_train, x_test, epsilon=0.1, max_iter=10, eps_step=0.1):
    """Craft iterative-FGSM (BIM) adversarial examples for both data splits.

    :param clf: ART classifier wrapper handed to ``BasicIterativeMethod``.
    :param x_train: Training samples to perturb.
    :param x_test: Test samples to perturb.
    :param epsilon: Total perturbation budget (``eps`` of the attack).
    :param max_iter: Number of attack iterations.
    :param eps_step: Per-iteration step size. It used to be fixed at 0.1;
        it is now a parameter so the step can be scaled together with
        ``epsilon``. The default keeps the previous behaviour.
        NOTE(review): ART is expected to reject ``eps_step > eps`` -- confirm
        against the installed version before lowering ``epsilon`` alone.
    :return: Tuple ``(x_train_adv, x_test_adv)``.
    """
    from art.attacks.iterative_method import BasicIterativeMethod

    ifgsm_adv_crafter = BasicIterativeMethod(clf,
                                             eps=epsilon,
                                             eps_step=eps_step,
                                             max_iter=max_iter)
    # Test split first, then train split: same call order as before.
    x_test_ifgsm_adv = ifgsm_adv_crafter.generate(x=x_test)
    x_train_ifgsm_adv = ifgsm_adv_crafter.generate(x=x_train)
    return x_train_ifgsm_adv, x_test_ifgsm_adv
Esempio n. 4
0
    def _test_mnist_targeted(self, classifier):
        """Check that targeted BIM pushes MNIST test samples to a chosen class.

        Each sample is targeted at the class the classifier currently ranks
        second. The test requires the adversarial inputs to differ from the
        clean ones and at least half of the samples to be predicted as their
        target class.

        :param classifier: Classifier under attack; also used to pick the
            targets and to label the adversarial samples.
        """
        # Get MNIST
        (_, _), (x_test, _) = self.mnist
        nb_samples = x_test.shape[0]

        # Targeted BIM attack (norm left at the attack's default)
        attack = BasicIterativeMethod(classifier,
                                      eps=1.0,
                                      eps_step=0.01,
                                      targeted=True,
                                      batch_size=128)

        # One-hot targets on the second most likely class of each sample.
        # The width comes from the predictions instead of a hard-coded 10.
        clean_preds = classifier.predict(x_test)
        second_best = clean_preds.argsort(axis=1)[:, -2]
        y_test_adv = np.zeros((nb_samples, clean_preds.shape[1]))
        y_test_adv[np.arange(nb_samples), second_best] = 1.0

        x_test_adv = attack.generate(x_test, y=y_test_adv)

        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertEqual(y_test_adv.shape, test_y_pred.shape)
        # Count samples whose whole label row matches the target. Summing the
        # element-wise comparison instead would also count the matching zeros
        # of one-hot rows, which makes the threshold impossible to miss.
        # This doesn't work all the time, especially with small networks
        nb_hits = np.all(y_test_adv == test_y_pred, axis=1).sum()
        self.assertGreaterEqual(nb_hits, nb_samples // 2)
    def test_iris_k_unbounded(self):
        """Run BIM on a Keras Iris classifier rebuilt without clip values.

        With no clipping configured, the adversarial samples are expected to
        differ from the clean ones and to leave [0, 1] on both sides; at
        least one sample must end up misclassified.
        """
        (_, _), (x_test, y_test) = self.iris
        clipped_classifier, _ = get_iris_classifier_kr()

        # Wrap the same underlying model again, this time without clip values
        classifier = KerasClassifier(model=clipped_classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        bim = BasicIterativeMethod(classifier, eps=1, eps_step=0.2)
        x_adv = bim.generate(x_test)

        self.assertFalse((x_test == x_adv).all())
        self.assertTrue((x_adv > 1).any())
        self.assertTrue((x_adv < 0).any())

        true_labels = np.argmax(y_test, axis=1)
        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        acc = np.sum(adv_labels == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with BIM adversarial examples: %.2f%%',
                    (acc * 100))
Esempio n. 6
0
    def attack(self, model=None, attack_str=""):
        """Load cached adversarial examples, or craft and save new ones.

        :param model: Keras model to attack. When ``None``, the instance's
            surrogate model is fitted on the training split and attacked
            instead.
        :param attack_str: Forwarded to ``_load_images`` together with the
            split marker to look up previously saved examples.
        :return: The adversarial examples, either loaded or freshly crafted.
        :raises ValueError: If the configured attack is not FGSM, CW*, BIM or
            DEEPFOOL, or if BIM is requested for a dataset other than MNIST
            or CIFAR. Previously these cases failed with an unbound-variable
            error, and only after the surrogate model had been trained.
        """
        cached = self._load_images(attack_str, self._test_or_val_dataset)
        # Identity check: safe whatever container type the cache returns.
        if cached is not None:
            print('\n{0} adversarial examples using {1} attack loaded...\n'.
                  format(self.__dataset, self.__attack))
            return cached

        attack_name = self.__attack
        dataset = self.__dataset

        # Validate the configuration before any expensive training/crafting.
        is_cw = attack_name.startswith("CW")
        if not is_cw and attack_name not in ('FGSM', 'BIM', 'DEEPFOOL'):
            raise ValueError(
                'Unsupported attack: {0!r}'.format(attack_name))
        if attack_name == 'BIM' and dataset not in ('MNIST', 'CIFAR'):
            raise ValueError(
                'BIM parameters are only defined for MNIST and CIFAR, '
                'got {0!r}'.format(dataset))

        # Pick the split to attack and restrict it to the selected samples.
        if self._test_or_val_dataset == "_x_test_set_":
            X = self.__data.x_test
        else:
            X = self.__data.x_val
        samples = X[self.idx_adv][:self._length]

        if model is None:
            # No model supplied: train the surrogate and attack that instead.
            self.surrogate_model.fit(self.__data.x_train,
                                     self.__data.y_train,
                                     verbose=1,
                                     epochs=self.__epochs,
                                     batch_size=128)
            model = self.surrogate_model
        wrap = KerasClassifier((0., 1.), model=model)

        if attack_name == 'FGSM':
            print('\nCrafting adversarial examples using FGSM attack...\n')
            fgsm = FastGradientMethod(wrap)
            eps = 0.2 if self.__data.dataset_name == 'MNIST' else 0.025
            x_adv_images = fgsm.generate(x=samples, eps=eps)
            file_suffix = "fgsm.pkl"

        elif is_cw:
            print('\nCrafting adversarial examples using CW attack...\n')
            cw = CarliniL2Method(wrap,
                                 confidence=0.0,
                                 targeted=False,
                                 binary_search_steps=1,
                                 learning_rate=0.2,
                                 initial_const=10,
                                 max_iter=100)
            x_adv_images = cw.generate(samples)
            file_suffix = "cw.pkl"

        elif attack_name == 'BIM':
            print('\nCrafting adversarial examples using BIM attack...\n')
            if dataset == 'MNIST':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.25,
                                           eps_step=0.2,
                                           max_iter=100,
                                           norm=np.inf)
            else:  # 'CIFAR' -- guaranteed by the validation above
                bim = BasicIterativeMethod(wrap,
                                           eps=0.025,
                                           eps_step=0.01,
                                           max_iter=1000,
                                           norm=np.inf)
            x_adv_images = bim.generate(x=samples)
            file_suffix = "bim.pkl"

        else:  # 'DEEPFOOL' -- guaranteed by the validation above
            print('\nCrafting adversarial examples using DeepFool attack...\n')
            deepfool = DeepFool(wrap)
            x_adv_images = deepfool.generate(x=samples)
            file_suffix = "deepfool.pkl"

        # Persist the crafted examples so later calls can load them.
        path = os.path.join(
            self._attack_dir,
            dataset.lower() + self._test_or_val_dataset + file_suffix)
        helpers.save_pkl(x_adv_images, path)

        return x_adv_images
Esempio n. 7
0
# # https://github.com/keras-team/keras/blob/58399c111a4639526f8d13d4bfa62fc3d0695b02/examples/cifar10_resnet.py

# Locate and load the saved model, then wrap it for ART.
DIR_PATH = os.path.dirname(os.path.realpath(__file__))
save_dir = os.path.join(DIR_PATH, 'saved_models')
model_name = 'cifar10_ResNet29v2_model.196'
filepath = os.path.join(save_dir, model_name + '.h5')

model = load_model(filepath)
classifier = KerasClassifier(model, clip_values=(min_, max_))

epsilon = 0.01
adv_crafter = BasicIterativeMethod(classifier, eps=epsilon, eps_step=0.001)
# Alternative attack: adv_crafter = CarliniL2Method(classifier, targeted=False)

# Only the first n_eval test samples are attacked and evaluated.
n_eval = 100
x_eval = x_test[:n_eval]
y_eval = y_test[:n_eval]
x_test_adv = adv_crafter.generate(x_eval)

# Evaluate the classifier on the adversarial samples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
originalpreds = np.argmax(classifier.predict(x_eval), axis=1)

# Divide by the number of evaluated samples. Dividing by the size of the
# whole test set understated the accuracy.
acc = np.sum(preds == np.argmax(y_eval, axis=1)) / y_eval.shape[0]
print('Accuracy on adversarial samples: %.2f%%' % (acc * 100))

# Keep only the samples whose prediction the attack managed to change.
success_adv_indices = (preds != originalpreds)
success_adv = x_test_adv[success_adv_indices]

# NOTE(review): the '_cl2' suffix suggests Carlini L2, but the active attack
# above is BIM -- confirm the intended file name.
np.save(os.path.join(DIR_PATH, 'success_adv_' + model_name + '_cl2'),
        success_adv)

detector = SubsetScanningDetector(filepath,
Esempio n. 8
0
# Finish the network: ReLU, dropout, then a 10-unit softmax output
for head_layer in (Activation('relu'), Dropout(0.5), Dense(10),
                   Activation('softmax')):
    model.add(head_layer)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Wrap the model for ART and train it on the clean data
classifier = KerasClassifier(clip_values=(0, 1), model=model)
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

# Build the BIM attack and perturb both splits
logger.info('Create BIM attack')
adv_crafter = BasicIterativeMethod(classifier,
                                   eps=0.3,
                                   eps_step=0.01,
                                   max_iter=40)
logger.info('Craft attack on training examples')
x_train_adv = adv_crafter.generate(x_train)
logger.info('Craft attack test examples')
x_test_adv = adv_crafter.generate(x_test)

# Accuracy of the not-yet-hardened classifier on the adversarial test set
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info('Classifier before adversarial training')
logger.info('Accuracy on adversarial samples: %.2f%%', (acc * 100))

# Data augmentation: stack the adversarial samples under the clean ones;
# each adversarial sample keeps the label of its clean source
x_train = np.concatenate((x_train, x_train_adv), axis=0)
y_train = np.concatenate((y_train, y_train), axis=0)

# Recompile the CNN before retraining on the extended dataset
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])