Example #1
    def _test_backend_mnist(self, classifier):
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Test DeepFool
        attack = DeepFool(classifier, max_iter=5)
        x_test_adv = attack.generate(x_test)
        x_train_adv = attack.generate(x_train)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%',
                    (acc * 100))
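
The argmax-vs-argmax accuracy computation above recurs in almost every example on this page. A minimal NumPy helper capturing that pattern (the name is hypothetical, not part of ART):

import numpy as np

def adversarial_accuracy(y_pred, y_true):
    # Fraction of samples whose predicted class matches the one-hot label.
    return np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_true, axis=1)) / y_true.shape[0]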
Example #2
    def test_mnist(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 11
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        df = DeepFool(classifier, sess=session)
        df.set_params(clip_min=0., clip_max=1.)
        x_test_adv = df.generate(X_test)
        self.assertFalse((X_test == x_test_adv).all())
        y_pred = classifier.predict(x_test_adv)
        self.assertFalse((Y_test == y_pred).all())

        scores = classifier.evaluate(x_test_adv, Y_test)
        print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
Example #3
    def _test_backend_mnist(self, classifier):
        # Get MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Test DeepFool
        attack = DeepFool(classifier, max_iter=5)
        x_test_adv = attack.generate(x_test)
        x_train_adv = attack.generate(x_train)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        print('\nAccuracy on adversarial train examples: %.2f%%' % (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        print('\nAccuracy on adversarial test examples: %.2f%%' % (acc * 100))
Example #4
    def test_multi_attack_mnist(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the attack
        are two CNNs trained on MNIST for 5 epochs. FGSM and DeepFool both generate the attack images on the same
        source classifier. The test checks whether accuracy on adversarial samples increases after adversarially
        training the model. (A sketch of the same flow with the post-1.0 ART API follows this example.)

        :return: None
        """
        session = tf.Session()
        k.set_session(session)

        # Load MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
        im_shape = x_train[0].shape

        # Create and fit target classifier
        comp_params = {
            'loss': 'categorical_crossentropy',
            'optimizer': 'adam',
            'metrics': ['accuracy']
        }
        params = {'epochs': 5, 'batch_size': BATCH_SIZE}
        classifier_tgt = CNN(im_shape, dataset='mnist')
        classifier_tgt.compile(comp_params)
        classifier_tgt.fit(x_train, y_train, **params)

        # Create source classifier
        classifier_src = CNN(im_shape, dataset='mnist')
        classifier_src.compile(comp_params)
        classifier_src.fit(x_train, y_train, **params)

        # Create FGSM and DeepFool attackers
        adv1 = FastGradientMethod(classifier_src, session)
        adv2 = DeepFool(classifier_src, session)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        y_adv = np.vstack((y_test, y_test))
        print(y_adv.shape)
        acc = classifier_tgt.evaluate(x_adv, y_adv)

        # Perform adversarial training
        adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
        adv_trainer.fit(x_train, y_train, **params)

        # Check that accuracy on adversarial samples has improved
        acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
        self.assertTrue(acc_adv_trained >= acc)
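
The test above targets ART's pre-1.0 API (the CNN helper, session-based attacks, trainer.classifier). A minimal sketch of the same multi-attack adversarial-training flow under the post-1.0 module layout; the import paths, parameter values, and get_classifier() accessor are assumptions about the newer API, not the original test's code:

# Sketch only: assumes ART >= 1.0 and a fitted ART classifier.
from art.attacks.evasion import DeepFool, FastGradientMethod
from art.defences.trainer import AdversarialTrainer

def multi_attack_adversarial_training(classifier, x_train, y_train):
    # Craft with two attackers against the same classifier, then train on a 50/50 mix.
    attacks = [FastGradientMethod(classifier, eps=0.1), DeepFool(classifier, max_iter=5)]
    trainer = AdversarialTrainer(classifier, attacks, ratio=0.5)
    trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)
    return trainer.get_classifier()  # older releases exposed this as trainer.classifier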
Example #5
    def test_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        attack = DeepFool(classifier, max_iter=5, batch_size=128)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with DeepFool adversarial examples: %.2f%%',
            (acc * 100))
Example #6
    def test_partial_grads(self):
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        attack = DeepFool(self.classifier_k, max_iter=2, nb_grads=3)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(
            self.classifier_k.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%',
                    (acc * 100))
Example #7
    def test_iris_k_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = DeepFool(classifier, max_iter=5, batch_size=128)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with DeepFool adversarial examples: %.2f%%',
            (acc * 100))
Example #8
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier,
                                  targeted=False,
                                  epsilon=0.05,
                                  max_iter=20)
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier,
                                   confidence=0.5,
                                   learning_rate=0.001,
                                   max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier,
                                     confidence=0.5,
                                     learning_rate=0.001,
                                     max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
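
A usage sketch for GetAttackers, assuming a fitted ART classifier and a test array x_test are already in scope:

# Hypothetical driver over a few of the attacker names handled above.
for name in ["FGSM", "DeepFool", "PGD"]:
    x_test_adv, dt = GetAttackers(classifier, x_test, name)
    print("%s: crafted %d adversarial samples in %.1fs" % (name, len(x_test_adv), dt))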
Example #9
    classifier = KerasClassifier((0, 1), model, use_logits=False)
    return classifier


# Get session
session = tf.Session()
k.set_session(session)

# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
source = cnn_mnist_k(x_train.shape[1:])
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
adv_crafter = DeepFool(source)
x_train_adv = adv_crafter.generate(x_train)
x_test_adv = adv_crafter.generate(x_test)

# Construct and train a convolutional neural network
target = cnn_mnist_tf(x_train.shape[1:])
target.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the CNN on the adversarial samples
preds = target.predict(x_test_adv)
acc = np.sum(np.equal(np.argmax(preds, axis=1), np.argmax(
    y_test, axis=1))) / y_test.shape[0]
print("\nAccuracy on adversarial samples: %.2f%%" % (acc * 100))
Example #10
    The `ratio` determines how many of the clean samples in each batch are replaced with their adversarial counterparts.
    Warning: both successful and unsuccessful adversarial samples are used for training. In the case of unbounded attacks
            (e.g., DeepFool), this can result in invalid (very noisy) samples being included (a clipping mitigation is
            sketched after this example).
    """
    if adv_train_attack == "FGM":
        attacks = FastGradientMethod(robust_classifier, eps=attack_par["epsilon"], norm=attack_par["norm_type"])
    elif adv_train_attack == "BIM":
        attacks = BasicIterativeMethod(robust_classifier, norm=attack_par["norm_type"], eps=attack_par["epsilon"],
                                              eps_step=attack_par["epsilon_step"], max_iter=attack_par["max_iteration"])
    elif adv_train_attack == "PGD":
        attacks = ProjectedGradientDescent(robust_classifier, norm=attack_par["norm_type"], eps=attack_par["epsilon"],
                                          eps_step=attack_par["epsilon_step"], max_iter=attack_par["max_iteration"])
    elif adv_train_attack == "JSMA":
        attacks = SaliencyMapMethod(robust_classifier, theta=attack_par["theta"], gamma=attack_par["gamma"])
    elif adv_train_attack == "DeepFool":
        attacks = DeepFool(robust_classifier, max_iter=attack_par["max_iteration"], epsilon=attack_par["epsilon"])


    # ================================== 2-2. Start adversarial training =============================== #
    trainer = AdversarialTrainer(robust_classifier, attacks, ratio=ratio_value)
    trainer.fit(x_train, y_train, nb_epochs=adv_train_num, batch_size=128, verbose=2)
    robust_classifier_model.save("./model/adv_model")
    end_time = time.time()
    model = load_model("./model/adv_model")
    scores = model.evaluate(x_test, y_test, verbose=0)
    # print('Test loss:', scores[0])
    # print('Test accuracy:', scores[1])
    # print("adv_model generation's timecost: " + str(end_time - begin_time))
    print("adversarial_training completed!")
    os.system("echo " + data_type + "_" + model_type + ": " + str(end_time - begin_time) + " > ../evaluation/adv_time.txt")
    os.system("rm -r ./model/origin_model")
Example #11
x_test_adv_robust = attacker_robust.generate(x_test[:100])
x_test_adv_robust_pred = np.argmax(
    robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against PGD attack: {}".format(
    nb_correct_adv_robust_pred))
# CW
attacker_robust = cw(robust_classifier, targeted=False, batch_size=100)
x_test_adv_robust = attacker_robust.generate(x_test[:100])
x_test_adv_robust_pred = np.argmax(
    robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against CW attack: {}".format(
    nb_correct_adv_robust_pred))
# DeepFool
adv_crafter_df = DeepFool(robust_classifier)
img_adv_df = adv_crafter_df.generate(x_test[0:100])
x_test_adv_robust_pred_df = np.argmax(robust_classifier.predict(img_adv_df),
                                      axis=1)
nb_correct_adv_robust_pred_df = np.sum(x_test_adv_robust_pred_df == true_label)
print("Correctly classified against DeepFool attack: {}".format(
    nb_correct_adv_robust_pred_df))

# Normal images
original_model = load_model('saved_models/mnist_cnn_original.h5')  # original
classifier = KerasClassifier(clip_values=(0, 1),
                             model=original_model,
                             use_logits=False)
x_test_pred = np.argmax(classifier.predict(x_test), axis=1)
nb_correct_pred = np.sum(
    x_test_pred == np.argmax(y_test, axis=1)) / y_test.shape[0] * 100
Example #12
    def attack(self, model=None, attack_str=""):
        imgs = self._load_images(attack_str, self._test_or_val_dataset)

        if self._test_or_val_dataset == "_x_test_set_":
            X = self.__data.x_test
            Y = self.__data.y_test
        else:
            X = self.__data.x_val
            Y = self.__data.y_val

        if imgs is not None:
            print('\n{0} adversarial examples using {1} attack loaded...\n'.
                  format(self.__dataset, self.__attack))
            return imgs

        if model is None:
            # Keras fit() returns a History object, not a model, so train in
            # place and wrap the trained surrogate model below.
            self.surrogate_model.fit(self.__data.x_train,
                                     self.__data.y_train,
                                     verbose=1,
                                     epochs=self.__epochs,
                                     batch_size=128)
            wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
        else:
            wrap = KerasClassifier((0., 1.), model=model)

        if self.__attack == 'FGSM':
            print('\nCrafting adversarial examples using FGSM attack...\n')
            fgsm = FastGradientMethod(wrap)

            if self.__data.dataset_name == 'MNIST':
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.2)
            else:
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.025)

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "fgsm.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack.startswith("CW"):
            print('\nCrafting adversarial examples using CW attack...\n')
            cw = CarliniL2Method(wrap,
                                 confidence=0.0,
                                 targeted=False,
                                 binary_search_steps=1,
                                 learning_rate=0.2,
                                 initial_const=10,
                                 max_iter=100)
            x_adv_images = cw.generate(X[self.idx_adv][:self._length])

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "cw.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'BIM':
            print('\nCrafting adversarial examples using BIM attack...\n')

            if self.__dataset == 'MNIST':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.25,
                                           eps_step=0.2,
                                           max_iter=100,
                                           norm=np.inf)
            if self.__dataset == 'CIFAR':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.025,
                                           eps_step=0.01,
                                           max_iter=1000,
                                           norm=np.inf)

            x_adv_images = bim.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "bim.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'DEEPFOOL':
            print('\nCrafting adversarial examples using DeepFool attack...\n')

            deepfool = DeepFool(wrap)
            x_adv_images = deepfool.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "deepfool.pkl")
            helpers.save_pkl(x_adv_images, path)

        return x_adv_images
Example #13
    'loss': 'categorical_crossentropy',
    'optimizer': 'adam',
    'metrics': ['accuracy']
}
classifier = CNN(im_shape, act='relu', dataset='cifar10')
classifier.compile(comp_params)
classifier.fit(x_train,
               y_train,
               validation_split=.1,
               epochs=10,
               batch_size=128)

# Craft adversarial samples with DeepFool
print('Create DeepFool attack')
epsilon = .1  # Maximum perturbation
adv_crafter = DeepFool(classifier, sess=session)
print('Craft training examples')
x_train_adv = adv_crafter.generate(x_val=x_train,
                                   eps=epsilon,
                                   clip_min=min_,
                                   clip_max=max_)
print('Craft test examples')
x_test_adv = adv_crafter.generate(x_val=x_test,
                                  eps=epsilon,
                                  clip_min=min_,
                                  clip_max=max_)

# Evaluate the classifier on the adversarial samples
scores = classifier.evaluate(x_test_adv, y_test)
print("\nClassifier before adversarial training")
print("Accuracy on adversarial samples: %.2f%%" % (scores[1] * 100))
Example #14
    # FGSM
    adv_crafter_fgsm = FastGradientMethod(cifar_classifier,
                                          eps=epsilon,
                                          eps_step=0.01,
                                          batch_size=batch_size)
    x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

    # Test the classifier on adversarial examples
    predictions = cifar_classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
            test_label_dataset_array)
    print('Accuracy after FGSM attack: {}%'.format(accuracy * 100))

    # Deepfool
    adv_crafter_deepfool = DeepFool(cifar_classifier, batch_size=batch_size)
    x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array)

    predictions = cifar_classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
            test_label_dataset_array)
    print('Accuracy after DeepFool attack: {}%'.format(accuracy * 100))

    # C&W

    adv_crafter_cwinf = CarliniLInfMethod(cifar_classifier,
                                          eps=epsilon,
                                          batch_size=batch_size)
    x_test_adv = adv_crafter_cwinf.generate(x=test_dataset_array)
Example #15
# Evaluate the classifier on the test set
preds = np.argmax(classifier.predict(x_test), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy: %.2f%%" % (acc * 100))

# Craft adversarial samples with FGSM
epsilon = .1  # Maximum perturbation
adv_crafter = FastGradientMethod(classifier)
x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon)
print(x_test_adv.shape)

local_path = "C:\\Users\\alonh\Documents\\Thesis\\MNIST-adversarial-images\\"
np.save(local_path + "adv_img_list_FGSM.npy", x_test_adv)

adv_crafter = DeepFool(classifier)
x_test_adv_DeepFool = adv_crafter.generate(x_test)
np.save(local_path + "adv_img_list_DeepFool.npy", x_test_adv_DeepFool)

# Evaluate the classifier on the adversarial examples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))

# reset and restore old variables
local_path = "C:\\Users\\alonh\Documents\\Thesis\\MNIST-adversarial-images\\original-nn-data.ckpt"

model_yaml = model.to_yaml()
with open(
        "C:\\Users\\alonh\\Documents\\Thesis\\MNIST-adversarial-images\\model.yaml",
        "w") as yaml_file:
    yaml_file.write(model_yaml)
Example #16
                                           clip_max=max_)
        X_test_adv = adv_crafter.generate(x_val=x_test,
                                          eps=e,
                                          clip_min=min_,
                                          clip_max=max_)

        if args.save:
            np.save(os.path.join(SAVE_ADV, "eps%.2f_train.npy" % e),
                    X_train_adv)
            np.save(os.path.join(SAVE_ADV, "eps%.2f_test.npy" % e),
                    X_test_adv)

else:
    if args.adv_method == 'deepfool':
        adv_crafter = DeepFool(classifier,
                               session,
                               clip_min=min_,
                               clip_max=max_)
    elif args.adv_method == 'jsma':
        adv_crafter = SaliencyMapMethod(classifier,
                                        sess=session,
                                        clip_min=min_,
                                        clip_max=max_,
                                        gamma=1,
                                        theta=max_)
    elif args.adv_method == 'carlini':
        adv_crafter = CarliniL2Method(classifier,
                                      sess=session,
                                      targeted=False,
                                      confidence=10)
    else:
        adv_crafter = UniversalPerturbation(classifier,
Example #17
def evaluation(x_test, y_test, classify_idx_lst, model, test_acc, ws,
               current_line, attack_name, flag, column_i):

    classifier = KerasClassifier((0., 1.), model=model)

    if attack_name == "FGM":
        # =========================== Parameter settings ========================= #
        # Maximum perturbation
        # Order of the norm
        parameter_lst = [[10, 1], [20, 1], [30, 1], [40, 1], [50, 1], [60, 1],
                         [70, 1], [80, 1], [90, 1], [100, 1], [1, 2], [2, 2],
                         [3, 2], [4, 2], [5, 2], [6, 2], [7, 2], [8, 2],
                         [9, 2], [10, 2], [0.05, np.inf], [0.10, np.inf],
                         [0.15, np.inf], [0.20, np.inf], [0.25, np.inf],
                         [0.30, np.inf], [0.35, np.inf], [0.40, np.inf],
                         [0.45, np.inf], [0.50, np.inf]]
        # =========================== Perform attack ========================= #
        for [epsilon, norm_type] in parameter_lst:
            # print("current parameter: " + str(epsilon) + ", " + str(norm_type))
            adv_crafter = FastGradientMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              eps=epsilon,
                                              norm=norm_type)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(epsilon, 4)) + ", " + str(norm_type) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "BIM":
        # =========================== Parameter settings ========================= #
        # Order of the norm
        # Maximum perturbation that the attacker can introduce
        # Attack step size (input variation) at each iteration
        # The maximum number of iterations.
        parameter_lst = [[1, 20.0, 2.0, 10], [1, 20.0, 4.0, 10],
                         [1, 20.0, 6.0, 10], [1, 20.0, 8.0, 10],
                         [1, 20.0, 10.0, 10], [1, 20.0, 2.0, 50],
                         [1, 20.0, 4.0, 50], [1, 20.0, 6.0, 50],
                         [1, 20.0, 8.0, 50], [1, 20.0, 10.0, 50],
                         [2, 2.0, 0.2, 10], [2, 2.0, 0.4, 10],
                         [2, 2.0, 0.6, 10], [2, 2.0, 0.8, 10],
                         [2, 2.0, 1.0, 10], [2, 2.0, 0.2, 50],
                         [2, 2.0, 0.4, 50], [2, 2.0, 0.6, 50],
                         [2, 2.0, 0.8, 50], [2, 2.0, 1.0, 50],
                         [np.inf, 0.1, 0.002, 10], [np.inf, 0.1, 0.004, 10],
                         [np.inf, 0.1, 0.006, 10], [np.inf, 0.1, 0.008, 10],
                         [np.inf, 0.1, 0.010, 10], [np.inf, 0.1, 0.002, 50],
                         [np.inf, 0.1, 0.004, 50], [np.inf, 0.1, 0.006, 50],
                         [np.inf, 0.1, 0.008, 50], [np.inf, 0.1, 0.010, 50]]
        # =========================== Perform attack ========================= #
        for [norm_type, epsilon, epsilon_step, max_iteration] in parameter_lst:
            # print("current parameter: " + str(norm_type) + ", " + str(epsilon) + ", " + str(epsilon_step) + ", " + str(
            #     max_iteration))
            adv_crafter = BasicIterativeMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              norm=norm_type,
                                              eps=epsilon,
                                              eps_step=epsilon_step,
                                              max_iter=max_iteration)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(norm_type) + ", " + str(round(epsilon, 4)) + ", " +
                str(round(epsilon_step, 4)) + ", " + str(max_iteration) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "JSMA":
        # =========================== Parameter settings ========================= #
        # Perturbation introduced to each modified feature per step (can be positive or negative).
        # Maximum percentage of perturbed features (between 0 and 1).
        parameter_lst = [[0.5, 0.5], [0.4, 0.5], [0.3, 0.5], [0.2, 0.5],
                         [0.1, 0.5], [-0.1, 0.5], [-0.2, 0.5], [-0.3, 0.5],
                         [-0.4, 0.5], [-0.5, 0.5]]
        # =========================== Perform attack ========================= #
        for [theta, gamma] in parameter_lst:
            # print("current parameter: " + str(theta) + ", " + str(gamma))
            adv_crafter = SaliencyMapMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              theta=theta,
                                              gamma=gamma)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(theta, 4)) + ", " + str(round(gamma, 4)) + ")")

            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "DeepFool":
        # =========================== Parameter settings ========================= #
        # The maximum number of iterations.
        # Overshoot parameter.
        parameter_lst = [[2, 0.10], [4, 0.10], [6, 0.10], [8, 0.10],
                         [10, 0.10], [12, 0.10], [14, 0.10], [16, 0.10],
                         [18, 0.10], [20, 0.10]]
        # =========================== Perform attack ========================= #
        for [max_iteration, epsilon] in parameter_lst:
            # print("current parameter: " + str(max_iteration) + ", " + str(epsilon))
            adv_crafter = DeepFool(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              max_iter=max_iteration,
                                              epsilon=epsilon)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(max_iteration) + ", " + str(round(epsilon, 4)) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "CW-L2":
        # =========================== Parameter settings ========================= #
        # confidence: Confidence of adversarial examples: a higher value produces examples that are farther away
        #         from the original input, but classified with higher confidence as the target class.
        # The maximum number of iterations.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]
        # =========================== Perform attack ========================= #
        for [confidence_value, max_iter_value] in parameter_lst:
            # print("current parameter: " + str(confidence_value) + ", " + str(max_iter_value))
            adv_crafter = CarliniL2Method(classifier)
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] *
                               x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv,
                                       y_test[classify_idx_lst],
                                       verbose=0)
                acc = score[1]
                sum_adv_acc += acc
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1, "(" + str(round(confidence_value, 4)) + ", " +
                str(max_iter_value) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1

    elif attack_name == "CW-Linf":
        # =========================== Parameter settings ========================= #
        # confidence: Confidence of adversarial examples: a higher value produces examples that are farther away
        #         from the original input, but classified with higher confidence as the target class.
        # The maximum number of iterations.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]
        # =========================== Perform attack ========================= #
        for [confidence_value, max_iter_value] in parameter_lst:
            # print("current parameter: " + str(confidence_value) + ", " + str(max_iter_value))
            adv_crafter = CarliniLInfMethod(classifier)
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] *
                               x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv,
                                       y_test[classify_idx_lst],
                                       verbose=0)
                acc = score[1]
                sum_adv_acc += acc
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1, "(" + str(round(confidence_value, 4)) + ", " +
                str(max_iter_value) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1

    current_line += 1
    # print("\n------------------------------------------------")
    return ws, current_line
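
A usage sketch for evaluation(), assuming the worksheet follows xlwt's ws.write(row, col, value) convention that the function relies on, and that model, x_test, y_test, classify_idx_lst and test_acc are already in scope:

import xlwt

wb = xlwt.Workbook()
ws = wb.add_sheet('attacks')
current_line = 0
for attack_name in ["FGM", "BIM", "JSMA", "DeepFool", "CW-L2", "CW-Linf"]:
    ws, current_line = evaluation(x_test, y_test, classify_idx_lst, model,
                                  test_acc, ws, current_line, attack_name,
                                  flag="ori", column_i=0)
wb.save('attack_results.xls')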
Example #18
# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
source = cnn_mnist_k()
source.compile(loss=keras.losses.categorical_crossentropy,
               optimizer=Adam(lr=0.01),
               metrics=['accuracy'])
source = KerasClassifier(clip_values=(min_, max_),
                         model=source,
                         use_logits=False)
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
adv_crafter = DeepFool(source)
x_test_adv = adv_crafter.generate(x_test)

# Compare with existing Adversarial Training (from ART)
robust_classifier = load_model('saved_models/mnist_cnn_robust.h5')
robust_classifier = KerasClassifier(clip_values=(0, 1),
                                    model=robust_classifier,
                                    use_logits=False)
print('compare_transfer.py for mnist dataset v2')
print('based on inf norm')

# Normal images
original_model = load_model('saved_models/mnist_cnn_original.h5')  # original
classifier = KerasClassifier(clip_values=(0, 1),
                             model=original_model,
                             use_logits=False)
Example #19
     if (preds_pgd_random[i] != preds_pgd[i]):
         TP_comb_pgd = TP_comb_pgd + 1
     else:
         TP_comb_pgd = TP_comb_pgd + Tpgd
 TPR_pgd_random = TP_pgd_random / adv_sample_cw
 print("\nTPR for PGD when random noise is added: %.3f%%" %
       (TPR_pgd_random * 100))
 TPR_pgd_comb = TP_comb_pgd / adv_sample_cw
 TPR_pgd = TP_pgd / adv_sample_cw
 print("\nTPR for PGD: %.3f%%" % (TPR_pgd * 100))
 print("\nTPR for PGD when combining: %.3f%%" % (TPR_pgd_comb * 100))
 # =============================================================================
 #     # Craft adversarial samples using DeepFool
 #   check
 # =============================================================================
 attack_DeepFool = DeepFool(classifier)
 x_test_adv_df = attack_DeepFool.generate(
     x=x_test[3 * adv_sample:3 * adv_sample + adv_sample_cw])
 # Evaluate the classifier on the adversarial examples
 # add test image noise
 x_test_adv_df_random = x_test_adv_df + np.random.normal(
     mean, 0.01, x_test_adv_df.shape)
 preds_df_random = np.argmax(classifier.predict(x_test_adv_df_random),
                             axis=1)
 preds_df = np.argmax(classifier.predict(x_test_adv_df), axis=1)
 y_adv = y_test[3 * adv_sample:3 * adv_sample + adv_sample_cw]
 TP_df_random = 0
 TP_df = 0
 TP_comb_df = 0
 for i in np.arange(adv_sample_cw):
     diff_random = x_test[i + 3 * adv_sample] - x_test_adv_df_random[i]