Beispiel #1
0
def get_DeepFool_adversarial(targeted, xs, classifier, batch_size):
    """Craft untargeted DeepFool adversarial examples for ``xs`` in batches.

    Args:
        targeted: Must be falsy. DeepFool has no targeted variant, so a truthy
            value prints a message and terminates the interpreter.
        xs: Input array indexed as ``xs[i, :, :, :]`` (samples on the first
            of four axes).
        classifier: Model handed to ``KerasModelWrapper``.
        batch_size: Number of samples passed to each ``generate_np`` call.

    Returns:
        Array with one adversarial example per sample of ``xs``, in the same
        order. A trailing batch smaller than ``batch_size`` is attacked too.
    """
    # Targeted DeepFool attack not possible
    if targeted:
        print('DeepFool attack cannot be targeted.')
        # Same effect as the interactive-only ``exit()`` helper (exit status
        # 0) without depending on the ``site`` module having injected it.
        raise SystemExit

    wrap = KerasModelWrapper(classifier)
    attack = DeepFool(wrap, sess=K.get_session())
    deepfool_params = {
        'overshoot': 0.02,
        'max_iter': 50,
        'nb_candidate': 2,
        'clip_min': -5,
        'clip_max': 5
    }

    num_samples = xs.shape[0]

    # The first batch is always attacked, even when xs is shorter than one
    # batch; later batches start at every further multiple of batch_size so
    # that the final, possibly partial, batch is no longer dropped.
    batches = [attack.generate_np(xs[:batch_size, :, :, :], **deepfool_params)]
    later_starts = range(batch_size, num_samples, batch_size)
    for ii, start in enumerate(later_starts, 1):
        print('ITER', ii)
        batches.append(attack.generate_np(
            xs[start:start + batch_size, :, :, :],
            **deepfool_params))

    # Concatenate once instead of re-copying the growing result every batch.
    return np.concatenate(batches, axis=0)
Beispiel #2
0
class TestDeepFool(CleverHansTest):
    """DeepFool checks against ``SimpleModel`` on random two-feature inputs."""

    def setUp(self):
        """Build the session, the model and the attack shared by the tests."""
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def _predicted_labels(self, batch):
        """Return the arg-max class of the model output for each row."""
        return np.argmax(self.sess.run(self.model(batch)), axis=1)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must leave fewer than 10% of the labels unchanged."""
        clean = np.random.rand(100, 2).astype(np.float32)
        attack_kwargs = dict(over_shoot=0.02,
                             max_iter=50,
                             nb_candidate=2,
                             clip_min=-5,
                             clip_max=5)

        adversarial = self.attack.generate_np(clean, **attack_kwargs)

        labels_before = self._predicted_labels(clean)
        labels_after = self._predicted_labels(adversarial)

        unchanged_fraction = np.mean(labels_before == labels_after)
        self.assertTrue(unchanged_fraction < 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path keeps the shape and flips labels."""
        clean = np.random.rand(100, 2).astype(np.float32)

        labels_before = self._predicted_labels(clean)
        x = tf.placeholder(tf.float32, clean.shape)

        attack_kwargs = dict(over_shoot=0.02,
                             max_iter=50,
                             nb_candidate=2,
                             clip_min=-5,
                             clip_max=5)
        adversarial_op = self.attack.generate(x, **attack_kwargs)
        self.assertEqual(clean.shape, adversarial_op.shape)
        adversarial = self.sess.run(adversarial_op, feed_dict={x: clean})

        labels_after = self._predicted_labels(adversarial)

        unchanged_fraction = np.mean(labels_before == labels_after)
        self.assertTrue(unchanged_fraction < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial values must stay within the requested clip range."""
        clean = np.random.rand(100, 2).astype(np.float32)
        attack_kwargs = dict(over_shoot=0.02,
                             max_iter=50,
                             nb_candidate=2,
                             clip_min=-0.2,
                             clip_max=0.3)

        adversarial = self.attack.generate_np(clean, **attack_kwargs)

        # A small tolerance is allowed on both bounds.
        smallest = np.min(adversarial)
        largest = np.max(adversarial)
        self.assertTrue(-0.201 < smallest)
        self.assertTrue(largest < .301)
Beispiel #3
0
class TestDeepFool(CleverHansTest):
    """DeepFool checks against a tiny fixed-weight two-layer network."""

    def setUp(self):
        """Create the session, the constant-weight model and the attack."""
        super(TestDeepFool, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            """Sigmoid hidden layer followed by a linear output layer."""
            hidden_weights = tf.constant([[1.5, .3], [-2, 0.3]],
                                         dtype=tf.float32)
            hidden = tf.nn.sigmoid(tf.matmul(x, hidden_weights))
            output_weights = tf.constant([[-2.4, 1.2], [0.5, -2.3]],
                                         dtype=tf.float32)
            return tf.matmul(hidden, output_weights)

        self.sess = tf.Session()
        self.model = my_model
        self.attack = DeepFool(self.model, sess=self.sess)

    def _predicted_labels(self, batch):
        """Return the arg-max class of the model output for each row."""
        return np.argmax(self.sess.run(self.model(batch)), axis=1)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must leave fewer than 10% of the labels unchanged."""
        clean = np.random.rand(100, 2).astype(np.float32)
        attack_kwargs = dict(over_shoot=0.02, max_iter=50, nb_candidate=2,
                             clip_min=-5, clip_max=5)

        adversarial = self.attack.generate_np(clean, **attack_kwargs)

        labels_before = self._predicted_labels(clean)
        labels_after = self._predicted_labels(adversarial)

        unchanged_fraction = np.mean(labels_before == labels_after)
        self.assertTrue(unchanged_fraction < 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path must flip the labels as well."""
        import tensorflow as tf

        clean = np.random.rand(100, 2).astype(np.float32)

        labels_before = self._predicted_labels(clean)
        x = tf.placeholder(tf.float32, clean.shape)

        attack_kwargs = dict(over_shoot=0.02, max_iter=50, nb_candidate=2,
                             clip_min=-5, clip_max=5)
        adversarial_op = self.attack.generate(x, **attack_kwargs)
        adversarial = self.sess.run(adversarial_op, feed_dict={x: clean})

        labels_after = self._predicted_labels(adversarial)

        unchanged_fraction = np.mean(labels_before == labels_after)
        self.assertTrue(unchanged_fraction < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial values must stay within the requested clip range."""
        clean = np.random.rand(100, 2).astype(np.float32)
        attack_kwargs = dict(over_shoot=0.02, max_iter=50, nb_candidate=2,
                             clip_min=-0.2, clip_max=0.3)

        adversarial = self.attack.generate_np(clean, **attack_kwargs)

        # A small tolerance is allowed on both bounds.
        smallest = np.min(adversarial)
        largest = np.max(adversarial)
        self.assertTrue(-0.201 < smallest)
        self.assertTrue(largest < .301)
Beispiel #4
0
def untargeted_attack(model, images):
    """Run DeepFool on ``images`` without overriding any attack parameter.

    Args:
        model: Model handed to ``KerasModelWrapper``.
        images: Inputs passed unchanged to ``DeepFool.generate_np``.

    Returns:
        The result of ``generate_np`` for ``images``.
    """
    session = backend.get_session()
    attack = DeepFool(KerasModelWrapper(model), back='tf', sess=session)
    return attack.generate_np(images)
Beispiel #5
0
class TestDeepFool(CleverHansTest):
    """Exercises the DeepFool attack on ``SimpleModel`` with random inputs."""

    def setUp(self):
        """Prepare a fresh session, model and attack for every test."""
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def _predicted_labels(self, batch):
        """Return the arg-max class of the model output for each row."""
        return np.argmax(self.sess.run(self.model(batch)), axis=1)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must leave fewer than 10% of the labels unchanged."""
        clean = np.random.rand(100, 2).astype(np.float32)
        attack_kwargs = dict(over_shoot=0.02, max_iter=50, nb_candidate=2,
                             clip_min=-5, clip_max=5)

        adversarial = self.attack.generate_np(clean, **attack_kwargs)

        labels_before = self._predicted_labels(clean)
        labels_after = self._predicted_labels(adversarial)

        unchanged_fraction = np.mean(labels_before == labels_after)
        self.assertTrue(unchanged_fraction < 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path must flip the labels as well."""
        clean = np.random.rand(100, 2).astype(np.float32)

        labels_before = self._predicted_labels(clean)
        x = tf.placeholder(tf.float32, clean.shape)

        attack_kwargs = dict(over_shoot=0.02, max_iter=50, nb_candidate=2,
                             clip_min=-5, clip_max=5)
        adversarial_op = self.attack.generate(x, **attack_kwargs)
        adversarial = self.sess.run(adversarial_op, feed_dict={x: clean})

        labels_after = self._predicted_labels(adversarial)

        unchanged_fraction = np.mean(labels_before == labels_after)
        self.assertTrue(unchanged_fraction < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial values must stay within the requested clip range."""
        clean = np.random.rand(100, 2).astype(np.float32)
        attack_kwargs = dict(over_shoot=0.02, max_iter=50, nb_candidate=2,
                             clip_min=-0.2, clip_max=0.3)

        adversarial = self.attack.generate_np(clean, **attack_kwargs)

        # A small tolerance is allowed on both bounds.
        smallest = np.min(adversarial)
        largest = np.max(adversarial)
        self.assertTrue(-0.201 < smallest)
        self.assertTrue(largest < .301)
    def query(self, X_train, Y_train, labeled_idx, amount):
        """Pick the unlabeled samples that DeepFool flips most cheaply.

        Every unlabeled sample is attacked with DeepFool; the samples whose
        prediction changes with the smallest perturbation norm are added to
        the labeled set.

        Args:
            X_train: Full training inputs, indexable by an index array.
            Y_train: Training labels; only ``Y_train.shape[1]`` is read, as
                DeepFool's ``nb_candidate``.
            labeled_idx: Indices of the samples that are already labeled.
            amount: Number of new samples to select. Values above the number
                of unlabeled samples are capped to it.

        Returns:
            ``labeled_idx`` followed by the newly selected indices. When
            ``amount`` is not positive or nothing is unlabeled, no index is
            added.

        NOTE: the attack is repeated with a doubled ``max_iter`` until at
        least ``amount`` predictions change, so this call does not terminate
        if the model cannot be fooled on that many samples.
        """
        unlabeled_idx = get_unlabeled_idx(X_train, labeled_idx)
        unlabeled = X_train[unlabeled_idx]
        num_unlabeled = unlabeled.shape[0]

        # More flipped predictions than unlabeled samples can never be
        # reached, which would keep the loop below running forever.
        amount = min(amount, num_unlabeled)
        if amount <= 0:
            # Nothing to select; previously this fell through to an
            # undefined ``norms``.
            return np.hstack((labeled_idx, unlabeled_idx[:0]))

        keras_wrapper = KerasModelWrapper(self.model)
        sess = K.get_session()
        deep_fool = DeepFool(keras_wrapper, sess=sess)
        deep_fool_params = {
            'over_shoot': 0.02,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_candidate': Y_train.shape[1],
            'max_iter': 10
        }
        true_predictions = np.argmax(self.model.predict(unlabeled,
                                                        batch_size=256),
                                     axis=1)
        adversarial_predictions = np.copy(true_predictions)

        attack_batch = 100
        # amount >= 1 here, so the loop body runs at least once and always
        # defines ``norms``.
        while np.sum(true_predictions != adversarial_predictions) < amount:
            adversarial_images = np.zeros(unlabeled.shape)
            for start in range(0, num_unlabeled, attack_batch):
                print("At {i} out of {n}".format(i=start, n=num_unlabeled))
                # Slicing clamps at the end of the array, so the last,
                # shorter batch needs no special case.
                stop = start + attack_batch
                adversarial_images[start:stop] = deep_fool.generate_np(
                    unlabeled[start:stop], **deep_fool_params)
            pertubations = adversarial_images - unlabeled
            norms = np.linalg.norm(np.reshape(pertubations,
                                              (num_unlabeled, -1)),
                                   axis=1)
            adversarial_predictions = np.argmax(self.model.predict(
                adversarial_images, batch_size=256),
                                                axis=1)
            # Samples the attack failed to flip must never be selected.
            norms[true_predictions == adversarial_predictions] = np.inf
            # Give the attack more iterations if too few samples flipped.
            deep_fool_params['max_iter'] *= 2

        # kth must be a valid index; clamping it only matters when every
        # unlabeled sample is requested, in which case all are returned.
        kth = min(amount, num_unlabeled - 1)
        selected_indices = np.argpartition(norms, kth)[:amount]

        del keras_wrapper
        del deep_fool
        gc.collect()

        return np.hstack((labeled_idx, unlabeled_idx[selected_indices]))
def get_DeepFool_samples(loaded_model, samples, max_iter):
    """Generate DeepFool adversarial examples for ``samples``.

    The attack clips its output to [0, 1] and considers 10 candidate classes.

    Args:
        loaded_model: Model handed to ``KerasModelWrapper``.
        samples: Inputs passed unchanged to ``DeepFool.generate_np``.
        max_iter: Iteration limit forwarded to the attack.

    Returns:
        The result of ``generate_np`` for ``samples``.
    """
    session = backend.get_session()
    attack = DeepFool(KerasModelWrapper(loaded_model), sess=session)
    attack_kwargs = dict(max_iter=max_iter,
                         clip_min=0.,
                         clip_max=1.,
                         nb_candidate=10)
    return attack.generate_np(samples, **attack_kwargs)
def mnist_deepfool_attack(sample,
                          target,
                          model,
                          sess,
                          targeted=True,
                          attack_iterations=100):
    """Run DeepFool on ``sample`` with a fixed set of attack parameters.

    Args:
        sample: Inputs passed unchanged to ``DeepFool.generate_np``.
        target: Not read by this function.
        model: Model object passed straight to ``DeepFool``.
        sess: Session the attack runs in.
        targeted: Not read by this function.
        attack_iterations: Not read by this function; the attack always runs
            with ``max_iter=300``.

    Returns:
        The result of ``generate_np`` for ``sample``.
    """
    print('deepfool attack start')
    attack = DeepFool(model, sess=sess)
    return attack.generate_np(sample,
                              over_shoot=0.02,
                              clip_min=0.,
                              clip_max=1.,
                              max_iter=300,
                              nb_candidate=2)
            X_train[st:st + batch_size].reshape(-1, 32 * 32 * 3) / 255,
            dtype=np.float)
        # NOTE(review): the `dtype=np.float` argument on the line above uses
        # an alias that was removed in NumPy 1.24; the builtin `float` is the
        # drop-in equivalent.
        # sample = np.array([sample])
        # A DeepFool attack is built around the Keras model for this batch.
        sess = keras.backend.get_session()
        model = KerasModelWrapper(model_keras)
        attack = DeepFool(model, sess=sess)
        # print(model.predict(panda.reshape(1, *panda.shape)))

        # Attack settings: 10 candidate classes, 1% overshoot, at most 20
        # iterations, outputs clipped to [0, 1].
        param = dict(
            nb_candidate=10,
            overshoot=0.01,
            #overshoot=0.0,
            max_iter=20,
            clip_min=0.,
            clip_max=1.)
        advs = attack.generate_np(sample, **param)
        # plt.imsave("sample.png", advs[0])
        adv_train.append(advs)
        # Predicted classes for the adversarial batch and for the clean batch.
        preb = model_keras.predict(advs).argmax(axis=1).reshape(
            (sample.shape[0], ))
        y_sample = model_keras.predict(sample).argmax(axis=1).reshape(
            (sample.shape[0], ))
        # An attack counts as successful when the predicted class changed.
        success += (preb != y_sample).sum()
        print((preb != y_sample).sum())

    # Success rate relative to data_size (presumably the total number of
    # attacked samples; confirm against the part of the function not shown).
    print(success / data_size)
    # Stack all adversarial batches and save them to 'new_train_deepfool'
    # (np.save appends the .npy extension).
    new_train = np.concatenate(adv_train)
    np.save('new_train_deepfool', new_train)
    # plt.imshow(adv[0])
    # plt.show()
      # Keep only the test samples the model already classifies correctly;
      # j counts how many have been kept so far.
      if np.argmax(model.predict(x_test[i:i+1])) == np.argmax(y_test[i]):
        adv_inputs[j] = x_test[i]
        adv_labels[j] = y_test[i]
        # csvFile1.append([[i,j]])
        j += 1
    # Only the first 100 kept samples are attacked.
    adv_inputs = adv_inputs[:100]
    adv_labels = adv_labels[:100]
    # j / total is the clean accuracy, assuming the loop above visited every
    # test sample (loop header not visible here; confirm).
    print("Legitimate test accuracy = %0.3f" % (j/y_test.shape[0]))
    print("Dataset of %d to be attacked." % adv_inputs.shape[0])
    print(adv_inputs.shape, adv_labels.shape)  

    # Attack
    wrap = KerasModelWrapper(model)
    deepfool = DeepFool(wrap, sess=sess)
    # No overrides: DeepFool runs with the library's default parameters.
    params = {}
    # The samples are attacked in five chunks of 20 and stitched back together.
    x_adv_1 = deepfool.generate_np(adv_inputs[:20], **params)
    x_adv_2 = deepfool.generate_np(adv_inputs[20:40], **params)
    x_adv_3 = deepfool.generate_np(adv_inputs[40:60], **params)
    x_adv_4 = deepfool.generate_np(adv_inputs[60:80], **params)
    x_adv_5 = deepfool.generate_np(adv_inputs[80:], **params)
    x_adv = np.concatenate((x_adv_1, x_adv_2, x_adv_3, x_adv_4, x_adv_5), axis=0)
    # score[1] is presumably the accuracy metric the model was compiled with;
    # confirm against the compile() call.
    score = model.evaluate(x_adv, adv_labels, verbose=0)
    print('Adv. Test accuracy: %0.3f' % score[1])

    # Initialize random choosing of adversarial images
    num_examples = 100

    # Shuffle the adversarial sample indices with a fixed seed so the order
    # is the same on every run.
    index_list = list(range(x_adv.shape[0]))
    import random
    random.seed(9123)
    random.shuffle(index_list)
Beispiel #11
0
    def get_adversarial_version(self,
                                x,
                                y=None,
                                eps=0.3,
                                iterations=100,
                                attack='FGSM',
                                targeted=False,
                                y_tar=None,
                                clip_min=0.0,
                                clip_max=1.0,
                                nb_candidate=10,
                                num_params=100):
        """Craft adversarial versions of ``x`` with the selected attack.

        Args:
            x: Input samples, passed unchanged to the attack's
                ``generate_np``.
            y: Not read by this method.
            eps: Not read by this method; FGSM and BIM run with the fixed
                budgets set below (0.15 and 0.015).
            iterations: ``max_iterations`` for the two Carlini-Wagner attacks.
            attack: One of ``'CW-l2'``, ``'CW-l0'``, ``'DF'``, ``'JSMA'``,
                ``'FGSM'`` or ``'BIM'``.
            targeted: Not read by this method.
            y_tar: Forwarded as ``y_target`` to JSMA, FGSM and BIM.
            clip_min: Lower clipping bound forwarded to JSMA, FGSM and BIM.
                The Carlini-Wagner attacks always clip to [0.0, 1.0].
            clip_max: Upper clipping bound, used like ``clip_min``.
            nb_candidate: Number of candidate classes for DeepFool.
            num_params: Not read by this method.

        Returns:
            The result of the chosen attack's ``generate_np`` for ``x``.

        Raises:
            ValueError: If ``attack`` is not one of the supported names.
                Previously this surfaced as an ``UnboundLocalError`` on the
                final return.
        """
        supported_attacks = ('CW-l2', 'CW-l0', 'DF', 'JSMA', 'FGSM', 'BIM')
        if attack not in supported_attacks:
            raise ValueError('Unknown attack {!r}; expected one of: {}'.format(
                attack, ', '.join(supported_attacks)))

        if self.dataset == 'cifar10':
            model = KerasModelWrapper(self.model)
        else:
            model = KerasModelWrapper(self.model.model)

        # Every supported attack sets the Keras learning phase to 0 before
        # it is built.
        K.set_learning_phase(0)

        if attack == 'CW-l2':
            attacker = CarliniWagnerL2(model, sess=self.sess)
            params = {
                'batch_size': 10,
                'confidence': 0,
                'learning_rate': 1e-2,
                'binary_search_steps': 5,
                'max_iterations': iterations,
                'abort_early': True,
                'initial_const': 1e-4,
                'clip_min': 0.0,
                'clip_max': 1.0
            }

        elif attack == 'CW-l0':
            attacker = CarliniWagnerL0(model, sess=self.sess)
            params = {
                'batch_size': 1,
                'confidence': 0.,
                'learning_rate': 1e-2,
                'binary_search_steps': 5,
                'max_iterations': iterations,
                'abort_early': True,
                'initial_const': 1e-4,
                'clip_min': 0.0,
                'clip_max': 1.0
            }

        elif attack == 'DF':
            attacker = DeepFool(model, sess=self.sess)
            params = {'nb_candidate': nb_candidate}

        elif attack == 'JSMA':
            attacker = SaliencyMapMethod(model, sess=self.sess)
            params = {
                'theta': 1.,
                'gamma': 0.03,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }

        elif attack == 'FGSM':
            attacker = FastGradientMethod(model, sess=self.sess)
            params = {
                'eps': 0.15,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }

        else:  # attack == 'BIM', guaranteed by the validation above
            attacker = BasicIterativeMethod(model, sess=self.sess)
            params = {
                'eps': 0.015,
                'eps_iter': 0.005,
                'nb_iter': 100,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }

        return attacker.generate_np(x, **params)
Beispiel #12
0
def mnist_tutorial_deepfool(train_start=0, train_end=60000,  # read 60000 training examples
                            test_start=0, test_end=10000,  # read 10000 test examples
                            viz_enabled=True, nb_epochs=6,
                            batch_size=128, nb_classes=2, source_samples=10,
                            learning_rate=0.001, attack_iterations=100,
                            model_path=os.path.join("models", "mnist")):
    """
    MNIST tutorial for the DeepFool attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model (one epoch is one full
                      forward and backward pass over all the training data)
    :param batch_size: size of training batches
    :param nb_classes: only used in the progress message; the label
                       placeholder and the attack are fixed to 10 classes
    :param source_samples: only used in the progress message; one test
                           example per digit (10 in total) is attacked
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param attack_iterations: maximum number of DeepFool iterations
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions: images are 28 x 28 x 1
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_picklable_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2018, 8, 9])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path+".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"), rng=rng)
        print("save success")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the DeepFool attack
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a DeepFool attack object
    deepfool = DeepFool(model, back='tf', sess=sess)

    # For each digit, take the index of the second test example of that class
    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][1] for i in range(10)]
    print("idxs:",idxs)

    # construct adv_inputs
    adv_inputs = X_test[idxs].reshape([-1,28,28,1])
    num_attacked = adv_inputs.shape[0]

    # One row per attacked sample: column 0 is the clean image, column 1 the
    # adversarial one. Sizing the grid by nb_classes (default 2) made the
    # fill loop below index out of bounds.
    grid_shape = (num_attacked, 2, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    print("grid_viz_data",grid_viz_data.shape)

    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': attack_iterations,
                       'nb_classes': 10,
                       'clip_min': 0.,
                       'clip_max': 1.}

    adv = deepfool.generate_np(adv_inputs, **deepfool_params)

    print("adv success")

    # Share of adversarial examples the model no longer classifies correctly
    adv_accuracy = 1-model_eval(sess, x, y, preds, adv, Y_test[idxs],
                                args={'batch_size': 10})

    for j in range(num_attacked):
        grid_viz_data[j, 0] = adv_inputs[j]
        grid_viz_data[j, 1] = adv[j]

    print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1.-adv_accuracy

    # Compute the average distortion introduced by the algorithm
    avg_l2_norm = np.mean(np.sum((adv - adv_inputs)**2,
                                 axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(avg_l2_norm))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
# Clean accuracy: share of test samples whose arg-max prediction equals y_test.
pred = np.argmax(keras_model.predict(x_test), axis=1)
acc = np.equal(pred, y_test).mean()

print("The Test accuracy is: {}".format(acc))

# ----------------------------------------------------------------------------
# Adversarial attack: DeepFool is run on the test set (x_test).
# ----------------------------------------------------------------------------
wrap = KerasModelWrapper(keras_model)
df = DeepFool(wrap, back='tf', sess=sess)
df_params = dict(
    over_shoot=0.09,
    max_iter=10,
    clip_max=1,
    clip_min=0,
    nb_candidate=10,
)
adv_x = df.generate_np(x_test, **df_params)

# Accuracy of the same model on the adversarial inputs.
adv_conf = keras_model.predict(adv_x)
adv_pred = np.argmax(adv_conf, axis=1)
adv_acc = np.equal(adv_pred, y_test).mean()

print("The adversarial  accuracy is: {}".format(adv_acc))

# ----------------------------------------------------------------------------
# Original image: test sample 5 shown as a 28 x 28 picture.
# ----------------------------------------------------------------------------
x_sample = np.reshape(x_test[5], (28, 28))
plt.imshow(x_sample, cmap='Blues')
plt.show()

# ----------------------------------------------------------------------------
# Adversarial image: the DeepFool version of the same sample.
# ----------------------------------------------------------------------------
adv_x_sample = np.reshape(adv_x[5], (28, 28))
plt.imshow(adv_x_sample, cmap='Blues')
plt.show()