Example #1
    def setUp(self):
        super(TestElasticNetMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = ElasticNetMethod(self.model, sess=self.sess)
Example #2
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
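    # A minimal sketch of that renormalization (hypothetical; FLAGS.eps would
    # hold the [0, 255]-scale value, and the ENM attack below takes no eps):
    #     eps = 2.0 * FLAGS.eps / 255.0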
    batch_size = FLAGS.batch_size
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    targeted = False
    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)
        with tf.Session() as sess:

            enm = ENM(model, back='tf', sess=sess)
            enm_params = {
                'beta': 0,
                'batch_size': batch_size,
                'learning_rate': 0.1,
                'max_iterations': 1000,
                'binary_search_steps': 9
            }
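            # Note: with beta=0 the L1 penalty vanishes, so this EAD setup
            # behaves essentially like the Carlini-Wagner L2 attack (Chen et
            # al., 2018, "EAD: Elastic-Net Attacks to Deep Neural Networks").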

            x_adv = enm.generate(x_input, **enm_params)

            saver = tf.train.Saver(slim.get_model_variables())
            # Initialize first, then restore: running the initializer after
            # the restore would overwrite the checkpointed weights.
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, FLAGS.checkpoint_path)
            i = 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                print("input images: ", images.shape)
                i += batch_size
                print(i)
                save_images(adv_images, filenames, FLAGS.output_dir)
Example #3
    def setUp(self):
        super(TestElasticNetMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = ElasticNetMethod(self.model, sess=self.sess)
Example #4
fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
adv_x = fgsm.generate_np(x_test[:, None, :, :], **fgsm_params)

# In[32]:

# checking the accuracy of the generated adversarial examples
adv_pred = np.argmax(model.predict(adv_x), axis=1)
adv_acc = np.mean(np.equal(adv_pred, y_test))

print("After attack, the accuracy is: {}".format(adv_acc * 100))

# In[35]:

wrap = KerasModelWrapper(model)
en = ElasticNetMethod(wrap, sess=sess)
en_params = {
    "beta": 0.01,
    "decision_rule": 'L1',
    "batch_size": 1000,
    "confidence": 0,
    "learning_rate": 0.1,
    "binary_search_steps": 9,
    "max_iterations": 10,
    "abort_early": True,
    "initial_const": 0.01,
    "clip_min": 0,
    "clip_max": 1
}
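# decision_rule picks which successful candidate is kept per input: 'EN'
# ranks candidates by elastic-net distortion (beta*L1 + L2^2), while 'L1'
# ranks by pure L1 distortion, favoring sparser perturbations.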
adv_x = en.generate_np(x_test[:, None, :, :], **en_params)
Example #5
def attack_lisa_cnn(sess, cnn_weight_file, y_target=None, standardize=True):
    """ Generates AE for the LISA-CNN.
        Assumes you have already run train_lisa_cnn() to train the network.
    """
    epsilon_map = {np.inf: [.02, .05, .075, .1, .15, .2],   # assumes values in [0,1]
                   1:      [.1, 1, 10],
                   2:      [.1, 1, 10]}

    #--------------------------------------------------
    # data set prep
    #--------------------------------------------------
    # Note: we load the version of the data *without* extra context
    X_train, Y_train, X_test, Y_test = data_lisa(with_context=False)

    # Create one-hot target labels (needed for targeted attacks only)
    if y_target is not None:
        Y_target_OB = categorical_matrix(y_target, FLAGS.batch_size, Y_test.shape[1])
        Y_target = categorical_matrix(y_target, Y_test.shape[0], Y_test.shape[1])
    else:
        Y_target_OB = None
        Y_target = None
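    # (categorical_matrix is a project-local helper; presumably it returns an
    #  (n, num_classes) one-hot matrix whose rows all select y_target,
    #  i.e. roughly: M = np.zeros((n, k)); M[:, y_target] = 1.)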

    # bound the perturbation
    c_max = np.max(X_test)
    assert(c_max <= 1.0) # assuming this for now

    #--------------------------------------------------
    # Initialize model that we will attack
    #--------------------------------------------------
    model, x_tf, y_tf = make_lisa_cnn(sess, FLAGS.batch_size, X_train.shape[1])
    model_CH = KerasModelWrapper(model) # to make CH happy

    # the input may or may not require some additional transformation
    if standardize:
        x_input = tf.map_fn(lambda z: per_image_standardization(z), x_tf)
    else:
        x_input = x_tf
    model_output = model(x_input)


    saver = tf.train.Saver()
    saver.restore(sess, cnn_weight_file)

    #--------------------------------------------------
    # Performance on clean data
    # (try this before attacking)
    #--------------------------------------------------
    predictions = run_in_batches(sess, x_tf, y_tf, model_output, X_test, Y_test, FLAGS.batch_size)
    acc_clean = calc_acc(Y_test, predictions)
    print('[info]: accuracy on clean test data: %0.2f' % acc_clean)
    print(confusion_matrix(np.argmax(Y_test, axis=1), np.argmax(predictions, axis=1)))

    save_images_and_estimates(X_test, Y_test, predictions, 'output/Images/Original', CLASSES)


    #--------------------------------------------------
    # Fast Gradient Attack
    #--------------------------------------------------
    # symbolic representation of attack
    attack = FastGradientMethod(model_CH, sess=sess)
    acc_fgm = {}
    acc_tgt_fgm = {}

    for ord in [np.inf, 1, 2]:
        epsilon_values = epsilon_map[ord]
        acc_fgm[ord] = []
        acc_tgt_fgm[ord] = []

        for idx, epsilon in enumerate(epsilon_values):
            desc = 'FGM-ell%s-%0.3f' % (ord, epsilon)

            x_adv_tf = attack.generate(x_tf, eps=epsilon, y_target=Y_target_OB, clip_min=0.0, clip_max=c_max, ord=ord)

            if Y_target is not None:
                X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_target, FLAGS.batch_size)
            else:
                X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_test, FLAGS.batch_size)

            #
            # Evaluate the AE. 
            # Currently using the same model we originally attacked.
            #
            model_eval = model
            #preds_tf = model_eval(x_tf)
            preds_tf = model_eval(x_input)
            preds = run_in_batches(sess, x_tf, y_tf, preds_tf, X_adv, Y_test, FLAGS.batch_size)
            acc, acc_tgt = analyze_ae(X_test, X_adv, Y_test, preds, desc, y_target)

            save_images_and_estimates(X_adv, Y_test, preds, 'output/Images/%s' % desc, CLASSES)
            save_images_and_estimates(X_test - X_adv, Y_test, preds, 'output/Deltas/%s' % desc, CLASSES)
            acc_fgm[ord].append(acc)
            acc_tgt_fgm[ord].append(acc_tgt)


    #--------------------------------------------------
    # Iterative attack
    #--------------------------------------------------
    attack = BasicIterativeMethod(model_CH, sess=sess)
    acc_ifgm = {}
    acc_tgt_ifgm = {}

    for ord in [np.inf, 1, 2]:
        epsilon_values = epsilon_map[ord]
        acc_ifgm[ord] = []
        acc_tgt_ifgm[ord] = []

        for idx, epsilon in enumerate(epsilon_values):
            desc = 'I-FGM-ell%s-%0.3f' % (ord, epsilon)

            x_adv_tf = attack.generate(x_tf, eps=epsilon, 
                                         eps_iter=epsilon/4., 
                                         nb_iter=100,
                                         y_target=Y_target_OB, 
                                         clip_min=0.0,
                                         clip_max=c_max)

            #
            # Run the attack (targeted or untargeted)
            # on the test data.
            #
            if Y_target is not None:
                X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_target, FLAGS.batch_size)
            else:
                X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_test, FLAGS.batch_size)

            #
            # Evaluate the AE. 
            # Currently using the same model we originally attacked.
            #
            model_eval = model
            #preds_tf = model_eval(x_tf)
            preds_tf = model_eval(x_input)
            preds = run_in_batches(sess, x_tf, y_tf, preds_tf, X_adv, Y_test, FLAGS.batch_size)
            acc, acc_tgt = analyze_ae(X_test, X_adv, Y_test, preds, desc, y_target)

            save_images_and_estimates(X_adv, Y_test, preds, 'output/Images/%s' % desc, CLASSES)
            save_images_and_estimates(X_test - X_adv, Y_test, preds, 'output/Deltas/%s' % desc, CLASSES)
            acc_ifgm[ord].append(acc)
            acc_tgt_ifgm[ord].append(acc_tgt)


    #--------------------------------------------------
    # Post-attack Analysis for *FGM
    #--------------------------------------------------
    for ord in [np.inf, 1, 2]:
        plt.plot(epsilon_map[ord], acc_fgm[ord], 'o-', label='FGM')
        plt.plot(epsilon_map[ord], acc_ifgm[ord], 'o-', label='I-FGM')
        plt.legend()
        plt.xlabel('epsilon')
        plt.ylabel('CNN accuracy')
        plt.title('ell_%s' % ord)
        plt.grid('on')
        plt.savefig('./output/attack_accuracy_%s.png' % ord, bbox_inches='tight')
        plt.close()
     
        plt.figure()
        plt.plot(epsilon_map[ord], acc_tgt_fgm[ord], 'o-', label='FGM')
        plt.plot(epsilon_map[ord], acc_tgt_ifgm[ord], 'o-', label='I-FGM')
        plt.legend()
        plt.xlabel('epsilon')
        plt.ylabel('Targeted AE Success Rate')
        plt.title('ell_%s' % ord)
        plt.grid('on')
        plt.savefig('./output/targeted_attack_accuracy_%s.png' % ord, bbox_inches='tight')
        plt.close()




    #--------------------------------------------------
    # Elastic Net
    # Note: this attack takes a while to compute (compared to *FGSM)
    #--------------------------------------------------
    attack = ElasticNetMethod(model_CH, sess=sess)
    c_vals = [1e-2, 1e-1, 1, 1e2, 1e4]
    acc_all_elastic = np.zeros((len(c_vals),))

    if False:   # disabled for now; this attack is slow (set True to run)
        # To sweep all values, wrap the block below in:
        #     for idx, c in enumerate(c_vals):
        idx, c = 0, c_vals[0]
        x_adv_tf = attack.generate(x_tf, 
                                   batch_size=FLAGS.batch_size,
                                   y_target=Y_target_OB, 
                                   beta=1e-3,            # ell_1 coeff
                                   confidence=1e-2,      # \kappa value from equation (4)
                                   initial_const=c,      # (an initial value for) c from eq. (7) - note this value increases as binary search progresses...
                                   clip_min=0.0,
                                   clip_max=c_max)
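        # For reference (Chen et al., 2018): EAD solves
        #     min_delta  c * f(x + delta) + ||delta||_2^2 + beta * ||delta||_1
        # where f is the confidence-margin loss; the binary search raises or
        # lowers c between restarts, as the comment above notes.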

        #
        # Run the attack (targeted or untargeted)
        # on the test data.
        #
        if Y_target is not None:
            X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_target, FLAGS.batch_size)
        else:
            X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_test, FLAGS.batch_size)

        #
        # Evaluate the AE. 
        # Currently using the same model we originally attacked.
        #
        model_eval = model
        preds_tf = model_eval(x_tf)
        preds = run_in_batches(sess, x_tf, y_tf, preds_tf, X_adv, Y_test, FLAGS.batch_size)
        print('Test accuracy after E-Net attack: %0.2f' % calc_acc(Y_test, preds))
        print('Maximum per-pixel delta: %0.3f' % np.max(np.abs(X_test - X_adv)))
        print('Mean per-pixel delta: %0.3f' % np.mean(np.abs(X_test - X_adv)))
        print('l2: ', np.sqrt(np.sum((X_test - X_adv)**2)))
        print('l1: ', np.sum(np.abs(X_test - X_adv)))
        print(confusion_matrix(np.argmax(Y_test, axis=1), np.argmax(preds, axis=1)))

        save_images_and_estimates(X_adv, Y_test, preds, 'output/Images/Elastic_c%03d' % c, CLASSES)
        acc_all_elastic[idx] = calc_acc(Y_test, preds)


    #--------------------------------------------------
    # Saliency Map Attack
    # Note: this is *extremely* slow; will require overnight runs
    #--------------------------------------------------
    attack = SaliencyMapMethod(model_CH, sess=sess)
    acc_all_saliency = np.zeros((len(epsilon_values),))

    if False:   # disabled: this attack is *extremely* slow (set True to run)
        # To sweep all values, wrap the block below in:
        #     for idx, epsilon in enumerate(epsilon_values):
        idx, epsilon = 0, epsilon_values[0]
        x_adv_tf = attack.generate(x_tf, theta=epsilon/255., 
                                     y_target=y_tf,
                                     clip_min=0.0, 
                                     clip_max=255.0)

        #
        # Run the attack (targeted or untargeted)
        # on the test data.
        #
        if Y_target is not None:
            X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_target, FLAGS.batch_size)
        else:
            X_adv = run_in_batches(sess, x_tf, y_tf, x_adv_tf, X_test, Y_test, FLAGS.batch_size)

        #
        # Evaluate the AE. 
        # Currently using the same model we originally attacked.
        #
        model_eval = model
        preds_tf = model_eval(x_tf)
        preds = run_in_batches(sess, x_tf, y_tf, preds_tf, X_adv, Y_test, FLAGS.batch_size)
        print('Test accuracy after SMM attack: %0.3f' % calc_acc(Y_test, preds))
        print('Maximum per-pixel delta: %0.1f' % np.max(np.abs(X_test - X_adv)))
        print(confusion_matrix(np.argmax(Y_test, axis=1), np.argmax(preds, axis=1)))

        save_images_and_estimates(X_adv, Y_test, preds, 'output/Images/Saliency_%02d' % epsilon, CLASSES)
        acc_all_saliency[idx] = calc_acc(Y_test, preds)


    #--------------------------------------------------
    # C&W ell-2
    #--------------------------------------------------
    if False:
        attack = CarliniWagnerL2(model_CH, sess=sess)
        x_adv_tf = attack.generate(x_tf, confidence=.1, y_target=Y_target_OB)
Example #6
class TestElasticNetMethod(CleverHansTest):
    def setUp(self):
        super(TestElasticNetMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5,
                                        clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1
        x_adv = self.attack.generate_np(x_val,
                                        max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5,
                                        clip_max=5,
                                        batch_size=100,
                                        y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(
            np.mean(np.argmax(feed_labs, axis=1) == new_labs) > 0.9)

    def test_generate_gives_adversarial_example(self):

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1
        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x,
                                       max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5,
                                       clip_max=5,
                                       batch_size=100,
                                       y=y)
        self.assertEqual(x_val.shape, x_adv_p.shape)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2,
                                        clip_max=0.3,
                                        batch_size=100)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)

    def test_generate_np_high_confidence_targeted_examples(self):

        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10,
                                       clip_max=10,
                                       confidence=CONFIDENCE,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            good_labs = new_labs[np.arange(10), np.argmax(feed_labs, axis=1)]
            bad_labs = new_labs[np.arange(10),
                                1 - np.argmax(feed_labs, axis=1)]

            self.assertTrue(
                np.isclose(0,
                           np.min(good_labs - (bad_labs + CONFIDENCE)),
                           atol=1e-1))
            self.assertTrue(
                np.mean(
                    np.argmax(new_labs, axis=1) == np.argmax(feed_labs,
                                                             axis=1)) > .9)

    def test_generate_np_high_confidence_untargeted_examples(self):

        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            orig_labs = np.argmax(self.sess.run(
                trivial_model.get_logits(x_val)),
                                  axis=1)
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10,
                                       clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            self.assertTrue(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs) == 0)
            self.assertTrue(
                np.isclose(0,
                           np.min(good_labs - (bad_labs + CONFIDENCE)),
                           atol=1e-1))
Example #7
def get_adv_examples(sess, wrap, attack_type, X, Y):
    """
        detect adversarial examples
        :param sess: target model session
        :param wrap: wrap model
        :param attack_type:  attack for generating adversarial examples
        :param X: examples to be attacked
        :param Y: correct label of the examples
        :return: x_adv: adversarial examples
    """
    x = tf.placeholder(tf.float32, shape=(None, X.shape[1], X.shape[2],
                                          X.shape[3]))
    y = tf.placeholder(tf.float32, shape=(None, Y.shape[1]))
    adv_label = np.copy(Y)
    batch_size = 128

    # Define attack method parameters
    if (attack_type == 'fgsm'):
        attack_params = {
            'eps': 0.1,
            'clip_min': 0.,
            'clip_max': 1.
        }
        attack_object = FastGradientMethod(wrap, sess=sess)
    elif (attack_type == 'jsma'):
        attack_params = {
            'theta': 1., 'gamma': 0.1,
            'clip_min': 0., 'clip_max': 1.,
            'y_target': None
        }
        attack_object = SaliencyMapMethod(wrap, sess=sess)
        batch_size = 32
    elif (attack_type == 'cw'):
        attack_params = {
            'binary_search_steps': 1,
            'y': y,
            'max_iterations': 100,
            'learning_rate': .2,
            'batch_size': 128,
            'initial_const': 10
        }
        attack_object = CarliniWagnerL2(wrap, sess=sess)
    elif (attack_type == 'mim'):
        attack_object = MomentumIterativeMethod(wrap, back='tf', sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1., 'eps': 0.1}
    elif (attack_type == 'df'):
        attack_params = {
            'max_iterations': 50,
            'clip_min': 0., 'clip_max': 1.,
            'overshoot': 0.02
        }
        attack_object = DeepFool(wrap, sess=sess)
        batch_size = 64
    elif (attack_type == 'bim'):
        attack_object = BasicIterativeMethod(wrap, back='tf', sess=sess)
        attack_params = {'eps': 0.1, 'eps_iter': 0.05,
                         'nb_iter': 10, 'clip_min': 0.,
                         'clip_max': 1.
                         }
    elif (attack_type == 'vam'):
        attack_object = VirtualAdversarialMethod(wrap, back='tf', sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1., 'nb_iter': 100, 'eps': 2, 'xi': 1e-6}
    elif (attack_type == 'enm'):
        attack_object = ElasticNetMethod(wrap, back='tf', sess=sess)
        attack_params = {'y': y, 'max_iterations': 10, 'batch_size': 128}
    elif (attack_type == 'spsa'):
        attack_object = SPSA(wrap, sess=sess)
        adv_x = attack_object.generate(x=x, y=y, eps=0.1, clip_min=0., clip_max=1., nb_iter=100,
                                       early_stop_loss_threshold=-5.)
        batch_size = 1
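        # SPSA approximates gradients with stochastic finite differences
        # (black-box), hence the per-example batch size here.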
    elif (attack_type == 'lbfgs'):
        attack_object = LBFGS(wrap, sess=sess)
        attack_params = {'clip_min': 0, 'clip_max': 1., 'batch_size': 128,
                         'max_iterations': 10, "y_target": y}
        true_label = np.argmax(Y, axis=-1)
        for i in range(len(Y)):
            ind = (true_label[i] + 1) % FLAGS.nb_classes
            adv_label[i] = np.zeros([FLAGS.nb_classes])
            adv_label[i, ind] = 1
    if (attack_type != 'spsa'):
        adv_x = attack_object.generate(x, **attack_params)

    # Get adversarial examples
    if (attack_type == 'lbfgs'):
        x_adv = get_adv(sess, x, y, adv_x, X, adv_label, batch_size=batch_size)
    else:
        x_adv = get_adv(sess, x, y, adv_x, X, Y, batch_size=batch_size)
    return x_adv
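# Hypothetical usage of the helper above (placeholder names):
#     x_adv = get_adv_examples(sess, wrap, 'enm', X_test[:128], Y_test[:128])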
Example #8
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    # assert Y_train.shape[1] == 10
    # label_smooth = .1
    # Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}


    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)
        print("evaluate 1")
        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f\n' % acc)

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds_adv, X_train,
                             Y_train, args=eval_par)
            report.train_clean_train_adv_eval = acc
        ################################################################
        # Init the Elastic Net Method attack object and graph
        en = ElasticNetMethod(model, back='tf', sess=sess)
        en_params = {'binary_search_steps': 1,
                     # 'y': None,
                     'max_iterations': 100,
                     'learning_rate': 0.1,
                     'batch_size': source_samples,
                     'initial_const': 10}
        adv_x_2 = en.generate(x, **en_params)
        preds_adv_2 = model.get_probs(adv_x_2)
        en_eval_params = {'batch_size': source_samples}
        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv_2, X_test, Y_test, args=en_eval_params)
        print('Test accuracy on EN adversarial examples: %0.4f\n' % acc)
        ###############################################################
        # Calculate training error on the EN adversarial examples
        if testing:
            eval_par = {'batch_size': source_samples}
            acc = model_eval(sess, x, y, preds_adv_2, X_train,
                             Y_train, args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm2.generate(x, **fgsm_params)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_fgsm = tf.stop_gradient(adv_x_fgsm)
    preds_2_adv_fgsm = model_2(adv_x_fgsm)
    ##########################################
    en2 = ElasticNetMethod(model_2, back='tf', sess=sess)
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': source_samples,
                 'initial_const': 10}
    adv_x_en = en2.generate(x, **en_params)
    
    preds_2_adv_en = model_2(adv_x_en)
    print("evaluate 2")
    def evaluate_2():
        # evaluate the final result of the model
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)

        # Accuracy of the adversarially trained model on FGSM adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv_fgsm, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)

        # Accuracy of the adversarially trained model on EN Method adversarial examples
        en_eval_params = {'batch_size': source_samples}
        accuracy = model_eval(sess, x, y, preds_2_adv_en, X_test,
                              Y_test, args=en_eval_params)
        print('Test accuracy on EN adversarial examples: %0.4f' % accuracy)


    # Perform and evaluate adversarial training.
    # model_train expects a single adversarial-predictions tensor, so we
    # train against the EN examples only; combining the FGSM and EN
    # predictions would require a custom loss.
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': source_samples,
        'learning_rate': learning_rate
    }
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv_en, evaluate=evaluate_2,
                args=train_params, rng=rng)

   
    return report
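# Hypothetical invocation, mirroring the cleverhans tutorial entry points:
#     report = mnist_tutorial(nb_epochs=6, batch_size=128, learning_rate=0.001)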
Example #9
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):

    size = 256
    eval_params = {'batch_size': 128}

    ############################################# Prepare the Data #####################################################

    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)

    ############################################# Prepare the Data #####################################################


    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:

        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []
        def modelBuilder(x, num_classes, dataset, type, sess, input_output):

            if len(input_output) == 0:

                reuse = False

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes,
                                            inputT=x, sess=sess,
                                            checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)
                else:

                    _, tf_model = \
                        prepare_Resnet(num_classes,
                                       inputT=x, sess=sess,
                                       checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)

            else:

                reuse = True

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse)
                else:
                    _, tf_model = \
                        prepare_Resnet(num_classes, inputT=x, reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)


            return tf_model.logits

        # create an attackable model for the cleverhans
        model = CallableModelWrapper(lambda placeholder: modelBuilder(placeholder, num_classes, dataset, type, sess, input_output), 'logits')

        # TODO: check the configurations
        if attack_type == "FGM": # pass
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {
                'eps' : 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "CWL2": # pass
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "DF": # pass
            attack = DeepFool(model, back='tf', sess=sess)
            params = {
            }
        elif attack_type == "ENM": # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "FFA": # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'eps_iter': 0.005,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VATM":
            attack = vatm(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        else:
            raise ValueError("Unrecognized attack type: {}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # notice that "adv_vals" may contain NANs because of the failure of the attack
        # also the input may not be perturbed at all because of the failure of the attack
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for nan
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation
            if np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # cleanings
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        print("{} out of {} adversarial examples are generated.".format(len(adv_vals_cleaned), size))

        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx])) for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), x_test[:size], y_test[:size],
                              args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), adv_vals_cleaned, y_cleaned,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
Example #10
def test_attacks(batch_size=128,
                 source_samples=10,
                 model_path=os.path.join("models", "mnist"),
                 targeted=True):
    """
    Test many attacks on MNIST with deep Bayes classifier.
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    from cleverhans.utils_mnist import data_mnist
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                  train_end=60000,
                                                  test_start=0,
                                                  test_end=10000)
    img_rows, img_cols, channels = X_train[0].shape
    nb_classes = Y_train.shape[1]

    # Define input TF placeholder
    batch_size = min(batch_size, source_samples)
    x = tf.placeholder(tf.float32,
                       shape=(batch_size, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))

    # Define TF model graph
    model_name = str(sys.argv[1])
    if model_name == 'bayes':
        from load_bayes_classifier import BayesModel
        conv = True
        checkpoint = 0  #int(sys.argv[1])
        K = int(sys.argv[3])
        use_mean = True
        model = BayesModel(sess,
                           'mnist',
                           conv,
                           K,
                           checkpoint=checkpoint,
                           attack_snapshot=False,
                           use_mean=use_mean)
        if use_mean:
            model_name = 'bayes_mean_mlp'
        else:
            model_name = 'bayes_K%d' % K
    if model_name == 'cnn':
        from load_cnn_classifier import CNNModel
        model = CNNModel(sess, 'mnist')
    if model_name == 'wgan':
        from load_wgan_classifier import WGANModel
        conv = True
        checkpoint = 0  #int(sys.argv[1])
        K = int(sys.argv[3])
        T = int(sys.argv[4])
        model = WGANModel(sess, 'mnist', conv, K, T, checkpoint=checkpoint)
        model_name = 'wgan_K%d_T%d' % (K, T)

    preds = model.predict(x, softmax=True)  # output probabilities
    print("Defined TensorFlow model graph.")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    # Craft adversarial examples
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # make adv inputs and labels for the attack if targeted
    if targeted:
        adv_inputs = np.array([[instance] * nb_classes
                               for instance in X_test[:source_samples]],
                              dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1
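        # (equivalent to: one_hot = np.eye(nb_classes))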

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, channels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
    else:
        adv_inputs = X_test[:source_samples]
        adv_ys = Y_test[:source_samples]

    # Instantiate an attack object
    attack_method = str(sys.argv[2])
    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = FastGradientMethod(model_prob, sess=sess)
        from attack_config import config_fgsm
        attack_params = config_fgsm(targeted, adv_ys)
    if attack_method == 'bim':
        from cleverhans.attacks import BasicIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = BasicIterativeMethod(model_prob, sess=sess)
        from attack_config import config_bim
        attack_params = config_bim(targeted, adv_ys)
    if attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MomentumIterativeMethod(model_prob, sess=sess)
        from attack_config import config_mim
        attack_params = config_mim(targeted, adv_ys)
    if attack_method == 'jsma':
        from cleverhans.attacks import SaliencyMapMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = SaliencyMapMethod(model_prob, sess=sess)
        from attack_config import config_jsma
        attack_params = config_jsma(targeted, adv_ys)
    if attack_method == 'vat':
        from cleverhans.attacks import VirtualAdversarialMethod
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = VirtualAdversarialMethod(model_logit, sess=sess)
        from attack_config import config_vat
        attack_params = config_vat(targeted, adv_ys)
    if attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = CarliniWagnerL2(model_logit, sess=sess)
        from attack_config import config_cw
        attack_params = config_cw(targeted, adv_ys)
    if attack_method == 'elastic':
        from cleverhans.attacks import ElasticNetMethod
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = ElasticNetMethod(model_logit, sess=sess)
        from attack_config import config_elastic
        attack_params = config_elastic(targeted, adv_ys)
    if attack_method == 'deepfool':
        from cleverhans.attacks import DeepFool
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = DeepFool(model_logit, sess=sess)
        from attack_config import config_deepfool
        attack_params = config_deepfool(targeted, adv_ys)
    if attack_method == 'madry':
        from cleverhans.attacks import MadryEtAl
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MadryEtAl(model_prob, sess=sess)
        from attack_config import config_madry
        attack_params = config_madry(targeted, adv_ys)

    attack_params['batch_size'] = batch_size
    print('batchsize', batch_size)

    # perform the attack!
    adv = []
    n_batch = int(adv_inputs.shape[0] / batch_size)
    for i in range(n_batch):
        adv_batch = adv_inputs[i * batch_size:(i + 1) * batch_size]
        adv.append(attack.generate_np(adv_batch, **attack_params))
    adv = np.concatenate(adv, axis=0)

    for _ in range(5):
        y_adv = []
        for i in range(n_batch):
            adv_batch = adv[i * batch_size:(i + 1) * batch_size]
            y_adv.append(sess.run(preds, {x: adv_batch}))
        y_adv = np.concatenate(y_adv, axis=0)

        print('--------------------------------------')
        for i in range(10):
            print(np.argmax(y_adv[i * 10:(i + 1) * 10], 1))

    correct_pred = np.asarray(np.argmax(y_adv, 1) == np.argmax(adv_ys, 1),
                              dtype='f')
    adv_accuracy = np.mean(correct_pred)

    if not targeted:
        #        adv_accuracy, y_adv = model_eval(sess, x, y, preds, adv,
        #                                         adv_ys, args=eval_params,
        #                                         return_pred=True)
        #    else:
        #        adv_accuracy, y_adv = model_eval(sess, x, y, preds, adv,
        #                                         Y_test[:source_samples], args=eval_params,
        #                                         return_pred=True)
        adv_accuracy = 1. - adv_accuracy

    print('--------------------------------------')

    print(np.argmax(adv_ys[:10], 1))
    print(np.argmax(y_adv[:10], 1))
    for i in range(5):
        tmp = sess.run(preds, {x: adv[:100]})
        print(np.argmax(tmp[:10], 1))

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average L2 distortion introduced by the algorithm
    mean_l2_perturbation = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_perturbation))

    # Close TF session
    sess.close()

    # visualisation
    vis_adv = True
    if vis_adv:
        N_vis = 100
        sys.path.append('../../utils')
        from visualisation import plot_images
        if channels == 1:
            shape = (img_rows, img_cols)
        else:
            shape = (img_rows, img_cols, channels)
        path = 'figs/'
        filename = model_name + '_' + attack_method
        if targeted:
            filename = filename + '_targeted'
        else:
            filename = filename + '_untargeted'
        plot_images(adv_inputs[:N_vis], shape, path, filename + '_data')
        plot_images(adv[:N_vis], shape, path, filename + '_adv')

    save_result = True
    if save_result:
        path = 'results/'
        filename = model_name + '_' + attack_method
        if targeted:
            filename = filename + '_targeted'
            y_input = adv_ys
        else:
            filename = filename + '_untargeted'
            y_input = Y_test[:source_samples]
        results = [adv_inputs, y_input, adv, y_adv]
        import pickle
        pickle.dump(results, open(path + filename + '.pkl', 'wb'))
        print("results saved at %s%s.pkl" % (path, filename))

    return report
Example #11
def evaluate_ch(model, config, sess, norm='l1', bound=None, verbose=True):
    dataset = config['data']
    num_eval_examples = config['num_eval_examples']
    eval_batch_size = config['eval_batch_size']

    if dataset == "mnist":
        from tensorflow.examples.tutorials.mnist import input_data
        mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
        X = mnist.test.images[0:num_eval_examples, :].reshape(-1, 28, 28, 1)
        Y = mnist.test.labels[0:num_eval_examples]
        x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    else:
        import cifar10_input
        data_path = config["data_path"]
        cifar = cifar10_input.CIFAR10Data(data_path)
        X = cifar.eval_data.xs[0:num_eval_examples, :].astype(np.float32) / 255.0
        Y = cifar.eval_data.ys[0:num_eval_examples]
        x_image = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
        assert norm == 'l1'

    if norm=='l2':
        attack = CarliniWagnerL2(model, sess)
        params = {'batch_size': eval_batch_size, 'binary_search_steps': 9}
    else:
        # clip bounds are per-run attack parameters, so pass them via params
        attack = ElasticNetMethod(model, sess)
        params = {'beta': 1e-2,
                  'decision_rule': 'L1',
                  'batch_size': eval_batch_size,
                  'learning_rate': 1e-2,
                  'max_iterations': 1000,
                  'clip_min': 0.0,
                  'clip_max': 1.0}

    if verbose:
        set_log_level(logging.DEBUG, name="cleverhans")
    
    y = tf.placeholder(tf.float32, shape=[None, 10])
    params['y'] = y
    adv_x = attack.generate(x_image, **params)
    preds_adv = model.get_predicted_class(adv_x)
    preds_nat = model.get_predicted_class(x_image)

    all_preds, all_preds_adv, all_adv_x = batch_eval(
        sess, [x_image, y], [preds_nat, preds_adv, adv_x], [X, one_hot(Y, 10)], batch_size=eval_batch_size)

    print('acc nat', np.mean(all_preds == Y))
    print('acc adv', np.mean(all_preds_adv == Y))

    if dataset == "cifar10":
        X *= 255.0
        all_adv_x *= 255.0

    if norm == 'l2':
        lps = np.sqrt(np.sum(np.square(all_adv_x - X), axis=(1,2,3)))
    else:
        lps = np.sum(np.abs(all_adv_x - X), axis=(1,2,3))
    print('mean lp: ', np.mean(lps))
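    # A sample counts as accurate at budget b if it is still classified
    # correctly or the attack needed more than b distortion to flip it.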
    for b in [bound, bound/2.0, bound/4.0, bound/8.0]:
        print('lp={}, acc={}'.format(b, np.mean((all_preds_adv == Y) | (lps > b))))

    all_corr_adv = (all_preds_adv == Y)
    all_corr_nat = (all_preds == Y)
    return all_corr_nat, all_corr_adv, lps
Example #12
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8/255,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8./255,
                  'eps_iter': 1./255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = BasicIterativeMethod(model,sess = sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps':8/255,
                  'eps_iter':1/255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MomentumIterativeMethod(model,sess = sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta':8/255,
                  'gamma':0.1,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = SaliencyMapMethod(model,sess = sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps':8/255,
                  'num_iterations':10,
                  'xi' :1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = VirtualAdversarialMethod(model,sess = sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = CarliniWagnerL2(model,sess = sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule":"EN",
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = ElasticNetMethod(model,sess = sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate":10,
            "overshoot":1e-3,
            "max_iter":100,
            "nb_classes":10,
            "clip_min":0,
            "clip_max":1
        }
        assert target is None
        method = DeepFool(model,sess = sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            'batch_size': batch_size,
            "binary_search_steps":10,
            "max_iterations":1000,
            "initial_const":1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is not None
        params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = LBFGS(model,sess = sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps': 8./255,
                  'eps_iter': 1./255,
                  'nb_iter':10,
                  'ord':np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MadryEtAl(model, sess = sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon': 1./255,
            'num_steps':10,
            'is_targeted':False,
            'early_stop_loss_threshold':None,
            'learning_rate':0.01,
            'delta':0.01,
            'batch_size': batch_size,
            'spsa_iters':1,
            'is_debug':False
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
            params["is_targeted"] = True
        method = SPSA(model, sess = sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i*batch_size:(i+1)*batch_size]
        #y_feed = y_test[i*batch_size:(i+1)*batch_size]

        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
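
attack_classifier above hides every CleverHans attack behind a single string switch, so a caller only supplies a session, the input placeholder, a wrapped model, and the test images. A minimal usage sketch with the function above in scope (the toy Keras classifier and the random stand-in data are assumptions, not part of the original snippet):

import numpy as np
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Flatten, Dense
from cleverhans.utils_keras import KerasModelWrapper

keras.backend.set_learning_phase(0)
sess = keras.backend.get_session()

# Toy 10-class classifier standing in for a real CIFAR-10 model (assumption).
net = Sequential([Flatten(input_shape=(32, 32, 3)), Dense(10, activation='softmax')])
model = KerasModelWrapper(net)

x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
x_test = np.random.rand(256, 32, 32, 3).astype(np.float32)  # stand-in images in [0, 1]

adv_imgs = attack_classifier(sess, x, model, x_test,
                             attack_method="momentum_iterative", batch_size=128)
print(adv_imgs.shape)  # (256, 32, 32, 3): two full batches of 128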
Example #13
bapp_params = {  # dict opening inferred from the generate_np call below; leading entries truncated in the source
        'max_iterations':100,
        'binary_search_steps':3,
        'initial_const':1,
        'clip_min':0, 
        'clip_max':1,
        'batch_size':100,
        'rnd': orders,
        'y_target':labels,
    }

from cleverhans.utils_keras import KerasModelWrapper
keras.backend.set_learning_phase(0)
sess = keras.backend.get_session()

models = [KerasModelWrapper(model) for model in models]
# ElasticNetMethod expects a single cleverhans model, so attack the first wrapped model
attack = ElasticNetMethod(models[0], sess=sess)
x_adv = attack.generate_np(x_val, **bapp_params)
# orig_labs = np.argmax(model.predict(x_val), axis=1)
# new_labs = np.argmax(model.predict(x_adv), axis=1)
# per-example L1 distance, flattening each image before taking the norm
l1dist = np.linalg.norm((x_val - x_adv).reshape(len(x_val), -1), ord=1, axis=-1)
print(np.mean(l1dist), np.max(l1dist), np.min(l1dist))
# print('normal mnist model acc:', np.mean(orig_labs==labels))
# print('advs mnist model acc:', np.mean(new_labs==labels))
# print('advs acc:', new_labs[orig_labs==labels] != labels[orig_labs==labels])
np.save('advs/'+conf[:-5].split('/')[-1]+'_'+str(target)+'_ead_show.npy', x_adv)

Example #14
def get_appropriate_attack(dataset, clip_range, attack_name, model, session,
                           harden, attack_type):
    # Check if valid dataset specified
    if dataset not in ["mnist", "svhn", "cifar10"]:
        raise ValueError('Mentioned dataset not implemented')
    attack_object = None
    attack_params = {'clip_min': clip_range[0], 'clip_max': clip_range[1]}
    if attack_name == "momentum":
        attack_object = MomentumIterativeMethod(model, sess=session)
        attack_params['eps'], attack_params['eps_iter'], attack_params[
            'nb_iter'] = 0.3, 0.06, 3
    elif attack_name == "fgsm":
        attack_object = FastGradientMethod(model, sess=session)
        if dataset == "mnist":
            attack_params['eps'] = 0.3
            if attack_type == "black":
                attack_params['eps'] = 0.3
        else:
            attack_params['eps'] = 0.1
    elif attack_name == "elastic":
        attack_object = ElasticNetMethod(model, sess=session)
        attack_params['binary_search_steps'], attack_params[
            'max_iterations'], attack_params['beta'] = 1, 5, 1e-2
        attack_params['initial_const'], attack_params[
            'learning_rate'] = 1e-1, 1e-1
        if dataset == "svhn":
            attack_params['initial_const'], attack_params[
                'learning_rate'] = 3e-1, 2e-1
        if attack_type == "black":
            attack_params['max_iterations'], attack_params[
                'binary_search_steps'] = 8, 2
        if dataset == "mnist":
            attack_params['learning_rate'], attack_params[
                'initial_const'] = 1e-1, 1e-3
            attack_params['binary_search_steps'], attack_params[
                'max_iterations'] = 4, 8
            if attack_type == "black":
                attack_params["max_iterations"], attack_params[
                    'binary_search_steps'] = 12, 5
    elif attack_name == "virtual":
        attack_object = VirtualAdversarialMethod(model, sess=session)
        attack_params['xi'] = 1e-6
        attack_params['num_iterations'], attack_params['eps'] = 1, 2.0
        if attack_type == "black":
            attack_params['num_iterations'] = 3
            attack_params['xi'], attack_params['eps'] = 1e-4, 3.0
        if dataset == "mnist":
            attack_params['num_iterations'] = 6
            attack_params['xi'], attack_params['eps'] = 1e0, 5.0
            if attack_type == "black":
                attack_params['num_iterations'], attack_params['eps'] = 10, 8.0
    elif attack_name == "madry":
        attack_object = MadryEtAl(model, sess=session)
        attack_params['nb_iter'], attack_params['eps'] = 5, 0.1
        if dataset == "mnist":
            attack_params['eps'], attack_params['nb_iter'] = 0.3, 15
            if attack_type == "black":
                attack_params['nb_iter'] = 20
    elif attack_name == "jsma":
        attack_object = SaliencyMapMethod(model, sess=session)
        attack_params['gamma'], attack_params['theta'] = 0.1, 1.0
    elif attack_name == "carlini":
        if dataset == "cifar10":
            attack_params["confidence"], attack_params[
                "max_iterations"] = 0.0, 100
            attack_params["binary_search_steps"], attack_params[
                "abort_early"] = 20, False
            attack_params["initial_const"] = 1e-4
        attack_object = CarliniWagnerL2(model, sess=session)
    else:
        raise ValueError('Mentioned attack not implemented')
    print(attack_name, ":", attack_params)
    return attack_object, attack_params
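
get_appropriate_attack returns the configured attack object and its tuned parameter dict separately, and the caller combines them through generate. A minimal sketch of that call pattern (the wrapped MNIST classifier, the session, and the stand-in batch are assumptions):

import numpy as np
import tensorflow as tf

# Assumed context: `model` is a cleverhans-wrapped MNIST classifier, `sess` a tf.Session.
attack, params = get_appropriate_attack("mnist", (0., 1.), "elastic",
                                        model, sess, harden=False, attack_type="white")

x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
adv_x = attack.generate(x, **params)

x_batch = np.random.rand(32, 28, 28, 1).astype(np.float32)  # stand-in MNIST batch
adv_images = sess.run(adv_x, feed_dict={x: x_batch})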
Example #15
					attack = FastGradientMethod(model=model, sess=sess)
				if attackMethod == "LBFGS":
					print ("Using LBFGS attack method!")
					attack = LBFGS(model=model, sess=sess)
				if attackMethod == "CarliniWagnerL2":
					print ("Using Carlini and Wagner attack method!")
					attack = CarliniWagnerL2(model=model, sess=sess)
				if attackMethod == "SPSA":
					print ("Using SPSA attack method!")
					attack = SPSA(model=model, sess=sess)
				if attackMethod == "MadryEtAl":
					print ("Using Madry et al. attack method!")
					attack = MadryEtAl(model=model, sess=sess)
				if attackMethod == "ElasticNet":
					print ("Using Elastic Net attack method!")
					attack = ElasticNetMethod(model=model, sess=sess)
				if attackMethod == "DeepFool":
					print ("Using Deep Fool attack method!")
					attack = DeepFool(model=model, sess=sess)
				if attackMethod == "MomentumIterative":
					print ("Using Momentum Iterative attack method!")
					attack = MomentumIterativeMethod(model=model, sess=sess)
				if attackMethod == "BasicIterative":
					print ("Using Basic Iterative attack method!")
					attack = BasicIterativeMethod(model=model, sess=sess)
				if attackMethod == "SaliencyMap":
					print ("Using Saliency Map attack method!")
					attack = SaliencyMapMethod(model=model, sess=sess)

				if attackMethod == "SPSA":
					adversarialOp = attack.generate(x=xPlaceholder, y=yPlaceholder, epsilon=Cfg.epsilon * 5.0, num_steps=Cfg.attackIterations)
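
The ElasticNetMethod fixtures in the next example instantiate SimpleModel, and the high-confidence Carlini-Wagner tests a TrivialModel, without showing either definition. In the cleverhans test suite these are tiny fixed-weight cleverhans.model.Model subclasses; the sketch below follows that pattern (the exact class bodies are an assumption, with weights taken from the my_model function in Example #19):

import tensorflow as tf
from cleverhans.model import Model

class SimpleModel(Model):
    """Fixed-weight two-layer, two-class network (sketch)."""
    def fprop(self, x, **kwargs):
        w1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
        w2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
        h1 = tf.nn.sigmoid(tf.matmul(x, w1))
        logits = tf.matmul(h1, w2)
        return {self.O_LOGITS: logits, self.O_PROBS: tf.nn.softmax(logits)}

class TrivialModel(Model):
    """Maps a scalar input to a pair of opposing logits (sketch)."""
    def fprop(self, x, **kwargs):
        logits = tf.matmul(x, tf.constant([[1., -1.]], dtype=tf.float32))
        return {self.O_LOGITS: logits, self.O_PROBS: tf.nn.softmax(logits)}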
Example #17
class TestElasticNetMethod(CleverHansTest):
    def setUp(self):
        super(TestElasticNetMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1
        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(np.argmax(feed_labs, axis=1) == new_labs) >
                        0.9)

    def test_generate_gives_adversarial_example(self):

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1
        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)

    def test_generate_np_high_confidence_targeted_examples(self):

        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            good_labs = new_labs[np.arange(10), np.argmax(feed_labs, axis=1)]
            bad_labs = new_labs[np.arange(
                10), 1 - np.argmax(feed_labs, axis=1)]

            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))
            self.assertTrue(np.mean(np.argmax(new_labs, axis=1) ==
                                    np.argmax(feed_labs, axis=1)) > .9)

    def test_generate_np_high_confidence_untargeted_examples(self):

        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            orig_labs = np.argmax(self.sess.run(trivial_model.get_logits(x_val)), axis=1)
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            self.assertTrue(np.mean(np.argmax(new_labs, axis=1) == orig_labs)
                            == 0)
            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))
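
The tests above exercise ElasticNetMethod only through its generic optimizer settings; the EAD-specific knobs stay at their defaults. An illustrative call that sets them explicitly (the values are assumptions, and attack/x_val stand for the fixture and inputs from the tests above):

# Emphasize the L1 term and select the winning example by L1 distortion.
x_adv = attack.generate_np(x_val,
                           beta=1e-2,           # weight of the L1 penalty in the EAD loss
                           decision_rule='L1',  # keep the successful example with least L1 distortion
                           max_iterations=100,
                           binary_search_steps=3,
                           initial_const=1,
                           clip_min=-5, clip_max=5,
                           batch_size=10)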
Example #18
def JSMA_FGSM_BIM(train_start=0,
                  train_end=60000,
                  test_start=0,
                  test_end=10000,
                  nb_epochs=6,
                  batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  backprop_through_attack=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm, preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
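
JSMA_FGSM_BIM takes every hyperparameter as a keyword argument, so running it needs only a direct call; a minimal entry point might look like this (the reduced epoch count is just for a quick smoke run, not a value from the original):

if __name__ == '__main__':
    JSMA_FGSM_BIM(nb_epochs=1, batch_size=128, learning_rate=0.001,
                  clean_train=True, nb_filters=64)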
Example #19
class TestElasticNetMethod(CleverHansTest):
    def setUp(self):
        super(TestElasticNetMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1
        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(np.argmax(feed_labs, axis=1) == new_labs) >
                        0.9)

    def test_generate_gives_adversarial_example(self):
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1
        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)

    def test_generate_np_high_confidence_targeted_examples(self):
        import tensorflow as tf

        def trivial_model(x):
            W1 = tf.constant([[1, -1]], dtype=tf.float32)
            res = tf.matmul(x, W1)
            return res

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model(x_adv))

            good_labs = new_labs[np.arange(10), np.argmax(feed_labs, axis=1)]
            bad_labs = new_labs[np.arange(
                10), 1 - np.argmax(feed_labs, axis=1)]

            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))
            self.assertTrue(np.mean(np.argmax(new_labs, axis=1) ==
                                    np.argmax(feed_labs, axis=1)) > .9)

    def test_generate_np_high_confidence_untargeted_examples(self):
        import tensorflow as tf

        def trivial_model(x):
            W1 = tf.constant([[1, -1]], dtype=tf.float32)
            res = tf.matmul(x, W1)
            return res

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            orig_labs = np.argmax(self.sess.run(trivial_model(x_val)), axis=1)
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model(x_adv))

            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            self.assertTrue(np.mean(np.argmax(new_labs, axis=1) == orig_labs)
                            == 0)
            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))
Example #20
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # do not log
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on BIM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on EN adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on VAT adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn()
    preds_2 = model_2(x)

    # need this for constructing the array
    sess.run(tf.global_variables_initializer())

    # run this again
    # sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
         #'y': None,
         'max_iterations': 100,
         'learning_rate': 0.1,
         'batch_size': batch_size,
         'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate':10,
                       'overshoot':0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Base Iterative
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. Virtual Adversarial Training (VAT)
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # ==> generate 10 targeted classes for every train data regardless
    # This call runs the Jacobian-based saliency map approach
    # Loop over the samples we want to perturb into adversarial examples

    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # add normal sample in!!!!
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # save the outputs so this file need not be re-run later
    np.savez("jsma_training_data.npz", x_train=X_train_data
             , y_train=Y_train_data)

    # >>> data = np.load('/tmp/123.npz')
    # >>> data['a']

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the function against 5 attacks
    # fgsm, base iterative, jsma, elastic net, and deepfool
    def evaluate_against_all():
            # 1 Clean Data
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                  args=eval_params)
            print('Legitimate accuracy: %0.4f' % accuracy)

            tmp = 'Legitimate accuracy: '+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 2 JSMA
            accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                                  Y_test, args=eval_params)

            print('JSMA accuracy: %0.4f' % accuracy)
            tmp = 'JSMA accuracy:'+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 3 FGSM
            accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                                  Y_test, args=eval_params)

            print('FGSM accuracy: %0.4f' % accuracy)
            tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 4 Base Iterative
            accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                                  Y_test, args=eval_params)

            print('Base Iterative accuracy: %0.4f' % accuracy)
            tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 5 Elastic Net
            accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                                  Y_test, args=eval_params)

            print('Elastic Net accuracy: %0.4f' % accuracy)
            tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 6 DeepFool
            accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                                  Y_test, args=eval_params)
            print('DeepFool accuracy: %0.4f' % accuracy)
            tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 7 C & W Attack
            accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                                  Y_test, args=eval_params)
            print('C & W accuracy: %0.4f' % accuracy)
            tmp = 'C & W  accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            # 8 Virtual Adversarial
            accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                                  Y_test, args=eval_params)
            print('VAT accuracy: %0.4f' % accuracy)
            tmp = 'VAT accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            print("*******End of Epoch***********\n\n")

        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net  + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                 predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
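
Because the crafted training set is written out with np.savez above, later experiments can reload it instead of repeating the per-sample JSMA loop; a minimal sketch:

import numpy as np

# Reload the adversarial training set written by mnist_tutorial_jsma above.
data = np.load("jsma_training_data.npz")
X_train_data, Y_train_data = data["x_train"], data["y_train"]
print(X_train_data.shape, Y_train_data.shape)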
Example #21
    }
elif attack_method == 'PGD' and order == 2:
    op = ProjectedGradientDescent(cleverhans_model, sess=sess)
    params = {
        'eps': eps,
        'eps_iter': eps_iter,
        'nb_iter': nb_iter,
        'ord': 2,
        'clip_max': 1.,
        'clip_min': 0
    }
elif attack_method == 'JSMA':
    op = SaliencyMapMethod(cleverhans_model, sess=sess)
    params = {'gamma': eps}
elif attack_method == 'EAD':
    op = ElasticNetMethod(cleverhans_model, sess=sess)
    params = {'confidence': eps, 'abort_early': True, 'max_iterations': 100}
elif attack_method == 'CW':
    op = CarliniWagnerL2(cleverhans_model, sess=sess)
    params = {'confidence': eps}
    x_test = x_test[eps_iter:eps_iter + decay_factor]
    y_test = y_test[eps_iter:eps_iter + decay_factor]
# generate adversarial examples
adv_x_op = op.generate(x_op, **params)

y_test = to_categorical(y_test)

# Run an evaluation of our model against the selected attack
total = 0
correct = 0
advs = []
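
The snippet breaks off right after the evaluation counters are initialized. A plausible completion, batching over x_test, crafting adversarial images, and scoring the model on them (the batch size and the probability op are assumptions):

probs_op = cleverhans_model.get_probs(x_op)  # assumes cleverhans_model wraps the classifier under attack
batch_size = 128  # assumed; the source does not show the value used

for i in range(0, len(x_test), batch_size):
    x_batch = x_test[i:i + batch_size]
    y_batch = y_test[i:i + batch_size]
    # Craft adversarial examples for this batch, then classify them.
    adv_batch = sess.run(adv_x_op, feed_dict={x_op: x_batch})
    advs.append(adv_batch)
    preds = sess.run(probs_op, feed_dict={x_op: adv_batch})
    correct += int(np.sum(np.argmax(preds, axis=1) == np.argmax(y_batch, axis=1)))
    total += len(x_batch)

print('accuracy on adversarial examples: %.4f' % (correct / float(total)))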