Example #1
class TestDeepFool(CleverHansTest):
    def setUp(self):
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x, overshoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5, clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
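
# A minimal standalone sketch of the same attack outside the test harness. It
# assumes only cleverhans's CallableModelWrapper and a toy two-class logits
# function (an illustration, not the SimpleModel helper used by the tests above).
import numpy as np
import tensorflow as tf

from cleverhans.attacks import DeepFool
from cleverhans.model import CallableModelWrapper


def toy_logits(x):
    # Fixed two-class linear classifier so the sketch is self-contained.
    w = tf.constant([[1.5, -2.0], [0.3, 0.3]], dtype=tf.float32)
    return tf.matmul(x, w)


sess = tf.Session()
attack = DeepFool(CallableModelWrapper(toy_logits, 'logits'), sess=sess)
x_val = np.random.rand(100, 2).astype(np.float32)
x_adv = attack.generate_np(x_val, nb_candidate=2, overshoot=0.02,
                           max_iter=50, clip_min=-5., clip_max=5.)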
Example #2
def attack_batch(model, in_im, net_name, attack_name, im_list, gt_labels, sample_size, batch_size):
    logging.basicConfig(filename='Logs/' + net_name + "_" + attack_name + '.log',
                        level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(message)s')
    config = tf.ConfigProto(device_count={'GPU': 2})
    imgs = open(im_list).readlines()  # [::10]
    gt_labels = open(gt_labels).readlines()  # [::10]
    top_1 = 0
    top_1_real = 0
    fool_rate = 0
    isotropic, size = get_params(net_name)
    imageModel = CallableModelWrapper(model, 'logits')

    
    with tf.Session(config=config) as sess:
        if attack_name == 'fgsm':
            attack = FastGradientMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, clip_min=-124, clip_max=155)
        elif attack_name == 'ifgsm':
            attack = BasicIterativeMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, eps_iter=1, nb_iter=12,
                                    clip_min=-124, clip_max=155)
        elif attack_name == 'cw2':
            attack = CarliniWagnerL2(imageModel, back='tf', sess=sess)
            adv_x = attack.generate(in_im, clip_min=-124, clip_max=155)
        elif attack_name == 'jsma':
            attack = SaliencyMapMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im)
        elif attack_name == 'pgd':
            attack = MadryEtAl(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, eps_iter=1, nb_iter=12,
                                    clip_min=-124, clip_max=155)
        elif attack_name == 'deepfool':
            attack = DeepFool(imageModel, back='tf', sess=sess)
            adv_x = attack.generate(in_im, clip_min=-124, clip_max=155)
        
        sess.run(tf.global_variables_initializer())
        img_loader = loader_func(net_name, sess, isotropic, size)
        batch_im = np.zeros((batch_size, size, size, 3))
        for i in range(sample_size // batch_size):
            lim = min(batch_size, len(imgs)-i*batch_size)
            for j in range(lim):
                im = img_loader(imgs[i*batch_size+j].strip())
                batch_im[j] = np.copy(im)
            gt = np.array([int(gt_labels[i * batch_size + j].strip())
                           for j in range(lim)])
            adv_x_np = adv_x.eval(feed_dict={in_im: batch_im})
            # Calculate the model's softmax probabilities on adversarial and clean inputs
            y_adv_prob = tf.nn.softmax(model(in_im), name="yadv").eval(feed_dict={in_im: adv_x_np})
            y_adv = np.argmax(y_adv_prob, 1)
            y_true_prob = tf.nn.softmax(model(in_im), name="ypred").eval(feed_dict={in_im: batch_im})
            y_true = np.argmax(y_true_prob, 1)

            # Calculate the top-1, top-1-true accuracies and fooling rate
            top_1 += np.sum(y_adv == gt)
            top_1_real += np.sum(y_true == gt)
            fool_rate += np.sum(y_true != y_adv)
            

            if i != 0 and i % 2 == 0:
                logging.info("batch: {} ==================================================================".format(i))
                logging.info("fooling rate {}".format((fool_rate)/float((i+1)*batch_size)*100))
            
    logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")           
    logging.info('Real Top-1 Accuracy = {}'.format(
        top_1_real / float(sample_size) * 100))
    logging.info('Top-1 Accuracy = {}'.format((top_1/float(sample_size)*100)))
    logging.info('Top-1 Fooling Rate = {}'.format(fool_rate/float(sample_size)*100))
    logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") 
# Defining the session
sess = backend.get_session()

# Define input TF placeholder
x = tf.placeholder(tf.float32, shape=(None, 784))
y = tf.placeholder(tf.float32, shape=(None, 10))

pred = np.argmax(keras_model.predict(x_test), axis=1)
acc = np.mean(np.equal(pred, y_test))

print("The Test accuracy is: {}".format(acc))

#################################### Adversarial Attack (DF = X_train (30000 samples)) ###################################
wrap = KerasModelWrapper(keras_model)
df = DeepFool(wrap, back='tf', sess=sess)
df_params = {
    'overshoot': 0.09,
    'max_iter': 10,
    'clip_max': 1,
    'clip_min': 0,
    'nb_candidate': 10
}
adv_x = df.generate_np(x_test, **df_params)
adv_conf = keras_model.predict(adv_x)
adv_pred = np.argmax(adv_conf, axis=1)
adv_acc = np.mean(np.equal(adv_pred, y_test))

print("The adversarial  accuracy is: {}".format(adv_acc))

###################################### Original Image ##########################################
Example #4
def build_adv(make_obs_tf, q_func, num_actions, epsilon, attack):
    with tf.variable_scope('deepq', reuse=tf.AUTO_REUSE):
        obs_tf_in = make_obs_tf("observation")
        stochastic_ph_adv = tf.placeholder(tf.bool, (), name="stochastic_adv")
        update_eps_ph_adv = tf.placeholder(tf.float32, (),
                                           name="update_eps_adv")
        eps = tf.get_variable("eps", (),
                              initializer=tf.constant_initializer(0))
        update_eps_expr_adv = eps.assign(
            tf.cond(update_eps_ph_adv >= 0, lambda: update_eps_ph_adv,
                    lambda: eps))

        def wrapper(x):
            return q_func(x, num_actions, scope="q_func", concat_softmax=False)

        if attack == 'fgsm':
            adversary = FastGradientMethod(CallableModelWrapper(
                wrapper, 'logits'),
                                           sess=U.get_session())
            adv_observations = adversary.generate(obs_tf_in.get(),
                                                  eps=epsilon,
                                                  clip_min=0.0,
                                                  clip_max=255.0,
                                                  ord=np.inf)
        elif attack == 'bim':
            adversary = BasicIterativeMethod(CallableModelWrapper(
                wrapper, 'logits'),
                                             sess=U.get_session())
            adv_observations = adversary.generate(obs_tf_in.get(),
                                                  eps=epsilon,
                                                  eps_iter=epsilon / 10,
                                                  nb_iter=10,
                                                  clip_min=0.0,
                                                  clip_max=255.0,
                                                  ord=np.inf)

        elif attack == 'deepfool':
            adversary = DeepFool(CallableModelWrapper(wrapper, 'logits'),
                                 sess=U.get_session())
            adv_observations = adversary.generate(obs_tf_in.get(),
                                                  clip_min=0.0,
                                                  clip_max=255.0,
                                                  nb_candidate=num_actions)
        elif attack == 'momentum':
            adversary = MomentumIterativeMethod(CallableModelWrapper(
                wrapper, 'logits'),
                                                sess=U.get_session())
            adv_observations = adversary.generate(obs_tf_in.get(),
                                                  eps=epsilon,
                                                  eps_iter=epsilon / 10,
                                                  nb_iter=10,
                                                  clip_min=0.0,
                                                  clip_max=255.0)
        elif attack == 'jsma':
            adversary = SaliencyMapMethod(CallableModelWrapper(
                wrapper, 'logits'),
                                          sess=U.get_session())
            adv_observations = adversary.generate(obs_tf_in.get(),
                                                  clip_min=0.0,
                                                  clip_max=255.0)

        craft_adv_obs = U.function(
            inputs=[obs_tf_in, stochastic_ph_adv, update_eps_ph_adv],
            outputs=adv_observations,
            givens={
                update_eps_ph_adv: -1.0,
                stochastic_ph_adv: True
            },
            updates=[update_eps_expr_adv])

        return craft_adv_obs
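
# Hypothetical use of the returned function (assumed names: a deepq
# observation-placeholder factory `make_obs_ph`, a `q_func`, an `env`, and a
# preprocessed observation `obs`). The stochastic/update_eps placeholders fall
# back to the `givens` defined above, so only the observation is passed.
craft_adv_obs = build_adv(make_obs_ph, q_func, num_actions=env.action_space.n,
                          epsilon=1.0 / 255, attack='deepfool')
adv_obs = craft_adv_obs(np.array(obs)[None])  # keeps the leading batch dimension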
Example #5
def test_attacks(batch_size=128,
                 source_samples=10,
                 model_path=os.path.join("models", "mnist"),
                 targeted=True):
    """
    Test many attacks on MNIST with deep Bayes classifier.
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    from cleverhans.utils_mnist import data_mnist
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                  train_end=60000,
                                                  test_start=0,
                                                  test_end=10000)
    img_rows, img_cols, channels = X_train[0].shape
    nb_classes = Y_train.shape[1]

    # Define input TF placeholder
    batch_size = min(batch_size, source_samples)
    x = tf.placeholder(tf.float32,
                       shape=(batch_size, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))

    # Define TF model graph
    model_name = str(sys.argv[1])
    if model_name == 'bayes':
        from load_bayes_classifier import BayesModel
        conv = True
        checkpoint = 0  #int(sys.argv[1])
        K = int(sys.argv[3])
        use_mean = True
        model = BayesModel(sess,
                           'mnist',
                           conv,
                           K,
                           checkpoint=checkpoint,
                           attack_snapshot=False,
                           use_mean=use_mean)
        if use_mean:
            model_name = 'bayes_mean_mlp'
        else:
            model_name = 'bayes_K%d' % K
    if model_name == 'cnn':
        from load_cnn_classifier import CNNModel
        model = CNNModel(sess, 'mnist')
    if model_name == 'wgan':
        from load_wgan_classifier import WGANModel
        conv = True
        checkpoint = 0  #int(sys.argv[1])
        K = int(sys.argv[3])
        T = int(sys.argv[4])
        model = WGANModel(sess, 'mnist', conv, K, T, checkpoint=checkpoint)
        model_name = 'wgan_K%d_T%d' % (K, T)

    preds = model.predict(x, softmax=True)  # output probabilities
    print("Defined TensorFlow model graph.")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    # Craft adversarial examples
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # make adv inputs and labels for the attack if targeted
    if targeted:
        adv_inputs = np.array([[instance] * nb_classes
                               for instance in X_test[:source_samples]],
                              dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, 1))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
    else:
        adv_inputs = X_test[:source_samples]
        adv_ys = Y_test[:source_samples]

    # Instantiate an attack object
    attack_method = str(sys.argv[2])
    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = FastGradientMethod(model_prob, sess=sess)
        from attack_config import config_fgsm
        attack_params = config_fgsm(targeted, adv_ys)
    if attack_method == 'bim':
        from cleverhans.attacks import BasicIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = BasicIterativeMethod(model_prob, sess=sess)
        from attack_config import config_bim
        attack_params = config_bim(targeted, adv_ys)
    if attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MomentumIterativeMethod(model_prob, sess=sess)
        from attack_config import config_mim
        attack_params = config_mim(targeted, adv_ys)
    if attack_method == 'jsma':
        from cleverhans.attacks import SaliencyMapMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = SaliencyMapMethod(model_prob, sess=sess)
        from attack_config import config_jsma
        attack_params = config_jsma(targeted, adv_ys)
    if attack_method == 'vat':
        from cleverhans.attacks import VirtualAdversarialMethod
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = VirtualAdversarialMethod(model_logit, sess=sess)
        from attack_config import config_vat
        attack_params = config_vat(targeted, adv_ys)
    if attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = CarliniWagnerL2(model_logit, sess=sess)
        from attack_config import config_cw
        attack_params = config_cw(targeted, adv_ys)
    if attack_method == 'elastic':
        from cleverhans.attacks import ElasticNetMethod
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = ElasticNetMethod(model_logit, sess=sess)
        from attack_config import config_elastic
        attack_params = config_elastic(targeted, adv_ys)
    if attack_method == 'deepfool':
        from cleverhans.attacks import DeepFool
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = DeepFool(model_logit, sess=sess)
        from attack_config import config_deepfool
        attack_params = config_deepfool(targeted, adv_ys)
    if attack_method == 'madry':
        from cleverhans.attacks import MadryEtAl
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MadryEtAl(model_prob, sess=sess)
        from attack_config import config_madry
        attack_params = config_madry(targeted, adv_ys)

    attack_params['batch_size'] = batch_size
    print('batchsize', batch_size)

    # perform the attack!
    adv = []
    n_batch = int(adv_inputs.shape[0] / batch_size)
    for i in xrange(n_batch):
        adv_batch = adv_inputs[i * batch_size:(i + 1) * batch_size]
        adv.append(attack.generate_np(adv_batch, **attack_params))
    adv = np.concatenate(adv, axis=0)

    for _ in xrange(5):
        y_adv = []
        for i in xrange(n_batch):
            adv_batch = adv[i * batch_size:(i + 1) * batch_size]
            y_adv.append(sess.run(preds, {x: adv_batch}))
        y_adv = np.concatenate(y_adv, axis=0)

        print('--------------------------------------')
        for i in xrange(10):
            print(np.argmax(y_adv[i * 10:(i + 1) * 10], 1))

    correct_pred = np.asarray(np.argmax(y_adv, 1) == np.argmax(adv_ys, 1),
                              dtype='f')
    adv_accuracy = np.mean(correct_pred)

    if not targeted:
        #        adv_accuracy, y_adv = model_eval(sess, x, y, preds, adv,
        #                                         adv_ys, args=eval_params,
        #                                         return_pred=True)
        #    else:
        #        adv_accuracy, y_adv = model_eval(sess, x, y, preds, adv,
        #                                         Y_test[:source_samples], args=eval_params,
        #                                         return_pred=True)
        adv_accuracy = 1. - adv_accuracy

    print('--------------------------------------')

    print(np.argmax(adv_ys[:10], 1))
    print(np.argmax(y_adv[:10], 1))
    for i in xrange(5):
        tmp = sess.run(preds, {x: adv[:100]})
        print(np.argmax(tmp[:10], 1))

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # visualisation
    vis_adv = True
    if vis_adv:
        N_vis = 100
        sys.path.append('../../utils')
        from visualisation import plot_images
        if channels == 1:
            shape = (img_rows, img_cols)
        else:
            shape = (img_rows, img_cols, channels)
        path = 'figs/'
        filename = model_name + '_' + attack_method
        if targeted:
            filename = filename + '_targeted'
        else:
            filename = filename + '_untargeted'
        plot_images(adv_inputs[:N_vis], shape, path, filename + '_data')
        plot_images(adv[:N_vis], shape, path, filename + '_adv')

    save_result = True
    if save_result:
        path = 'results/'
        filename = model_name + '_' + attack_method
        if targeted:
            filename = filename + '_targeted'
            y_input = adv_ys
        else:
            filename = filename + '_untargeted'
            y_input = Y_test[:source_samples]
        results = [adv_inputs, y_input, adv, y_adv]
        import pickle
        pickle.dump(results, open(path + filename + '.pkl', 'wb'))
        print("results saved at %s.pkl" % filename)

    return report
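
# Hypothetical command line for the test above (argument order inferred from the
# sys.argv reads: argv[1] = model name, argv[2] = attack method, argv[3] = K,
# argv[4] = T, the latter only for the wgan model), e.g.:
#   python test_attacks.py wgan deepfool 10 1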
Example #6
    def setUp(self):
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)
Example #7
def generate(sess, model, X, Y, attack_method, dataset, attack_params):
    """
    Generate adversarial examples for the given inputs.
    :param sess: the TF session
    :param model: the target Keras model. Models are named in the form of
                        model-<dataset>-<architecture>-<transform_type>.h5
    :param X: examples to be attacked
    :param Y: correct labels of the examples
    :param attack_method: attack used for generating adversarial examples
    :param dataset: name of the dataset, used to look up compilation parameters
    :param attack_params: parameters of the attack
    :return: adversarial examples and the corresponding labels
    """
    batch_size = 128

    img_rows, img_cols, nb_channels = X.shape[1:4]
    nb_classes = Y.shape[1]
    # label smoothing
    label_smoothing_rate = 0.1
    Y -= label_smoothing_rate * (Y - 1. / nb_classes)

    # to be able to call the model in the custom loss, we need to call it once before.
    # see https://github.com/tensorflow/tensorflow/issues/23769
    model(model.input)
    # wrap a keras model, making it fit the cleverhans framework
    wrap_model = KerasModelWrapper(model)

    # initialize the attack object
    attacker = None
    if attack_method == ATTACK.FGSM:
        """
        The Fast Gradient Sign Method,
        by Ian J. Goodfellow, Jonathon Shlens, Christian Szegedy 2014
        link: https://arxiv.org/abs/1412.6572
        """
        attacker = FastGradientMethod(wrap_model, sess=sess)
    elif attack_method == ATTACK.JSMA:
        """
        The Jacobian-based Saliency Map Method
        by Nicolas Papernot, Patrick McDaniel, Somesh Jha, Matt Fredrikson, Z. Berkay Celik, Ananthram Swami 2016
        link: https://arxiv.org/abs/1511.07528
        """
        batch_size = 64
        attacker = SaliencyMapMethod(wrap_model, sess=sess)
    elif attack_method == ATTACK.CW_L2:
        """
        Untargeted attack
        """
        attacker = CarliniWagnerL2(wrap_model, sess=sess)

    elif attack_method == ATTACK.CW_Linf:
        """
        Untargeted attack
        """
        # TODO: bug fix --- cannot compute gradients correctly
        # attacker = CarliniWagnerLinf(wrap_model, sess=sess)

    elif attack_method == ATTACK.CW_L0:
        """
        Untargeted attack
        """
        # TODO: bug fix --- cannot compute gradients correctly
        # attacker = CarliniWagnerL0(wrap_model, sess=sess)

    elif attack_method == ATTACK.DEEPFOOL:
        """
        The DeepFool method is an untargeted, iterative attack
        based on an iterative linearization of the classifier.
        by Seyed-Mohsen Moosavi-Dezfooli, Alhussein Fawzi, Pascal Frossard, 2016
        link: https://arxiv.org/abs/1511.04599
        """
        batch_size = 64
        ord = attack_params.pop('ord')

        if ord == 2:
            # cleverhans supports only l2 norm so far.
            attacker = DeepFool(wrap_model, sess=sess)
        elif ord == np.inf:
            # TODO
            pass
        else:
            raise ValueError('DeepFool supports only l2 and l-inf norms.')

    elif attack_method == ATTACK.BIM:
        """
        The Basic Iterative Method (also, iterative FGSM)
        by Alexey Kurakin, Ian Goodfellow, Samy Bengio, 2016
        link: https://arxiv.org/abs/1607.02533
        """
        attacker = BasicIterativeMethod(wrap_model, back='tf', sess=sess)
    elif attack_method == ATTACK.PGD:
        """
        The Projected Gradient Descent approach.
        """
        attacker = ProjectedGradientDescent(wrap_model)
    elif attack_method == ATTACK.MIM:
        """
        The Momentum Iterative Method
        by Yinpeng Dong, Fangzhou Liao, Tianyu Pang, Hang Su, Jun Zhu, Xiaolin Hu, Jianguo Li, 2018
        link: https://arxiv.org/abs/1710.06081
        """
        attacker = MomentumIterativeMethod(wrap_model, sess=sess)
    else:
        raise ValueError('{} attack is not supported.'.format(attack_method.upper()))

    # define custom loss function for adversary
    compile_params = get_compile_params(dataset,
                                        get_adversarial_metric(model, attacker, attack_params))
    print(compile_params)
    print('#### Recompile the model')
    model.compile(optimizer=compile_params['optimizer'],
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['accuracy', compile_params['metrics']])
    # define the graph
    print('define the graph')
    adv_x = attacker.generate(model.input, **attack_params)
    # consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)

    # generating adversarial examples
    print('generating adversarial example...')
    adv_examples, = batch_eval(sess, [model.input, wrap_model(adv_x)], [adv_x],
                               [X, Y], batch_size=batch_size)

    if MODE.DEBUG:
        score = model.evaluate(adv_examples, Y, verbose=2)
        print('*** Evaluation on adversarial examples: {}'.format(score))

    return adv_examples, Y
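
# The DeepFool docstring above describes the attack as an iterative linearization
# of the classifier. A minimal numpy sketch of the underlying geometric step for
# an affine binary classifier f(x) = w.x + b (an illustration of the idea, not
# the cleverhans implementation):
import numpy as np


def closest_boundary_step(x, w, b):
    # Smallest-L2 perturbation that moves x onto the decision boundary of
    # f(x) = w.x + b; DeepFool applies this step repeatedly to a local
    # linearization of the network and finally adds a small overshoot.
    f_x = np.dot(w, x) + b
    return -f_x * w / np.dot(w, w)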
Example #8
def DF(torch_model, dataset, eps_list, opt, c, h, w, clip_min, clip_max):

    if opt == 'evaluate':
        acclist = []
        for eps in eps_list:
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(
                None,
                c,
                h,
                w,
            ))
            # Convert pytorch model to a tf_model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create a DeepFool attack
            atk_op = DeepFool(cleverhans_model, sess=sess)
            atk_params = {'clip_min': clip_min, 'clip_max': clip_max}
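            # Note: DeepFool takes no eps parameter, so the eps value from
            # eps_list does not influence atk_params here.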
            adv_x_op = atk_op.generate(x_op, **atk_params)
            adv_preds_op = tf_model_fn(adv_x_op)

            # Run an evaluation of our model against DeepFool
            total = 0
            correct = 0
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
                correct += (np.argmax(
                    adv_preds, axis=1) == ys.cpu().detach().numpy()).sum()
                total += len(xs)

            acc = float(correct) / total
            print('Adv accuracy: {:.3f}'.format(acc * 100))
            acclist.append(acc)
        return acclist

    elif opt == 'generate':
        advpacklist = []
        for eps in eps_list:
            advlist = []
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(
                None,
                c,
                h,
                w,
            ))
            # Convert pytorch model to a tf_model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create a DeepFool attack
            atk_op = DeepFool(cleverhans_model, sess=sess)
            atk_params = {'clip_min': clip_min, 'clip_max': clip_max}
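            # As in the 'evaluate' branch, eps from eps_list is unused because
            # DeepFool has no eps parameter.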
            adv_x_op = atk_op.generate(x_op, **atk_params)

            # Generate adversarial examples with DeepFool for each batch
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                adv = torch.from_numpy(sess.run(adv_x_op, feed_dict={x_op:
                                                                     xs}))
                if ys == np.argmax(torch_model(xs).data.cpu().numpy()):
                    pred = np.argmax(torch_model(adv).data.cpu().numpy())
                    if ys != pred:
                        adv = adv.numpy()
                        advlist.append(adv)
            print(len(advlist))
            advpacklist.append(advlist)
        return advpacklist
Example #9
					attack = LBFGS(model=model, sess=sess)
				if attackMethod == "CarliniWagnerL2":
					print ("Using Carlini and Wagner attack method!")
					attack = CarliniWagnerL2(model=model, sess=sess)
				if attackMethod == "SPSA":
					print ("Using SPSA attack method!")
					attack = SPSA(model=model, sess=sess)
				if attackMethod == "MadryEtAl":
					print ("Using Madry et al. attack method!")
					attack = MadryEtAl(model=model, sess=sess)
				if attackMethod == "ElasticNet":
					print ("Using Elastic Net attack method!")
					attack = ElasticNetMethod(model=model, sess=sess)
				if attackMethod == "DeepFool":
					print ("Using Deep Fool attack method!")
					attack = DeepFool(model=model, sess=sess)
				if attackMethod == "MomentumIterative":
					print ("Using Momentum Iterative attack method!")
					attack = MomentumIterativeMethod(model=model, sess=sess)
				if attackMethod == "BasicIterative":
					print ("Using Basic Iterative attack method!")
					attack = BasicIterativeMethod(model=model, sess=sess)
				if attackMethod == "SaliencyMap":
					print ("Using Saliency Map attack method!")
					attack = SaliencyMapMethod(model=model, sess=sess)

				if attackMethod == "SPSA":
					adversarialOp = attack.generate(x=xPlaceholder, y=yPlaceholder, epsilon=Cfg.epsilon * 5.0, num_steps=Cfg.attackIterations)
				elif attackMethod == "LBFGS":
					adversarialOp = attack.generate(x=xPlaceholder, y_target=tf.one_hot(yPlaceholder, depth=Cfg.numClasses), **attackParams)
				else:
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, train_dir="train_dir",
                   filename="mnist.ckpt", load_model=False,
                   testing=False, label_smoothing=0.1, method='FGSM'):
    """
    MNIST tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param method: attack used to craft the stored examples ('FGSM', 'BIM' or 'DeepFool')
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    print('y_train: ', y_train.shape)

    x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2), (0, 0)), mode='constant')
    x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2), (0, 0)), mode='constant')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    print('img_rows: {}, img_cols: {}, nchannels: {}'.format(img_rows, img_cols, nchannels))
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    if train_dir == 'mnist_ff_model':
        model = mnist_ff_model()
    elif train_dir == 'mnist_BP_model':
        model = mnist_model(img_rows=img_rows, img_cols=img_cols,
                            channels=nchannels, nb_filters=64,
                            nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
#        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, save=True, rng=rng)
        print('Training done!')

    # Calculate training error
    print('testing param:', testing)
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the chosen attack object and graph
    # fgsm = FastGradientMethod(wrap, sess=sess)
    if method == 'FGSM':
        clw = FastGradientMethod(wrap, sess=sess)
    elif method == 'BIM':
        clw = BasicIterativeMethod(wrap, sess=sess)
    elif method == 'DeepFool':
        clw = DeepFool(wrap, sess=sess)
    else:
        raise NotImplementedError
    print('method chosen: ', method)
    clw_params = {}
    adv_x = clw.generate(x, **clw_params)
    with sess.as_default():
        feed_dict = {x: x_test, y: y_test}
        store_data = adv_x.eval(feed_dict=feed_dict)
        print('store_data: {}'.format(store_data.shape))
        save_name = '{}/mnist_{}_data.pkl'.format(train_dir, method)
        with open(save_name, 'wb') as fw:
            pickle.dump(store_data, fw, protocol=2)
            print('data stored as {}'.format(save_name))


    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)

    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, x_train,
                         y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc


    return report
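
# A small follow-up sketch (path assumptions: train_dir='mnist_BP_model' and
# method='DeepFool', matching the save_name format used above) to reload the
# stored adversarial examples.
import pickle

with open('mnist_BP_model/mnist_DeepFool_data.pkl', 'rb') as fr:
    stored_adv = pickle.load(fr)
print('loaded adversarial data with shape {}'.format(stored_adv.shape))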
Example #11
def baseline_deepfool(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      nb_epochs=6,
                      batch_size=128,
                      learning_rate=0.001,
                      clean_train=True,
                      testing=False,
                      backprop_through_attack=False,
                      nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 50,
        'clip_min': 0.,
        'clip_max': 1.
    }
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        #
        # HERE already trained model, thus we need a new one (model_2)
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the DeepFool attack object and graph
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)

        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on DeepFool adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
        # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    deepfool2 = DeepFool(model_2, sess=sess)
    adv_x_2 = deepfool2.generate(x, **deepfool_params)
    if not backprop_through_attack:
        # For the DeepFool attack used in this tutorial, gradients do not flow
        # through the attack construction, so enabling this flag would not
        # change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)

    #
    # let's generate FGSM examples
    #
    fgsm = FastGradientMethod(model_2, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_fgsm = tf.stop_gradient(adv_x_fgsm)
    preds_2_fgsm = model_2(adv_x_fgsm)

    # DON'T WANT TO TRAIN on FGSM adv examples yet

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on FGSM adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_fgsm,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

        # Accuracy of the DeepFool adv trained model on DeepFool examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on DeepFool adversarial examples: %0.4f' %
              accuracy)

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
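
# Hypothetical entry point for the tutorial above (a sketch; the original
# script's main() / flag parsing is not shown).
if __name__ == '__main__':
    baseline_deepfool(nb_epochs=6, batch_size=128, learning_rate=0.001,
                      clean_train=True, backprop_through_attack=False,
                      nb_filters=64)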
Example #12
def attack_classifier(sess,
                      x,
                      y,
                      model,
                      x_test,
                      y_test,
                      attack_method="fgsm",
                      target=None,
                      batch_size=128):
    tf.set_random_seed(1822)
    set_log_level(logging.DEBUG)

    # Initialize attack
    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8 / 255, 'clip_min': 0., 'clip_max': 1.}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {
            'eps': 8 / 255,
            'eps_iter': 1 / 255,
            'nb_iter': 10,
            'clip_min': 0.,
            'clip_max': 1.
        }
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = BasicIterativeMethod(model, sess=sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {
            'eps': 8 / 255,
            'eps_iter': 1 / 255,
            'nb_iter': 10,
            'clip_min': 0.,
            'clip_max': 1.
        }
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = MomentumIterativeMethod(model, sess=sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {
            'theta': 8 / 255,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is None
        method = SaliencyMapMethod(model, sess=sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {
            'eps': 8 / 255,
            'num_iterations': 10,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is None
        method = VirtualAdversarialMethod(model, sess=sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence": 0,
            "batch_size": 128,
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = CarliniWagnerL2(model, sess=sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule": "EN",
            "confidence": 0,
            "batch_size": 128,
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = ElasticNetMethod(model, sess=sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate": 10,
            "overshoot": 1e-3,
            "max_iter": 100,
            "nb_classes": 10,
            "clip_min": 0,
            "clip_max": 1
        }
        assert target is None
        method = DeepFool(model, sess=sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            'batch_size': 128,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "initial_const": 1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is not None
        params["y_target"] = tf.constant(
            np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = LBFGS(model, sess=sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {
            'eps': 8 / 255,
            'eps_iter': 1 / 255,
            'nb_iter': 10,
            'ord': np.inf,
            'clip_min': 0.,
            'clip_max': 1.
        }
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = MadryEtAl(model, sess=sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon': 1 / 255,
            'num_steps': 10,
            'is_targeted': False,
            'early_stop_loss_threshold': None,
            'learning_rate': 0.01,
            'delta': 0.01,
            'batch_size': 128,
            'spsa_iters': 1,
            'is_debug': False
        }
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
            params["is_targeted"] = True
        method = SPSA(model, sess=sess)

    else:
        raise ValueError("Can not recognize this attack method")

    adv_x = method.generate(x, **params)
    num_batch = int(np.ceil(x_test.shape[0] / float(batch_size)))
    adv_imgs = []
    for i in range(num_batch):
        if (i + 1) * batch_size >= x_test.shape[0]:
            adv_imgs.append(
                sess.run(adv_x,
                         feed_dict={
                             x: x_test[i * batch_size:],
                             y: y_test[i * batch_size:]
                         }))
        else:
            adv_imgs.append(
                sess.run(adv_x,
                         feed_dict={
                             x: x_test[i * batch_size:(i + 1) * batch_size],
                             y: y_test[i * batch_size:(i + 1) * batch_size]
                         }))
    adv_imgs = np.concatenate(adv_imgs, axis=0)

    return adv_imgs
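
# Hypothetical usage (assumes sess, x, y, a cleverhans-wrapped model, and
# x_test/y_test scaled to [0, 1], matching the 8/255 budgets above).
adv_imgs = attack_classifier(sess, x, y, model, x_test, y_test,
                             attack_method="deepfool", batch_size=128)
print(adv_imgs.shape)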
Example #13
            #     name='my_target',
            #     sparse=K.is_sparse(model.outputs[0]),
            #     dtype=K.dtype(model.outputs[0])
            # )

            # model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['acc'], target=[target])
            model.compile(optimizer='Adam',
                          loss='sparse_categorical_crossentropy',
                          metrics=['acc'])
            model.load_weights(model_path)
            # ------------ Unsupervised Attacking -------------
            y_test = y_test.flatten()
            y_test_pre = np.argmax(model.predict(x_test), axis=1)

            ch_model = KerasModelWrapper(model)
            deepfool = DeepFool(ch_model, back='tf', sess=K.get_session())

            raw_acc = np.sum(y_test_pre == y_test) / len(y_test_pre)
            # print(np.sum(y_test_pre == y_test), y_test_pre.shape, y_test.shape)
            print(raw_acc)
            v, fool_list = universal_perturbation(model, deepfool, x_train)
            print(v, '\n', fool_list)
            adv_x = x_test + v
            print(v.shape)
            # np.savez('adv_x_test.npz', x=adv_x)

            # df_params = {'max_iter': 50, 'nb_candidate': 3,
            #              'clip_min': -1., 'clip_max': 1.}
            # adv_x = deepfool.generate_np(x_test, **df_params)
            # v = adv_x - x_test
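
            # A follow-up sketch (uses model, adv_x, y_test and y_test_pre from
            # above, with the clip range from the commented df_params): measure
            # how often the universal perturbation flips the model's prediction.
            y_adv_pre = np.argmax(model.predict(np.clip(adv_x, -1., 1.)), axis=1)
            fooling_rate = np.mean(y_adv_pre != y_test_pre)
            print('universal perturbation fooling rate: {:.3f}'.format(fooling_rate))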
Example #14
    def setUp(self):
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64,
                   num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    dp_params = {'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        sess.run(init)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        s = []
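        # Collect, for each test example, the gap between the largest and
        # second-largest predicted probability; the histogram below plots it.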
        for i in range(0, len(X_test), 1):
            pred = sess.run(preds, {x: X_test[i:i + 1]})
            print(pred)
            print(Y_test[i:i + 1])
            s.append(np.sort(pred)[0, -1] - np.sort(pred)[0, -2])
        #Draw a histogram
        def draw_hist(myList, Title, Xlabel, Ylabel):
            plt.hist(myList,
                     np.arange(0, 1, 0.01),
                     normed=True,
                     stacked=True,
                     facecolor='blue')
            plt.xlabel(Xlabel)
            plt.ylabel(Ylabel)
            plt.title(Title)
            plt.show()

        draw_hist(myList=s,
                  Title='legitimate',
                  Xlabel='difference between max and second largest',
                  Ylabel='Probability')

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the deepfool attack object and
        # graph

        deepfool = DeepFool(model, back='tf', sess=sess)
        adv_x = deepfool.generate(x, **dp_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        '''
        s = []
        for i in range(0,len(X_test),1):
            pred = sess.run(preds_adv, {x: X_test[i:i+1]})
            print(pred)
            print(Y_test[i:i+1])
            s.append(np.sort(pred)[0,-1]-np.sort(pred)[0,-2])
        
        #Draw a histogram
        def draw_hist(myList,Title,Xlabel,Ylabel):
            plt.hist(myList,np.arange(0,1,0.01),normed=True,stacked=True,facecolor='red')
            plt.xlabel(Xlabel)       
            plt.ylabel(Ylabel)
            plt.title(Title)
            plt.show()
        draw_hist(myList=s,Title='adversarial',Xlabel='difference between max and second largest',
               Ylabel='Probability')
        '''

        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc
        return report
Beispiel #16
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=True, label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
  tf.keras.backend.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session()
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Define TF model graph
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds = model(x)
  print("Defined TensorFlow model graph.")

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
  wrap = KerasModelWrapper(model)

  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate()
  else:
    print("Model was not loaded, training from scratch.")
    loss = CrossEntropy(wrap, smoothing=label_smoothing)
    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng)

  # Calculate training error
  if testing:
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
    report.train_clean_train_clean_eval = acc

  df = DeepFool(wrap, sess=sess)
  adv_x = df.generate(x)

  batch = 1000
  x_adv_test = None
  x_adv_train = None
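  # Run the DeepFool graph in chunks of `batch` inputs and concatenate the
  # results, so a single sess.run never has to process a whole split at once.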

  for i in tqdm(range(int(len(x_test) / batch))):
    tmp = sess.run(adv_x, feed_dict={x: x_test[i*batch:(i+1)*batch]})
    if x_adv_test is None:
      x_adv_test = tmp
    else:
      x_adv_test = np.concatenate((x_adv_test, tmp))

  for i in tqdm(range(int(len(x_train) / batch))):
    tmp = sess.run(adv_x, feed_dict={x: x_train[i*batch:(i+1)*batch]})
    if x_adv_train is None:
      x_adv_train = tmp
    else:
      x_adv_train = np.concatenate((x_adv_train, tmp))

  def evaluate_adv():
    # Evaluate the accuracy of the MNIST model on adversarial test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_adv_test, y_test, args=eval_params)
    report.clean_train_adv_eval = acc
    print('Test accuracy on adversarial examples: %0.4f' % acc)

  evaluate_adv()

  save_list = [x_adv_train, x_adv_test]
  print(x_adv_train.shape)
  print(x_adv_test.shape)
  pickle.dump(save_list, open("./df.pkl", 'wb'))
Beispiel #17
0
def adv_generate(nb_epochs=25,
                 batch_size=128,
                 learning_rate=0.001,
                 clean_train=True,
                 testing=False,
                 nb_filters=64,
                 num_threads=None,
                 data='cifar',
                 adv_attack='fgsm',
                 save_dir='data'):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    # set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    config = tf.ConfigProto(**config_args)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    if data == "mnist":
        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                      train_end=60000,
                                                      test_start=0,
                                                      test_end=10000)
    else:
        X_train, Y_train, X_test, Y_test = data_cifar10()

    # print (Y_test.shape)
    '''
    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        imsave(os.path.join("benign", str(i) + ".jpg"), img)

    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        benign_path = "benign_" + str(np.argmax(Y_test[i,:], axis=0))
        if not os.path.exists(benign_path):
            os.makedirs(benign_path)
        imsave(os.path.join(benign_path, str(i) + ".jpg"), img)
    '''
    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    if data == 'mnist':
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2018, 7, 18])

    if clean_train:
        if data == 'mnist':
            model = build_model(0.01, 1e-6)
        else:
            model = build_model_cifar(0.01, 1e-6)

        preds = model(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == 10000, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        if adv_attack == "FGSM":
            # Initialize the attack object and graph
            # FGSM
            print "FGSM ATTACK..."
            fgsm_params = {'eps': 0.1, 'clip_min': 0., 'clip_max': 1.}
            fgsm = FastGradientMethod(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            preds_adv = model(adv_x)
        elif adv_attack == "CWL2":
            # CWL2
            print "CWL2 ATTACK..."
            cwl2_params = {'batch_size': 8}
            cwl2 = CarliniWagnerL2(model, sess=sess)
            adv_x = cwl2.generate(x, **cwl2_params)
            preds_adv = model(adv_x)
        elif adv_attack == "JSMA":
            # JSMA
            print "JSMA ATTACK..."
            jsma = SaliencyMapMethod(model, back='tf', sess=sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.1,
                'clip_min': 0.,
                'clip_max': 1.
            }
            adv_x = jsma.generate(x, **jsma_params)
            preds_adv = model(adv_x)
        elif adv_attack == "DeepFool":
            # DeepFool
            print "DeepFool ATTACK..."
            deepfool = DeepFool(model, sess=sess)
            deepfool_params = {
                'nb_candidate': 10,
                'overshoot': 0.02,
                'max_iter': 50,
                'clip_min': 0.0,
                'clip_max': 1.0
            }
            adv_x = deepfool.generate(x, **deepfool_params)
            preds_adv = model(adv_x)
        elif adv_attack == "LBFGS":
            # LBFGS
            print "LBFGS ATTACK..."
            lbfgs_params = {'y_target': y, 'batch_size': 100}
            lbfgs = LBFGS(model, sess=sess)
            adv_x = lbfgs.generate(x, **lbfgs_params)
            preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        adv_imgs = []
        adv_imgs_test = []

        if not adv_attack == "LBFGS":
            for i in range(5000):
                adv_imgs_train, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_train[i * 10:(i + 1) * 10]})
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)
            for i in range(1000):
                adv_imgs_tmp, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_test[i * 10:(i + 1) * 10]})
                adv_imgs_test.append(adv_imgs_tmp)
            adv_imgs_test = np.vstack(adv_imgs_test)
        else:
            for i in range(500):
                target = np_utils.to_categorical(
                    (np.argmax(Y_train[i * 100:(i + 1) * 100], axis=1) + 1) %
                    10, 10)
                adv_imgs_train, _ = sess.run([adv_x, preds_adv],
                                             feed_dict={
                                                 x: X_train[i * 100:(i + 1) *
                                                            100],
                                                 y: target
                                             })
                print('train image: %s' % str(i))
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)

            for i in range(100):
                target = np_utils.to_categorical(
                    (np.argmax(Y_test[i * 100:(i + 1) * 100], axis=1) + 1) %
                    10, 10)
                adv_imgs_tmp, _ = sess.run([adv_x, preds_adv],
                                           feed_dict={
                                               x: X_test[i * 100:(i + 1) *
                                                         100],
                                               y: target
                                           })
                adv_imgs_test.append(adv_imgs_tmp)
                print('test image: %s' % str(i))
            adv_imgs_test = np.vstack(adv_imgs_test)
        '''
        for i in range(6):
            target = np_utils.to_categorical((np.argmax(Y_train[i*10000: (i+1)*10000, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train, adv_labels_train = sess.run([adv_x, preds_adv], feed_dict={x: X_train[i*10000: (i+1)*10000,...],
                                                                                       y: target})
        for i in range(60000):
            target = np_utils.to_categorical((np.argmax(Y_train[i:i+1, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train = sess.run([adv_x], feed_dict={x: X_train[i:i+1,...], y: target})
            print (len(adv_imgs_train), adv_imgs_train[0].shape, adv_imgs_train[1])
        '''
        label_truth_train = np.argmax(Y_train, axis=1)
        label_truth_test = np.argmax(Y_test, axis=1)

        save_dir = os.path.join(save_dir, adv_attack)  #, "eps_" + str(eps)))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        print(adv_imgs.shape, adv_imgs_test.shape)
        provider.save_h5(adv_imgs, label_truth_train,
                         os.path.join(save_dir, "train_adv.h5"))
        provider.save_h5(adv_imgs_test, label_truth_test,
                         os.path.join(save_dir, "test_adv.h5"))
        # utils.save_h5(X_train, label_truth_train, "FGSM/train_benign.h5")
        # utils.save_h5(X_test, label_truth_test, "FGSM/test_benign.h5")
        '''
        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
            imsave(os.path.join("adv", str(i) + ".jpg"), img)

        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
	    adv_path = "adv_" + str(np.argmax(adv_labels[i,:], axis=0))
	    if not os.path.exists(adv_path):
	        os.makedirs(adv_path)
	    imsave(os.path.join(adv_path, str(i) + ".jpg"), img)
        '''

        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

    return report
Beispiel #18
0
def train(alpha, eps2_ratio, gen_ratio, fgsm_eps, LR, logfile):
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , eps2_ratio \t %d , gen_ratio \t %d \n"%(fgsm_eps, LR, alpha, eps2_ratio, gen_ratio))
    #############################
    ##Hyper-parameter Setting####
    #############################
    hk = 256; #number of hidden units at the last layer
    Delta2 = (14*14+2)*25; #global sensitivity for the first hidden layer
    Delta3_adv = 2*hk #10*(hk + 1/4 * hk**2) #10*(hk) #global sensitivity for the output layer
    Delta3_benign = 2*hk #10*(hk); #global sensitivity for the output layer
    D = 50000; #size of the dataset
    L = 2499; #batch size
    image_size = 28;
    padding = 4;
    #numHidUnits = 14*14*32 + 7*7*64 + M + 10; #number of hidden units
    #gen_ratio = 1
    epsilon1 = 0.0; #0.175; #epsilon for dpLRP
    epsilon2 = 0.1*(1 + gen_ratio); #epsilon for the first hidden layer
    epsilon3 = 0.1*(1); #epsilon for the last hidden layer
    total_eps = epsilon1 + epsilon2 + epsilon3
    print(total_eps)
    uncert = 0.1; #uncertainty modeling at the output layer
    infl = 1; #inflation rate in the privacy budget redistribution
    R_lowerbound = 1e-5; #lower bound of the LRP
    c = [0, 40, 50, 200] #norm bounds
    epochs = 200; #number of epochs
    preT_epochs = 50; #number of epochs
    T = int(D/L*epochs + 1); #number of steps T
    pre_T = int(D/L*preT_epochs + 1);
    step_for_epoch = int(D/L); #number of steps for one epoch
    
    broken_ratio = 1
    #alpha = 9.0 # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    #eps2_ratio = 10; # [1/10, 1/8, 1/6, 1/4, 1/2, 1, 2, 4, 6, 8, 10]
    #eps_benign = 1/(1+eps2_ratio)*(2*epsilon2)
    #eps_adv = eps2_ratio/(1+eps2_ratio)*(2*epsilon2)
    
    #fgsm_eps = 0.1
    rand_alpha = 0.05
    
    ##Robustness##
    robustness_T = (fgsm_eps*18*18*L*epsilon2)/Delta2;
    ####
    
    LRPfile = os.getcwd() + '/Relevance_R_0_075.txt';
    #############################
    mnist = input_data.read_data_sets("MNIST_data/", one_hot = True);

    #############################
    ##Construct the Model########
    #############################
    #Step 4: Randomly initiate the noise, Compute 1/|L| * Delta3 for the output layer#

    #Compute the 1/|L| * Delta3 for the last hidden layer#
    """eps3_ratio = Delta3_adv/Delta3_benign;
    eps3_benign = 1/(1+eps3_ratio)*(epsilon3)
    eps3_adv = eps3_ratio/(1+eps3_ratio)*(epsilon3)"""
    loc, scale3_benign, scale3_adv = 0., Delta3_benign/(epsilon3*L), Delta3_adv/(epsilon3*L);
    ###
    #End Step 4#
    # Parameters Declarification
    W_conv1 = weight_variable('W_conv1', [5, 5, 1, 32], collect=[AECODER_VARIABLES]);
    b_conv1 = bias_variable('b_conv1', [32], collect=[AECODER_VARIABLES]);

    shape     = W_conv1.get_shape().as_list()
    w_t       = tf.reshape(W_conv1, [-1, shape[-1]])
    w         = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivity = tf.reduce_max(sing_vals)
    gamma = 2*(14*14 + 2)*25/(L*sensitivity)
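    # sensitivity is the largest singular value of the reshaped W_conv1; gamma is
    # evaluated later (as _gamma) when redistributing epsilon2 into epsilon2_update.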
    
    dp_epsilon=1.0 #0.1
    delta_r = fgsm_eps*(image_size**2);
    #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
    #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
    
    W_conv2 = weight_variable('W_conv2', [5, 5, 32, 64], collect=[CONV_VARIABLES]);
    b_conv2 = bias_variable('b_conv2', [64], collect=[CONV_VARIABLES]);

    W_fc1 = weight_variable('W_fc1', [4 * 4 * 64, hk], collect=[CONV_VARIABLES]);
    b_fc1 = bias_variable('b_fc1', [hk], collect=[CONV_VARIABLES]);

    W_fc2 = weight_variable('W_fc2', [hk, 10], collect=[CONV_VARIABLES]);
    b_fc2 = bias_variable('b_fc2', [10], collect=[CONV_VARIABLES]);

    """scale2 = tf.Variable(tf.ones([hk]))
    beta2 = tf.Variable(tf.zeros([hk]))
    tf.add_to_collections([CONV_VARIABLES], scale2)
    tf.add_to_collections([CONV_VARIABLES], beta2)"""

    params = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2]
    ###


    #Step 5: Create the model#
    noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);
    adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);

    keep_prob = tf.placeholder(tf.float32);
    x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    x_image = tf.reshape(x, [-1,image_size,image_size,1]);

    #perturbFMx = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx = np.reshape(perturbFMx, [-1, 28, 28, 1]);

    # pretrain ###
    #Enc_Layer1 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    #pretrain = Enc_Layer1.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, epsilon = 2*epsilon2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise)
    ###########

    adv_x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    adv_image = tf.reshape(adv_x, [-1,image_size,image_size,1]);

    #perturbFMx_adv = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx_adv = np.reshape(perturbFMx_adv, [-1, 28, 28, 1]);

    # pretrain adv ###
    #perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*32)
    #perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);
    FM_h = tf.placeholder(tf.float32, [None, 14, 14, 32]);
    Enc_Layer2 = EncLayer(inpt=adv_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_adv = Enc_Layer2.get_train_ops2(xShape = tf.shape(adv_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = adv_noise, perturbFM_h = FM_h)
    Enc_Layer3 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_benign = Enc_Layer3.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise, perturbFM_h = FM_h)
    ###########
    
    x_image += noise;
    x_image = tf.clip_by_value(x_image, -10, 10) #Clip the values of each input feature.
    
    adv_image += adv_noise;
    adv_image = tf.clip_by_value(adv_image, -10, 10) #Clip the values of each input feature.

    #perturbFM = np.random.laplace(0.0, scale3_benign, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    perturbFM = np.random.laplace(0.0, scale3_benign, hk * 10)
    perturbFM = np.reshape(perturbFM, [hk, 10]);
    
    y_conv = inference(x_image, perturbFM, hk, FM_h, params);
    softmax_y_conv = tf.nn.softmax(y_conv)
    #robust_mask = inference_robust_mask(y_conv, Delta2, L, epsilon2, robustness_T)

    #perturbFM = np.random.laplace(0.0, scale3_adv, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    y_adv_conv = inference(adv_image, perturbFM, hk, FM_h, params);
    #adv_robust_mask = inference_robust_mask(y_adv_conv, Delta2, L, epsilon2, robustness_T)

    # test model
    perturbFM_test = np.random.laplace(0.0, 0, hk)
    perturbFM_test = np.reshape(perturbFM_test, [hk]);
    x_test = tf.reshape(x, [-1,image_size,image_size,1]);
    y_test = inference(x_test, perturbFM_test, hk, FM_h, params);
    #test_robust_mask = inference_robust_mask(y_test, Delta2, L, epsilon2, robustness_T)

    #Define a place holder for the output label#
    y_ = tf.placeholder(tf.float32, [None, 10]);
    adv_y_ = tf.placeholder(tf.float32, [None, 10]);
    #End Step 5#
    #############################

    #############################
    ##Define loss and Optimizer##
    #############################
    '''
        Computes differentially private sigmoid cross entropy given `logits`.
        
        Measures the probability error in discrete classification tasks in which each
        class is independent and not mutually exclusive.
        
        For brevity, let `x = logits`, `z = labels`.  The logistic loss is
        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
        = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
        = (1 - z) * x + log(1 + exp(-x))
        = x - x * z + log(1 + exp(-x))
        
        For x < 0, to avoid overflow in exp(-x), we reformulate the above
        
        x - x * z + log(1 + exp(-x))
        = log(exp(x)) - x * z + log(1 + exp(-x))
        = - x * z + log(1 + exp(x))
        
        Hence, to ensure stability and avoid overflow, the implementation uses this
        equivalent formulation
        
        max(x, 0) - x * z + log(1 + exp(-abs(x)))
        
        `logits` and `labels` must have the same type and shape. Let neg_abs_logits = -abs(y_conv) = -abs(h_fc1 * W_fc2). By applying a Taylor expansion, we have:
        
        Taylor = max(y_conv, 0) - y_conv * y_ + log(1 + exp(-abs(y_conv)));
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2)
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2)
        = F1 + F2
        where: F1 = max(h_fc1 * W_fc2, 0) + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) and F2 = - (y_ * h_fc1) * W_fc2
        
        To ensure that Taylor is differentially private, we need to perturb all the coefficients, including the term y_ * h_fc1 * W_fc2.
        Note that h_fc1 is differentially private, since its computation on top of the DP Affine transformation does not access the original data.
        Therefore, F1 should be differentially private. We need to preserve DP in F2, which reads the groundtruth label y_, as follows:
        
        By applying the Functional Mechanism, we perturb (y_ * h_fc1) * W_fc2 as ((y_ * h_fc1) + perturbFM) * W_fc2 = (y_ * h_fc1)*W_fc2 + (perturbFM * W_fc2):
        
        perturbFM = np.random.laplace(0.0, scale3, hk * 10)
        perturbFM = np.reshape(perturbFM/L, [hk, 10]);
        
        where scale3 = Delta3/(epsilon3) = 2*hk/(epsilon3);
        
        To allow computing gradients at zero, we define custom versions of max and abs functions [Tensorflow].
        
        Source: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/nn_impl.py @ TensorFlow
    '''
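    # Illustrative check of the approximation used below (scalar case): the Taylor
    # terms replace log(1 + exp(-|t|)) with log(2) - |t|/2 + t**2/8; for t = 1.0 the
    # exact value is log1p(exp(-1.0)) ~= 0.3133 while the expansion gives ~= 0.3181.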
    ### Taylor for benign x
    zeros = array_ops.zeros_like(y_conv, dtype=y_conv.dtype)
    cond = (y_conv >= zeros)
    relu_logits = array_ops.where(cond, y_conv, zeros)
    neg_abs_logits = array_ops.where(cond, -y_conv, y_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_benign = math_ops.add(relu_logits - y_conv * y_, math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_benign = tf.abs(y_conv - y_)

    ### Taylor for adv_x
    zeros_adv = array_ops.zeros_like(y_adv_conv, dtype=y_conv.dtype)
    cond_adv = (y_adv_conv >= zeros_adv)
    relu_logits_adv = array_ops.where(cond_adv, y_adv_conv, zeros_adv)
    neg_abs_logits_adv = array_ops.where(cond_adv, -y_adv_conv, y_adv_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_adv = math_ops.add(relu_logits_adv - y_adv_conv * adv_y_, math.log(2.0) + 0.5*neg_abs_logits_adv + 1.0/8.0*neg_abs_logits_adv**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_adv = tf.abs(y_adv_conv - adv_y_)

    ### Adversarial training loss
    adv_loss = (1/(L + L*alpha))*(Taylor_benign + alpha * Taylor_adv)

    '''Sometimes, using learning rate decay can help to stabilize the training process. However, use it carefully, since it may affect the convergence speed.'''
    global_step = tf.Variable(0, trainable=False)
    pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
    train_var_list = tf.get_collection(CONV_VARIABLES)
    #print(pretrain_var_list)
    #print(train_var_list)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        pretrain_step = tf.train.AdamOptimizer(LR).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list);
        train_step = tf.train.AdamOptimizer(LR).minimize(adv_loss, global_step=global_step, var_list=train_var_list);
    sess = tf.InteractiveSession();

    # Define the correct prediction and accuracy
    # This needs to be changed to "Robust Prediction"
    correct_prediction_x = tf.equal(tf.argmax(y_test,1), tf.argmax(y_,1));
    accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32));

    #############
    # use these to get predictions wrt to robust conditions
    """robust_correct_prediction_x = tf.multiply(test_robust_mask, tf.cast(correct_prediction_x, tf.float32))
    accuracy_x_robust = tf.reduce_sum(robust_correct_prediction_x) / tf.reduce_sum(test_robust_mask)
    #certified_utility = 2/(1/accuracy_x_robust + 1/(tf.reduce_sum(test_robust_mask)/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))))
    certified_utility = (1.0*tf.reduce_sum(test_robust_mask))/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))"""
    #############

    # craft adversarial samples from x for training
    dynamic_eps = tf.placeholder(tf.float32);
    emsemble_L = int(L/3)
    softmax_y = tf.nn.softmax(y_test)
    #c_x_adv = fgsm(x, softmax_y, eps=fgsm_eps, clip_min=0.0, clip_max=1.0)
    c_x_adv = fgsm(x, softmax_y, eps=(dynamic_eps)/10, clip_min=-1.0, clip_max=1.0) # for I-FGSM
    x_adv = tf.reshape(c_x_adv, [emsemble_L,image_size*image_size]);
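    # emsemble_L is one third of the training batch: the loop below draws three
    # sub-batches, attacks them with I-FGSM, MIM and MadryEtAl respectively, and
    # concatenates the results into a single adversarial training batch.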

    #====================== attack =========================
    #attack_switch = {'randfgsm':True, 'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    #attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}
    #other possible attacks:
        # ElasticNetMethod
        # FastFeatureAdversaries
        # LBFGS
        # SaliencyMapMethod
        # VirtualAdversarialMethod

    # y_test = logits (before softmax)
    # softmax_y_test = preds (probs, after softmax)
    softmax_y_test = tf.nn.softmax(y_test)

    # create saver
    saver = tf.train.Saver(tf.all_variables())
    
    sess.run(W_conv1.initializer)
    _gamma = sess.run(gamma)
    _gamma_x = Delta2/L
    epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x)
    print(epsilon2_update/_gamma + epsilon2_update/_gamma_x)
    print(epsilon2_update)
    _sensitivityW = sess.run(sensitivity)
    delta_h = _sensitivityW*(14**2)
    dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon)
    #############################
    
    iterativeStep = 100
    
    # load the most recent models
    _global_step = 0
    ckpt = tf.train.get_checkpoint_state(os.getcwd() + '/tmp/train')
    if ckpt and ckpt.model_checkpoint_path:
        print(ckpt.model_checkpoint_path);
        saver.restore(sess, ckpt.model_checkpoint_path)
        _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
        print('No checkpoint file found')

    start_time = time.time();

    # adv pretrain model (Auto encoder layer)
    cost = tf.reduce_sum(Enc_Layer2.cost);
    logfile.write("pretrain: \n")
    
    # define cleverhans abstract models for using cleverhans attacks
    ch_model_logits = CustomCallableModelWrapper(callable_fn=inference_test_input, output_layer='logits', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)
    ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)

    # rand+fgsm
    # if attack_switch['randfgsm']:
    #     randfgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
    #     x_randfgsm_t = (fgsm_eps - rand_alpha) * randfgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0)
    #     x_rand_t = rand_alpha * tf.sign(tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=1.0))

    # define each attack method's tensor
    mu_alpha = tf.placeholder(tf.float32, [1]);
    attack_tensor_dict = {}
    # FastGradientMethod
    if attack_switch['fgsm']:
        print('creating attack tensor of FastGradientMethod')
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
        x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now
        attack_tensor_dict['fgsm'] = x_adv_test_fgsm

    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # default: eps_iter=0.05, nb_iter=10
    if attack_switch['ifgsm']:
        print('creating attack tensor of BasicIterativeMethod')
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

    # Deepfool
    if attack_switch['deepfool']:
        print('creating attack tensor of DeepFool')
        deepfool_obj = DeepFool(model=ch_model_logits, sess=sess)
        #x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['deepfool'] = x_adv_test_deepfool

    # MomentumIterativeMethod
    # default: eps_iter=0.06, nb_iter=10
    if attack_switch['mim']:
        print('creating attack tensor of MomentumIterativeMethod')
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['mim'] = x_adv_test_mim

    # SPSA
    # note: here the epsilon is an L-infinity norm bound instead of a percentage of perturbation
    # Maybe exclude this method at first, since it seems to have some constraints on the data value range
    if attack_switch['spsa']:
        print('creating attack tensor of SPSA')
        spsa_obj = SPSA(model=ch_model_logits, sess=sess)
        #x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1, ord=2)
        x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1)
        attack_tensor_dict['spsa'] = x_adv_test_spsa

    # CarliniWagnerL2
    # confidence=0 is from their paper
    # it is said to be slow, so maybe exclude it at first
    if attack_switch['cwl2']:
        print('creating attack tensor of CarliniWagnerL2')
        cwl2_obj = CarliniWagnerL2(model=ch_model_logits, sess=sess)
        #x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['cwl2'] = x_adv_test_cwl2

    # MadryEtAl (Projected Gradient Descent with random init, same as rand+fgsm)
    # default: eps_iter=0.01, nb_iter=40
    if attack_switch['madry']:
        print('creating attack tensor of MadryEtAl')
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['madry'] = x_adv_test_madry

    # SpatialTransformationMethod
    # the params are pretty different from those in the paper
    # so I use the defaults
    # excluded since there's a bug
    if attack_switch['stm']:
        print('creating attack tensor of SpatialTransformationMethod')
        stm_obj = SpatialTransformationMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6, ord=2)
        x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6)
        attack_tensor_dict['stm'] = x_adv_test_stm
    #====================== attack =========================
    
    sess.run(tf.initialize_all_variables());

    ##perturb h for training
    perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
    perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);

    ##perturb h for testing
    perturbFM_h_test = np.random.laplace(0.0, 0, 14*14*32)
    perturbFM_h_test = np.reshape(perturbFM_h_test, [-1, 14, 14, 32]);

    '''for i in range(_global_step, _global_step + pre_T):
        d_eps = random.random();
        
        batch = mnist.train.next_batch(L); #Get a random batch.
        adv_images = sess.run(x_adv, feed_dict = {x:batch[0], y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        """batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)"""

        batch_2 = mnist.train.next_batch(L);
        pretrain_step.run(feed_dict={adv_x: np.append(adv_images, batch_2[0], axis = 0), adv_noise: AdvLnoise, FM_h: perturbFM_h});
        if i % int(5*step_for_epoch) == 0:
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            logfile.write("step \t %d \t %g \n"%(i, cost_value))
            print(cost_value)

    pre_train_finish_time = time.time()
    print('pre_train finished in: ' + parse_time(pre_train_finish_time - start_time))'''

    # train and test model with adv samples
    max_benign_acc = -1;
    max_robust_benign_acc = -1
    #max_adv_acc = -1;

    test_size = len(mnist.test.images)
    AdvLnoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    AdvLnoise_test = generateIdLMNoise(image_size, 0, epsilon2_update, test_size);

    Lnoise_empty = generateIdLMNoise(image_size, 0, epsilon2_update, L);
    BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    last_eval_time = -1
    accum_time = 0
    accum_epoch = 0
    max_adv_acc_dict = {}
    max_robust_adv_acc_dict = {}
    #max_robust_adv_utility_dict = {}
    for atk in attack_switch.keys():
        if atk not in max_adv_acc_dict:
            max_adv_acc_dict[atk] = -1
            max_robust_adv_acc_dict[atk] = -1

    for i in range(_global_step, _global_step + T):
        # this batch is for generating adv samples
        batch = mnist.train.next_batch(emsemble_L); #Get a random batch.
        y_adv_batch = batch[1]
        # Print out the result every 10 epochs (the first evaluation interval is skipped).
        if i % int(10*step_for_epoch) == 0 and i > int(10*step_for_epoch):
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            print(cost_value)
            
            if last_eval_time < 0:
                last_eval_time = time.time()
            #===================benign samples=====================
            predictions_form_argmax = np.zeros([test_size, 10])
            #test_bach = mnist.test.next_batch(test_size)
            softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: BenignLNoise, FM_h: perturbFM_h})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
            for n_draws in range(0, 1):
                _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                for j in range(test_size):
                    pred = argmax_predictions[j]
                    predictions_form_argmax[j, pred] += 1;
                softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: (BenignLNoise + _BenignLNoise/2), FM_h: (perturbFM_h + _perturbFM_h/2)})
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_form_argmax;
            is_correct = []
            is_robust = []
            for j in range(test_size):
                is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                is_robust.append(robustness_from_argmax >= fgsm_eps)
            acc = np.sum(is_correct)*1.0/test_size
            robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
            robust_utility = np.sum(is_robust)*1.0/test_size
            max_benign_acc = max(max_benign_acc, acc)
            max_robust_benign_acc = max(max_robust_benign_acc, robust_acc*robust_utility)
            log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(i, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)
            #===================adv samples=====================
            #log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(i, total_eps)
            """adv_images_dict = {}
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_:mnist.test.labels})
            print("Done with the generating of Adversarial samples")"""
            #===================adv samples=====================
            adv_acc_dict = {}
            robust_adv_acc_dict = {}
            robust_adv_utility_dict = {}
            for atk in attack_switch.keys():
                if atk not in adv_acc_dict:
                    adv_acc_dict[atk] = -1
                    robust_adv_acc_dict[atk] = -1
                    robust_adv_utility_dict[atk] = -1
                if attack_switch[atk]:
                    adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_: mnist.test.labels, adv_noise: AdvLnoise_test, mu_alpha:[fgsm_eps]})
                    ### PixelDP Robustness ###
                    predictions_form_argmax = np.zeros([test_size, 10])
                    softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: perturbFM_h})
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    for n_draws in range(0, 2000):
                        if n_draws % 1000 == 0:
                            print(n_draws)
                        _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                        _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                        _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                        for j in range(test_size):
                            pred = argmax_predictions[j]
                            predictions_form_argmax[j, pred] += 1;
                        softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (perturbFM_h + _perturbFM_h/2)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (BenignLNoise + _BenignLNoise/2), FM_h: perturbFM_h})
                        #softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (_perturbFM_h)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h})
                        argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    final_predictions = predictions_form_argmax;
                    is_correct = []
                    is_robust = []
                    for j in range(test_size):
                        is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                        robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                        is_robust.append(robustness_from_argmax >= fgsm_eps)
                    adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_size
                    robust_adv_acc_dict[atk] = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
                    robust_adv_utility_dict[atk] = np.sum(is_robust)*1.0/test_size
                    ##############################
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    # added robust prediction
                    log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
                    max_adv_acc_dict[atk] = max(max_adv_acc_dict[atk], adv_acc_dict[atk])
                    max_robust_adv_acc_dict[atk] = max(max_robust_adv_acc_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
            print(log_str)
            logfile.write(log_str + '\n')

            # logfile.write("step \t %d \t %g \t %g \n"%(i, benign_acc, adv_acc))
            # print("step \t %d \t %g \t %g"%(i, benign_acc, adv_acc));

            # estimate end time
            """if i > 0 and i % int(10*step_for_epoch) == 0:
                current_time_interval = time.time() - last_eval_time
                last_eval_time = time.time()
                print('during last eval interval, {} epoch takes {}'.format(10, parse_time(current_time_interval)))
                accum_time += current_time_interval
                accum_epoch += 10
                estimate_time = ((_global_step + T - i) / step_for_epoch) * (accum_time / accum_epoch)
                print('estimate finish in: {}'.format(parse_time(estimate_time)))"""

            #print("step \t %d \t adversarial test accuracy \t %g"%(i, accuracy_x.eval(feed_dict={x: adv_images, y_: mnist.test.labels, noise: Lnoise_empty})));
            """checkpoint_path = os.path.join(os.getcwd() + '/tmp/train', 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=i);"""

        d_eps = random.random();
        y_adv = batch[1]
        adv_images = sess.run(attack_tensor_dict['ifgsm'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        """for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})"""
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)
        
        batch = mnist.train.next_batch(L); #Get a random batch.
        # train with benign and adv samples
        pretrain_step.run(feed_dict={adv_x: train_images, x: batch[0], adv_noise: AdvLnoise_test, noise: BenignLNoise, FM_h: perturbFM_h});
        train_step.run(feed_dict={x: batch[0], adv_x: train_images, y_: batch[1], adv_y_: y_adv, noise: BenignLNoise, adv_noise: AdvLnoise_test, FM_h: perturbFM_h});
    duration = time.time() - start_time;
    # print(parse_time(duration)); #print running time duration#

    max_acc_string = "max acc: benign: \t{:.4f} {:.4f}".format(max_benign_acc, max_robust_benign_acc)
    for atk in attack_switch.keys():
        if attack_switch[atk]:
            max_acc_string += " {}: \t{:.4f} {:.4f}".format(atk, max_adv_acc_dict[atk], max_robust_adv_acc_dict[atk])
    logfile.write(max_acc_string + '\n')
    logfile.write(str(duration) + '\n')
Beispiel #19
0
    def get_adversarial_version(self,
                                x,
                                y=None,
                                eps=0.3,
                                iterations=100,
                                attack='FGSM',
                                targeted=False,
                                y_tar=None,
                                clip_min=0.0,
                                clip_max=1.0,
                                nb_candidate=10,
                                num_params=100):
        """
        Desc:
            Caclulate the adversarial version for point x using FGSM
            x: matrix of n x input_shape samples
            y: matrix of n x input_label samples
            eps: used for FGSM
            attack: FGMS or CW
        
        """
        if self.dataset == 'cifar10':
            model = KerasModelWrapper(self.model)
        else:
            model = KerasModelWrapper(self.model.model)
        if attack == 'CW-l2':
            K.set_learning_phase(0)
            # Instantiate a CW attack object
            cw = CarliniWagnerL2(model, sess=self.sess)

            cw_params = {
                'batch_size': 10,
                'confidence': 0,
                'learning_rate': 1e-2,
                'binary_search_steps': 5,
                'max_iterations': iterations,
                'abort_early': True,
                'initial_const': 1e-4,
                'clip_min': 0.0,
                'clip_max': 1.0
            }

            x_adv = cw.generate_np(x, **cw_params)

        elif attack == 'CW-l0':
            K.set_learning_phase(0)
            # Instantiate a CW attack object
            cw = CarliniWagnerL0(model, sess=self.sess)

            cw_params = {
                'batch_size': 1,
                'confidence': 0.,
                'learning_rate': 1e-2,
                'binary_search_steps': 5,
                'max_iterations': iterations,
                'abort_early': True,
                'initial_const': 1e-4,
                'clip_min': 0.0,
                'clip_max': 1.0
            }

            x_adv = cw.generate_np(x, **cw_params)

        elif attack == 'DF':
            K.set_learning_phase(0)
            df = DeepFool(model, sess=self.sess)
            df_params = {'nb_candidate': nb_candidate}
            x_adv = df.generate_np(x, **df_params)

        elif attack == 'JSMA':
            K.set_learning_phase(0)
            jsma = SaliencyMapMethod(model, sess=self.sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.03,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }
            x_adv = jsma.generate_np(x, **jsma_params)

        elif attack == 'FGSM':
            K.set_learning_phase(0)
            fgsm = FastGradientMethod(model, sess=self.sess)
            fgsm_params = {
                'eps': 0.15,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }
            x_adv = fgsm.generate_np(x, **fgsm_params)

        elif attack == 'BIM':
            K.set_learning_phase(0)
            fgsm = BasicIterativeMethod(model, sess=self.sess)
            fgsm_params = {
                'eps': 0.015,
                'eps_iter': 0.005,
                'nb_iter': 100,
                'clip_min': clip_min,
                'clip_max': clip_max,
                'y_target': y_tar
            }
            x_adv = fgsm.generate_np(x, **fgsm_params)

        return x_adv
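The per-attack branches above repeat the clip bounds and solver settings. A minimal, framework-free sketch of keeping them in one lookup table; the parameter values are copied from the listing, while the params_for helper itself is an assumption and not part of the original class:

# Per-attack defaults mirroring the branches in get_adversarial_version.
ATTACK_PARAMS = {
    'CW-l2': {'batch_size': 10, 'confidence': 0, 'learning_rate': 1e-2,
              'binary_search_steps': 5, 'abort_early': True,
              'initial_const': 1e-4, 'clip_min': 0.0, 'clip_max': 1.0},
    'FGSM': {'eps': 0.15, 'clip_min': 0.0, 'clip_max': 1.0},
    'BIM': {'eps': 0.015, 'eps_iter': 0.005, 'nb_iter': 100,
            'clip_min': 0.0, 'clip_max': 1.0},
}


def params_for(attack, **overrides):
    """Return a copy of the defaults for `attack` with any overrides applied."""
    params = dict(ATTACK_PARAMS[attack])
    params.update(overrides)
    return params


print(params_for('FGSM', eps=0.3))
# {'eps': 0.3, 'clip_min': 0.0, 'clip_max': 1.0}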
Beispiel #20
0
def train_child(t, p, m, load_dict=False):
    # model = nn.DataParallel(TestCNN().cuda(1), device_ids=[1, 2, 3])
    raw_model = TestCNN().cuda(0)
    model = TestCNN().cuda(0)
    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    # stm = SpatialTransformationMethod(cleverhans_model, sess=session)
    # cw2 = CarliniWagnerL2(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)
    tf_raw_model = convert_pytorch_model_to_tf(raw_model)
    cleverhans_raw_model = CallableModelWrapper(tf_raw_model,
                                                output_layer='logits')
    pgd_raw = ProjectedGradientDescent(cleverhans_raw_model, sess=session)

    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    # def stm_op(x, eps):
    #     att = stm.generate(x_op, batch_size=len(x), dx_min=-0.1*eps, dx_max=0.1*eps, dy_min=-0.1*eps, dy_max=0.1*eps, angle_min=-30*eps, angle_max=30*eps)
    #     return session.run(att, feed_dict={x_op: x})
    # def cw2_op(x, eps):
    #     att = cw2.generate(x_op, max_iterations=3)
    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def pgd_raw_op(x, eps):
        att = pgd_raw.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        attacks = [fgsm_op, noise_op, mim_op]
        attacks_name = ['FGSM', 'Noise', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m),
                     total=len(t),
                     desc='Subpolicy: ',
                     leave=False)):
            # Indices of the samples assigned to subpolicy i; writing through
            # these indices keeps the perturbed batches in train_x_adv.
            idx_i = np.where(adv_type == i)[0]
            for j, (tj, pj, mj) in enumerate(
                    tqdm(zip(ti, pi, mi),
                         total=len(ti),
                         desc='Operation: ',
                         leave=False)):
                tj, pj, mj = (*tj, *pj, *mj)
                # Apply this operation to each selected sample with probability pj.
                idx_j = idx_i[np.random.rand(len(idx_i)) < pj]
                for k in tqdm(range(0, len(idx_j), BATCH_SIZE),
                              desc=attacks_name[tj] + ': ',
                              leave=False):
                    batch_idx = idx_j[k:k + BATCH_SIZE]
                    train_x_adv[batch_idx] = attacks[tj](
                        train_x_adv[batch_idx], (mj + 1) / MAGN_COUNT *
                        (eps[tj][1] - eps[tj][0]) + eps[tj][0])
        return train_x_adv

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    raw_optimizer = optim.Adam(raw_model.parameters(), lr=1e-3)
    adv_idx = np.random.rand(len(train_x)) < 0.5  # perturb roughly half of the training set
    train_x_adv = train_x.copy()
    train_x_adv[adv_idx] = attack_train(train_x_adv[adv_idx])
    adv_trainset = torch.utils.data.TensorDataset(
        torch.tensor(train_x_adv, dtype=torch.float),
        torch.tensor(train_y, dtype=torch.long))
    adv_trainloader = torch.utils.data.DataLoader(adv_trainset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=4)
    if load_dict:
        model.load_state_dict(torch.load('partial_eval_runs/model.pt'))
        optimizer.load_state_dict(torch.load('partial_eval_runs/optimizer.pt'))
        raw_model.load_state_dict(torch.load('partial_eval_runs/raw_model.pt'))
        raw_optimizer.load_state_dict(
            torch.load('partial_eval_runs/raw_optimizer.pt'))
    model.train()
    batch_tqdm = tqdm(adv_trainloader, leave=False)
    for x, y in batch_tqdm:
        optimizer.zero_grad()
        output = model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'adv {loss:.3f} {acc:.3f}')
    batch_tqdm = tqdm(trainloader, leave=False)
    raw_model.train()
    for x, y in batch_tqdm:
        raw_optimizer.zero_grad()
        output = raw_model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        raw_optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'raw {loss:.3f} {acc:.3f}')
    with torch.no_grad():
        model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_raw_acc = tot_acc / len(val_x)
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='PGD: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = pgd_op(val_x[i:][:BATCH_SIZE], 0.01)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_adv_acc = tot_acc / len(val_x)
        raw_model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_raw_acc = tot_acc / len(val_x)
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='PGD: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = pgd_raw_op(val_x[i:][:BATCH_SIZE],
                                                    0.01)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_adv_acc = tot_acc / len(val_x)
    with open('partial_eval_runs/acc.csv', 'a') as f:
        f.write(f'{adv_raw_acc},{adv_adv_acc},{raw_raw_acc},{raw_adv_acc}\n')
    print(
        f'adv {adv_raw_acc:.3f} -> {adv_adv_acc:.3f} | raw {raw_raw_acc:.3f} -> {raw_adv_acc:.3f}'
    )
    torch.save(model.state_dict(), 'partial_eval_runs/model.pt')
    torch.save(optimizer.state_dict(), 'partial_eval_runs/optimizer.pt')
    torch.save(raw_model.state_dict(), 'partial_eval_runs/raw_model.pt')
    torch.save(raw_optimizer.state_dict(),
               'partial_eval_runs/raw_optimizer.pt')
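The magnitude-to-epsilon mapping used inside attack_train is easy to check in isolation. A minimal sketch, assuming MAGN_COUNT = 10 (the constant is defined outside the excerpt above) and the FGSM range [0.03, 0.3] from the eps table in the listing:

MAGN_COUNT = 10  # assumed value; defined elsewhere in the original script
eps_min, eps_max = 0.03, 0.3  # FGSM range from the eps table in attack_train


def magnitude_to_eps(mj):
    # Same linear interpolation as in attack_train: magnitude index 0 maps to
    # the smallest step above eps_min, index MAGN_COUNT - 1 maps to eps_max.
    return (mj + 1) / MAGN_COUNT * (eps_max - eps_min) + eps_min


print([round(magnitude_to_eps(m), 3) for m in range(MAGN_COUNT)])
# [0.057, 0.084, 0.111, 0.138, 0.165, 0.192, 0.219, 0.246, 0.273, 0.3]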
Beispiel #21
0
def mnist_tutorial_deepfool(train_start=0, train_end=60000,  # read 60000 training examples
                            test_start=0, test_end=10000,  # read 10000 test examples
                            viz_enabled=True, nb_epochs=6,
                            batch_size=128, nb_classes=10, source_samples=10,
                            learning_rate=0.001, attack_iterations=100,
                            model_path=os.path.join("models", "mnist")):
    """
    MNIST tutorial for Deepfool's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples激活对抗例子
    :param nb_epochs: number of epochs to train model(一个epoch指代所有的数据送入网络中完成一次前向计算及反向传播的过程。)
    :param batch_size: size of training batches
    :param nb_classes: number of output classes(输出几类)
    :param source_samples: number of test inputs to attack(测试输入用于攻击的数量)
    :param learning_rate: learning rate for training(学习率)
    :param model_path: path to the model file(文件路径)
    :param attack_iterations: 攻击迭代次数
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions: 28 x 28 x 1
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_picklable_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2018, 8, 9])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path+".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"), rng=rng)
        print("save success")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the DeepFool approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a DeepFool attack object
    deepfool = DeepFool(model, back='tf', sess=sess)


    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][1] for i in range(10)]
    print("idxs:",idxs)

    # construct adv_inputs
    grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    print("grid_viz_data",grid_viz_data.shape)
    adv_inputs = X_test[idxs].reshape([-1,28,28,1])

    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': attack_iterations,
                       'nb_classes': 10,
                       'clip_min': 0.,
                       'clip_max': 1.}

    adv = deepfool.generate_np(adv_inputs, **deepfool_params)

    print("adv success")

    adv_accuracy = 1-model_eval(sess, x, y, preds, adv, Y_test[idxs],
                                args={'batch_size': 10})

    for j in range(10):
        grid_viz_data[j, 0] = adv_inputs[j]
        grid_viz_data[j, 1] = adv[j]

    print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1.-adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
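The average distortion reported at the end is a per-example L2 norm followed by a batch mean. A minimal NumPy sketch on dummy data; the 10 x 28 x 28 x 1 shapes are assumptions matching the MNIST inputs above:

import numpy as np

# Dummy stand-ins for adv_inputs and adv with the MNIST shape used above.
adv_inputs = np.random.rand(10, 28, 28, 1).astype(np.float32)
adv = adv_inputs + 0.01 * np.random.randn(10, 28, 28, 1).astype(np.float32)

# Per-example L2 norm of the perturbation, then averaged over the batch,
# as in the percent_perturbed computation above.
l2_per_example = np.sqrt(np.sum((adv - adv_inputs) ** 2, axis=(1, 2, 3)))
print('Avg. L_2 norm of perturbations {0:.4f}'.format(l2_per_example.mean()))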
Beispiel #22
0
class TestDeepFool(CleverHansTest):
    def setUp(self):
        super(TestDeepFool, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        over_shoot=0.02,
                                        max_iter=50,
                                        nb_candidate=2,
                                        clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x,
                                       over_shoot=0.02,
                                       max_iter=50,
                                       nb_candidate=2,
                                       clip_min=-5,
                                       clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        over_shoot=0.02,
                                        max_iter=50,
                                        nb_candidate=2,
                                        clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
Beispiel #23
0
def JSMA(train_start=0,
         train_end=60000,
         test_start=0,
         test_end=10000,
         nb_epochs=6,
         batch_size=128,
         learning_rate=0.001,
         clean_train=True,
         testing=False,
         backprop_through_attack=False,
         nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    preds_2_adv = [
        preds_adv_jsma
        # ,preds_adv_fgsm
        # ,preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
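Each clean-test block above repeats the same three steps: build the attack, call generate, and run model_eval on the resulting predictions. A minimal sketch of driving those steps from one table; it assumes the same cleverhans (v2/v3-era) import paths the listing already uses, and the helper function is an assumption, not part of the original script:

from cleverhans.attacks import (SaliencyMapMethod, FastGradientMethod,
                                BasicIterativeMethod, DeepFool)
from cleverhans.utils_tf import model_eval


def eval_attacks_on_clean_model(sess, x, y, model, X_test, Y_test, eval_par):
    # (attack name, attack class, generate() parameters), values from the listing.
    attack_table = [
        ('JSMA', SaliencyMapMethod, {'theta': 1., 'gamma': 0.1, 'clip_min': 0.,
                                     'clip_max': 1., 'y_target': None}),
        ('FGSM', FastGradientMethod, {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
        ('BIM', BasicIterativeMethod, {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                                       'clip_min': 0., 'clip_max': 1.}),
        ('DF', DeepFool, {'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 50,
                          'clip_min': 0., 'clip_max': 1.}),
    ]
    for name, attack_cls, params in attack_table:
        attack = attack_cls(model, sess=sess)
        adv_x = attack.generate(x, **params)
        preds_adv = model.get_probs(adv_x)
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on %s adversarial examples: %0.4f' % (name, acc))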
Beispiel #24
0
def eval(sess,
         model_name,
         X_train,
         Y_train,
         X_test,
         Y_test,
         cnn=False,
         rbf=False,
         fgsm=False,
         jsma=False,
         df=False,
         bim=False):
    """ Load model saved in model_name.json and model_name_weights.h5 and 
    evaluate its accuracy on legitimate test samples and adversarial samples.
    Use cnn=True if the model is CNN based.
    """

    # open text file and output accuracy results to it
    text_file = open("fmnist_results.txt", "w")

    # load saved model
    print("Load model ... ")
    '''
    json = open('models/{}.json'.format(model_name), 'r')
    model = json.read()
    json.close()
    loaded_model = model_from_json(model)
    loaded_model.load_weights("models/{}_weights.h5".format(model_name))
    '''
    if rbf:
        loaded_model = load_model("rbfmodels/{}.h5".format(model_name),
                                  custom_objects={'RBFLayer': RBFLayer})
        text_file.write('Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
    else:
        loaded_model = load_model("models/{}.h5".format(model_name))
        text_file.write('Evaluating on models/{}.h5\n\n'.format(model_name))

    # Set placeholders
    if cnn:
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 784))

    y = tf.placeholder(tf.float32, shape=(None, 10))

    predictions = loaded_model(x)

    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args={"batch_size": 128})
    text_file.write('Test accuracy on legitimate test examples: {0}\n'.format(
        str(accuracy)))
    #print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples depending on the input parameters
    wrap = KerasModelWrapper(loaded_model)

    # FGSM
    if fgsm:
        fgsm = FastGradientMethod(wrap, sess=sess)
        fgsm_params = {'eps': 0.3}
        adv_x = fgsm.generate(x, **fgsm_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on fgsm adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on fgsm adversarial test examples: ' + str(accuracy))

    # JSMA
    if jsma:
        jsma = SaliencyMapMethod(wrap, sess=sess)
        jsma_params = {
            'theta': 2.,
            'gamma': 0.145,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }
        adv_x = jsma.generate(x, **jsma_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on jsma adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on jsma adversarial test examples: ' + str(accuracy))

    # DeepFool
    if df:
        df = DeepFool(wrap, sess=sess)
        df_params = {'nb_candidate': 10, 'max_iter': 50}
        adv_x = df.generate(x, **df_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on df adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on df adversarial test examples: ' + str(accuracy))

    # Basic Iterative Method (implemented here via ProjectedGradientDescent)
    if bim:
        bim = ProjectedGradientDescent(wrap, sess=sess)
        bim_params = {'eps': 0.3}
        adv_x = bim.generate(x, **bim_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on bim adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on bim adversarial test examples: ' + str(accuracy))

    print('Accuracy results outputted to fmnist_results.txt')
    text_file.close()

    # Close TF session
    sess.close()
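A minimal sketch of how eval might be invoked; the model name 'fmnist_cnn' and the already-loaded Fashion-MNIST arrays are assumptions, and eval closes the session itself, so no further cleanup is needed:

# Hypothetical driver for eval(); 'fmnist_cnn' and the arrays
# X_train/Y_train/X_test/Y_test are assumptions standing in for real data.
sess = tf.Session()
eval(sess, 'fmnist_cnn', X_train, Y_train, X_test, Y_test,
     cnn=True, fgsm=True, jsma=True, df=True, bim=True)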