Code Example #1
def deep_fool_attack():
    counter = 0
    image_iterator = importer.load_images_generator(importer.batch_shape)
    tf.reset_default_graph()
    x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
    folder_path = os.path.join(config.ADVERSARIAL_FOLDER, "deep_full_base")
    os.makedirs(folder_path, exist_ok=True)
    while True:
        with tf.Session() as sess:
            filenames, images = next(image_iterator, (None, None))
            if filenames is None:
                break
            model = Inception_V3_Model(np.float32(images))
            params = {}
            attack = DeepFool(model=model, sess=sess)
            params['max_iter'] = 5
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            saver = tf.train.Saver(variables)
            saver.restore(sess, importer.checkpoint_path)
            # NOTE: generate() adds new ops to the default graph on every
            # iteration; for long runs, build the attack graph once instead.
            x_adv = attack.generate(x_input, **params)
            # writer = tf.summary.FileWriter("/tmp/log/", sess.graph)
            adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
            utils.image_saver(adversarial_images, filenames, folder_path)
            print("adversarial_images counter:{}".format(counter))
            #writer.close()
            counter += 1
            if counter == 999:
                print("Attack is finished")
                break
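
For reference, the pattern above boils down to a few lines. Below is a minimal, self-contained sketch, assuming TF 1.x and a cleverhans v3-style API; the toy Keras model and random batch are illustrative stand-ins for Inception_V3_Model and the image loader, not the original code:

import numpy as np
import tensorflow as tf
from cleverhans.attacks import DeepFool
from cleverhans.utils_keras import KerasModelWrapper

sess = tf.Session()
tf.keras.backend.set_session(sess)

# Toy classifier standing in for the real model (separate softmax layer so
# the wrapper can recover the logits).
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Activation('softmax'),
])
sess.run(tf.global_variables_initializer())

x_input = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
attack = DeepFool(KerasModelWrapper(model), sess=sess)
# Build the attack graph once, then feed batches through it.
x_adv = attack.generate(x_input, max_iter=5)

images = np.random.rand(4, 28, 28, 1).astype(np.float32)
adversarial_images = sess.run(x_adv, feed_dict={x_input: images})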
Code Example #2
class TestDeepFool(CleverHansTest):
    def setUp(self):
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        overshoot=0.02,
                                        max_iter=50,
                                        nb_candidate=2,
                                        clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x,
                                       overshoot=0.02,
                                       max_iter=50,
                                       nb_candidate=2,
                                       clip_min=-5,
                                       clip_max=5)
        self.assertEqual(x_val.shape, x_adv_p.shape)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        overshoot=0.02,
                                        max_iter=50,
                                        nb_candidate=2,
                                        clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
Code Example #3
class TestDeepFool(CleverHansTest):
    def setUp(self):
        super(TestDeepFool, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x, overshoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5, clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
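
Outside the unittest harness, the same toy model can be attacked directly. A minimal sketch, assuming TF 1.x and cleverhans' CallableModelWrapper to expose the bare callable as a logits-producing Model (the test harness wraps the callable in a similar way):

import numpy as np
import tensorflow as tf
from cleverhans.attacks import DeepFool
from cleverhans.model import CallableModelWrapper

def my_model(x):
    W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
    h1 = tf.nn.sigmoid(tf.matmul(x, W1))
    W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
    return tf.matmul(h1, W2)

sess = tf.Session()
attack = DeepFool(CallableModelWrapper(my_model, 'logits'), sess=sess)

x_val = np.random.rand(100, 2).astype(np.float32)
x_adv = attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                           nb_candidate=2, clip_min=-5, clip_max=5)

# Fraction of examples whose predicted label changed.
orig = np.argmax(sess.run(my_model(tf.constant(x_val))), axis=1)
new = np.argmax(sess.run(my_model(tf.constant(x_adv))), axis=1)
print(np.mean(orig != new))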
Code Example #4
def _DeepFool(self):
    deepfool_attack = DeepFool(self.wrapped_model, sess=self.sess)
    params = {
        'nb_candidate': 10,
        'max_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.,
        'verbose': False
    }
    adv_x = deepfool_attack.generate(self.x, **params)
    self.save_images(adv_x, self.save_loc)
Code Example #5
class DeepFoolAttack(AdversarialAttack):
    def __init__(self, model, n_candidates=10, overshoot=0.02, max_iterations=50, clip_min=-1., clip_max=1.):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._n_candidate = n_candidates
        self._overshoot = overshoot
        self._max_iterations = max_iterations

        # Construct the cleverhans attack inside the wrapper's own graph so
        # its ops live in the same graph/session later used by attack_method().
        with self.graph.as_default():
            self._method = DeepFool(self._model, sess=self.session, nb_candidate=self._n_candidate,
                                    overshoot=self._overshoot, max_iter=self._max_iterations,
                                    nb_classes=self.n_classes, clip_min=self._clip_min, clip_max=self._clip_max)

    def attack_method(self, labels):
        # labels are unused: DeepFool is untargeted and works from the logits.
        return self._method.generate(x=self._x_clean)
Code Example #6
def df(X, which, prob, magn):
    wrapped = DeepFool(KerasModelWrapper(which.model), sess=session)
    X = X.copy()
    # Keep each row with probability `prob`.
    idx = np.random.uniform(size=len(X))
    idx = np.where(idx < prob)[0]
    for i in tqdm(range(0, len(idx), CHILD_BATCH_SIZE),
                  desc='batch',
                  leave=False):
        tensor = tf.convert_to_tensor(X[idx[i:i + CHILD_BATCH_SIZE]])
        # WARNING: this re-initializes *all* global variables, including the
        # wrapped model's weights; see the sketch after this function for a
        # safer "initialize only uninitialized variables" alternative.
        init = tf.global_variables_initializer()
        session.run(init)
        # Note: generate() also adds new ops to the graph on every iteration.
        tensor = wrapped.generate(tensor,
                                  clip_min=0.,
                                  clip_max=magn * 0.3 + 0.3)
        X[idx[i:i + CHILD_BATCH_SIZE]] = session.run(tensor)
    return X
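
Running tf.global_variables_initializer() inside the loop above re-initializes the wrapped model's trained weights along with everything else. A common TF 1.x workaround is to initialize only the variables that are still uninitialized; a minimal sketch (the helper name is ours, not from the original code):

import tensorflow as tf

def initialize_uninitialized(session):
    # Initialize only those global variables that are not yet initialized.
    global_vars = tf.global_variables()
    flags = session.run([tf.is_variable_initialized(v) for v in global_vars])
    to_init = [v for v, f in zip(global_vars, flags) if not f]
    if to_init:
        session.run(tf.variables_initializer(to_init))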
Code Example #7
class TestDeepFool(CleverHansTest):
    def setUp(self):
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x, overshoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5, clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
Code Example #8
    def next_images():
        tf.logging.set_verbosity(tf.logging.INFO)
        print("{} generator graph is ready!".format(mode))
        tf.reset_default_graph()
        sess = tf.Session()
        x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
        params = {}
        model = InceptionModelLogits(importer.num_classes, x_input)
        if mode == 'deep_fool':
            graph = DeepFool(model, sess=sess)
            params['max_iter'] = 5
        else:
            raise Exception("Not supported mode")

        print('graph params: {}'.format(params))
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        saver = tf.train.Saver(variables)
        saver.restore(sess, importer.checkpoint_path)
        image_iterator = importer.load_images_generator(batch_shape)
        # Build the attack graph once; calling generate() inside the loop
        # would add new ops to the graph on every batch.
        x_adv = graph.generate(x_input, **params)
        while True:
            filenames, images = next(image_iterator, (None, None))
            if filenames is None:
                break
            true_classes = importer.filename_to_class(filenames)
            target = np.expand_dims(np.zeros(importer.num_classes), 1)
            # Unreachable here (only 'deep_fool' is supported above); kept
            # from a variant of this generator that also supported CW.
            if mode == 'carlini_wagner':
                assert (len(true_classes) == 1)
                target[true_classes[0]] = 1
                params["y"] = target
            adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
            print("Image:{}, diff:{}".format(
                filenames[0],
                np.sum(np.abs(images[0] - adversarial_images[0]))))
            if is_return_orig_images:
                yield filenames, adversarial_images, images
            else:
                yield filenames, adversarial_images
Code Example #9
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_filters: number of convolutional filters
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # Train without verbose logging
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on BIM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on EN adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on VAT adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn()
    preds_2 = model_2(x)

    # Initialize the new model's variables
    sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
         #'y': None,
         'max_iterations': 100,
         'learning_rate': 0.1,
         'batch_size': batch_size,
         'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate':10,
                       'overshoot':0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Base Iterative
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    #7
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # Generate targeted adversarial examples for every training sample:
    # loop over the samples, and for each one run the Jacobian-based
    # saliency map approach against every class other than the true one.

    X_train_adv_set = []
    Y_train_adv_set = []
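    # Each sample contributes itself plus nb_classes - 1 targeted JSMA
    # examples, so the augmented set grows to nb_classes times its
    # original size.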
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # Include the clean sample as well
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # Save the outputs so this step need not be re-run later
    np.savez("jsma_training_data.npz", x_train=X_train_data,
             y_train=Y_train_data)

    # The archive can be reloaded later with:
    #   data = np.load("jsma_training_data.npz")
    #   x_train_adv, y_train_adv = data["x_train"], data["y_train"]

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the function against 5 attacks
    # fgsm, base iterative, jsma, elastic net, and deepfool
    def evaluate_against_all():
            # 1 Clean Data
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                  args=eval_params)
            print('Legitimate accuracy: %0.4f' % accuracy)

            tmp = 'Legitimate accuracy: '+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 2 JSMA
            accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                                  Y_test, args=eval_params)

            print('JSMA accuracy: %0.4f' % accuracy)
            tmp = 'JSMA accuracy:'+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 3 FGSM
            accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                                  Y_test, args=eval_params)

            print('FGSM accuracy: %0.4f' % accuracy)
            tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 4 Base Iterative
            accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                                  Y_test, args=eval_params)

            print('Base Iterative accuracy: %0.4f' % accuracy)
            tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 5 Elastic Net
            accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                                  Y_test, args=eval_params)

            print('Elastic Net accuracy: %0.4f' % accuracy)
            tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 6 DeepFool
            accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                                  Y_test, args=eval_params)
            print('DeepFool accuracy: %0.4f' % accuracy)
            tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 7 C & W Attack
            accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                                  Y_test, args=eval_params)
            print('C & W accuracy: %0.4f' % accuracy)
            tmp = 'C & W  accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            # 8 Virtual Adversarial
            accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                                  Y_test, args=eval_params)
            print('VAT accuracy: %0.4f' % accuracy)
            tmp = 'VAT accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            print("*******End of Epoch***********\n\n")

        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net  + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
Code Example #10
def JSMA_FGSM_BIM(train_start=0,
                  train_end=60000,
                  test_start=0,
                  test_end=10000,
                  nb_epochs=6,
                  batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  backprop_through_attack=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    # Build attacks against the new model_2; using `model` here would both
    # train model_2 against a static model and raise a NameError when
    # clean_train=False.
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model_2, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model_2, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model_2, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model_2.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm, preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
Code Example #11
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=True, label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
  tf.keras.backend.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session()
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Define TF model graph
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds = model(x)
  print("Defined TensorFlow model graph.")

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
  wrap = KerasModelWrapper(model)

  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate()
  else:
    print("Model was not loaded, training from scratch.")
    loss = CrossEntropy(wrap, smoothing=label_smoothing)
    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng)

  # Calculate training error
  if testing:
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
    report.train_clean_train_clean_eval = acc

  df = DeepFool(wrap, sess=sess)
  adv_x = df.generate(x)

  batch = 1000
  x_adv_test = None
  x_adv_train = None
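  # NOTE: int(len(...) / batch) drops any remainder batch; with the default
  # MNIST splits (60000 train / 10000 test) and batch = 1000, nothing is
  # lost here.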

  for i in tqdm(range(int(len(x_test) / batch))):
    tmp = sess.run(adv_x, feed_dict={x: x_test[i*batch:(i+1)*batch]})
    if x_adv_test is None:
      x_adv_test = tmp
    else:
      x_adv_test = np.concatenate((x_adv_test, tmp))

  for i in tqdm(range(int(len(x_train) / batch))):
    tmp = sess.run(adv_x, feed_dict={x: x_train[i*batch:(i+1)*batch]})
    if x_adv_train is None:
      x_adv_train = tmp
    else:
      x_adv_train = np.concatenate((x_adv_train, tmp))

  def evaluate_adv():
    # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_adv_test, y_test, args=eval_params)
    report.clean_train_adv_eval = acc
    print('Test accuracy on DeepFool adversarial examples: %0.4f' % acc)

  evaluate_adv()

  save_list = [x_adv_train, x_adv_test]
  print(x_adv_train.shape)
  print(x_adv_test.shape)
  pickle.dump(save_list, open("./df.pkl", 'wb'))
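
The pickled arrays can be restored later; a minimal sketch matching the pickle.dump call above:

import pickle

with open("./df.pkl", "rb") as f:
    x_adv_train, x_adv_test = pickle.load(f)
print(x_adv_train.shape, x_adv_test.shape)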
Code Example #12
            x_input1 = tf.placeholder(tf.float32, shape=batch_shape)
            x_input2 = tf.placeholder(tf.float32, shape=batch_shape)
            prediction = model(x_input2, x_input1)

            # prediction = sess.run(predictions, feed_dict={phase_train_placeholder: False})
            # Define DeepFool for the model
            steps = 1
            df_params = {
                'nb_classes': 2,
                'max_iter': 150,
                'clip_min': 0.0,
                'clip_max': 1.0,
                'nb_candidate': 2
            }
            DF = DeepFool(model, back='tf', sess=sess)
            adv_x = DF.generate(x_input1, x_input2, faces2_batch, **df_params)
            inception_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='InceptionResnetV1')
            saver = tf.train.Saver(inception_vars, max_to_keep=3)
            # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
            pretrained_model = '/home/fan/facenet_adversarial_faces/models/facenet/20170512-110547/'
            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                # facenet.load_model(pretrained_model)

                model_exp = os.path.expanduser(pretrained_model)
                print('Model directory: %s' % model_exp)
                _, ckpt_file = facenet.get_model_filenames(model_exp)

                # print('Metagraph file: %s' % meta_file)
                print('Checkpoint file: %s' % ckpt_file)
Code Example #13
def eval(sess,
         model_name,
         X_train,
         Y_train,
         X_test,
         Y_test,
         cnn=False,
         rbf=False,
         fgsm=False,
         jsma=False,
         df=False,
         bim=False):
    """ Load model saved in model_name.json and model_name_weights.h5 and 
    evaluate its accuracy on legitimate test samples and adversarial samples.
    Use cnn=True if the model is CNN based.
    """

    # open text file and output accuracy results to it
    text_file = open("cifar_results.txt", "w")

    # load saved model
    print("Load model ... ")
    '''
    json = open('models/{}.json'.format(model_name), 'r')
    model = json.read()
    json.close()
    loaded_model = model_from_json(model)
    loaded_model.load_weights("models/{}_weights.h5".format(model_name))
    '''
    if rbf:
        loaded_model = load_model("rbfmodels/{}.h5".format(model_name),
                                  custom_objects={'RBFLayer': RBFLayer})
        text_file.write('Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
    else:
        loaded_model = load_model("models/{}.h5".format(model_name))
        text_file.write('Evaluating on models/{}.h5\n\n'.format(model_name))

    # Set placeholders
    if cnn:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 3072))

    y = tf.placeholder(tf.float32, shape=(None, 10))

    predictions = loaded_model(x)

    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args={"batch_size": 128})
    text_file.write('Test accuracy on legitimate test examples: {0}\n'.format(
        str(accuracy)))
    #print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples depending on the input parameters
    wrap = KerasModelWrapper(loaded_model)

    # FGSM
    if fgsm:
        fgsm = FastGradientMethod(wrap, sess=sess)
        fgsm_params = {'eps': 0.3}
        adv_x = fgsm.generate(x, **fgsm_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on fgsm adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on fgsm adversarial test examples: ' + str(accuracy))

    # JSMA
    if jsma:
        jsma = SaliencyMapMethod(wrap, sess=sess)
        jsma_params = {
            'theta': 2.,
            'gamma': 0.145,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }
        adv_x = jsma.generate(x, **jsma_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on jsma adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on jsma adversarial test examples: ' + str(accuracy))

    # DeepFool
    if df:
        df = DeepFool(wrap, sess=sess)
        df_params = {'nb_candidate': 10, 'max_iter': 50}
        adv_x = df.generate(x, **df_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on df adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on df adversarial test examples: ' + str(accuracy))

    # Basic Iterative Method
    # Commented out as it is hanging on batch #0 at the moment
    '''
    if bim:
        bim = ProjectedGradientDescent(wrap, sess=sess)
        bim_params = {'eps': 0.3}
        adv_x = bim.generate(x, **bim_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={ "batch_size" : 128})
        text_file.write('Test accuracy on bim adversarial test examples: {0}\n'.format(str(accuracy)))
        #print('Test accuracy on bim adversarial test examples: ' + str(accuracy))
    '''
    print('Accuracy results written to cifar_results.txt')
    text_file.close()

    # Close TF session
    sess.close()
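
A hypothetical driver for eval() above; the preprocessing follows the usual Keras CIFAR-10 recipe, and the saved model name "cifar10_cnn" is an assumption, not from the original code:

import numpy as np
import tensorflow as tf
from tensorflow import keras

sess = tf.Session()
keras.backend.set_session(sess)

# Load and normalize CIFAR-10; one-hot encode the labels.
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0
Y_train = keras.utils.to_categorical(y_train, 10)
Y_test = keras.utils.to_categorical(y_test, 10)

eval(sess, "cifar10_cnn", X_train, Y_train, X_test, Y_test,
     cnn=True, fgsm=True, jsma=True, df=True)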
Code Example #14
# Define the update func
loss = w * losses
optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
train_step = optimizer.minimize(loss)

# Test acc on legit data
logits = wrap.get_logits(x[0])
acc, acc_op = tf.metrics.accuracy(
    labels=tf.argmax(y, 1), predictions=tf.argmax(logits, 1))

# Define adv attack. Note: DeepFool takes no 'eps' budget; its perturbation
# size is governed by 'overshoot' and 'max_iter' (defaults shown below).
deepfool = DeepFool(wrap, sess=sess)
deepfool_params = {'overshoot': 0.02, 'max_iter': 50,
                   'clip_min': 0., 'clip_max': 1.}

# Attack images
x_deepfool = deepfool.generate(x[0], **deepfool_params)
# Consider the attack to be constant
x_deepfool = tf.stop_gradient(x_deepfool)

# Evaluate predictions on adv attacks
preds_deepfool = model(x_deepfool)
acc_deepfool, acc_op_deepfool = tf.metrics.accuracy(
    labels=tf.argmax(y, 1), predictions=tf.argmax(preds_deepfool, 1))

# Define adv attack
momentum_iterative = MomentumIterativeMethod(wrap, sess=sess)
momentum_iterative_params = {
    'eps': args.noise_eps,
    'clip_min': 0.,
    'clip_max': 1.
}
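
The tf.metrics.accuracy calls above create local "total"/"count" variables that must be initialized before the update ops run; a minimal sketch, where feed_batch is a hypothetical feed_dict for x and y:

sess.run(tf.local_variables_initializer())
sess.run(acc_op_deepfool, feed_dict=feed_batch)  # feed_batch: hypothetical
print(sess.run(acc_deepfool))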
Code Example #15
def baseline_deepfool(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      nb_epochs=6,
                      batch_size=128,
                      learning_rate=0.001,
                      clean_train=True,
                      testing=False,
                      backprop_through_attack=False,
                      nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 50,
        'clip_min': 0.,
        'clip_max': 1.
    }
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        # The clean model is trained here; a fresh model_2 is created below
        # for the adversarial-training phase.
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the DeepFool attack object and graph
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)

        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on DeepFool adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
        # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    deepfool2 = DeepFool(model_2, sess=sess)
    adv_x_2 = deepfool2.generate(x, **deepfool_params)
    if not backprop_through_attack:
        # Treat the DeepFool construction as a constant during training.
        # For some attacks, backpropagating through the attack increases the
        # cost of training but gives the defender the ability to anticipate
        # how the attacker will change their strategy in response to updates
        # to the defender's parameters; see the stop_gradient illustration
        # below.
        adv_x_2 = tf.stop_gradient(adv_x_2)
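    # A minimal, hypothetical illustration of tf.stop_gradient (for intuition
    # only): gradients reach model_2 through its forward pass on adv_x_2, but
    # never flow back into the attack graph that produced adv_x_2:
    #   w = tf.Variable(2.0)
    #   a = tf.stop_gradient(3.0 * w)   # treated as the constant 6.0
    #   loss = a * w                    # d(loss)/dw == 6.0, not 12.0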
    preds_2_adv = model_2(adv_x_2)

    #
    # Also generate FGSM examples, for evaluation only
    #
    fgsm = FastGradientMethod(model_2, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    if not backprop_through_attack:
        # For the FGSM attack used here, the attack has zero gradient, so
        # enabling this flag does not change the gradient. For some other
        # attacks, enabling this flag increases the cost of training, but
        # gives the defender the ability to anticipate how the attacker will
        # change their strategy in response to updates to the defender's
        # parameters.
        adv_x_fgsm = tf.stop_gradient(adv_x_fgsm)
    preds_2_fgsm = model_2(adv_x_fgsm)

    # FGSM adversarial examples are not used for training here

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on FGSM adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_fgsm,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

        # Accuracy of the DeepFool adv trained model on DeepFool examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on DeepFool adversarial examples: %0.4f' %
              accuracy)

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
Code example #16
def train(alpha, eps2_ratio, gen_ratio, fgsm_eps, LR, logfile):
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , eps2_ratio \t %d , gen_ratio \t %d \n"%(fgsm_eps, LR, alpha, eps2_ratio, gen_ratio))
    #############################
    ##Hyper-parameter Setting####
    #############################
    hk = 256; #number of hidden units at the last layer
    Delta2 = (14*14+2)*25; #global sensitivity for the first hidden layer
    Delta3_adv = 2*hk #10*(hk + 1/4 * hk**2) #10*(hk) #global sensitivity for the output layer
    Delta3_benign = 2*hk #10*(hk); #global sensitivity for the output layer
    D = 50000; #size of the dataset
    L = 2499; #batch size
    image_size = 28;
    padding = 4;
    #numHidUnits = 14*14*32 + 7*7*64 + M + 10; #number of hidden units
    #gen_ratio = 1
    epsilon1 = 0.0; #0.175; #epsilon for dpLRP
    epsilon2 = 0.1*(1 + gen_ratio); #epsilon for the first hidden layer
    epsilon3 = 0.1*(1); #epsilon for the last hidden layer
    total_eps = epsilon1 + epsilon2 + epsilon3
    print(total_eps)
    uncert = 0.1; #uncertainty modeling at the output layer
    infl = 1; #inflation rate in the privacy budget redistribution
    R_lowerbound = 1e-5; #lower bound of the LRP
    c = [0, 40, 50, 200] #norm bounds
    epochs = 200; #number of epochs
    preT_epochs = 50; #number of epochs
    T = int(D/L*epochs + 1); #number of steps T
    pre_T = int(D/L*preT_epochs + 1);
    step_for_epoch = int(D/L); #number of steps for one epoch
    
    broken_ratio = 1
    #alpha = 9.0 # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    #eps2_ratio = 10; # [1/10, 1/8, 1/6, 1/4, 1/2, 1, 2, 4, 6, 8, 10]
    #eps_benign = 1/(1+eps2_ratio)*(2*epsilon2)
    #eps_adv = eps2_ratio/(1+eps2_ratio)*(2*epsilon2)
    
    #fgsm_eps = 0.1
    rand_alpha = 0.05
    
    ##Robustness##
    robustness_T = (fgsm_eps*18*18*L*epsilon2)/Delta2;
    ####
    
    LRPfile = os.getcwd() + '/Relevance_R_0_075.txt';
    #############################
    mnist = input_data.read_data_sets("MNIST_data/", one_hot = True);

    #############################
    ##Construct the Model########
    #############################
    #Step 4: Randomly initiate the noise, Compute 1/|L| * Delta3 for the output layer#

    #Compute the 1/|L| * Delta3 for the last hidden layer#
    """eps3_ratio = Delta3_adv/Delta3_benign;
    eps3_benign = 1/(1+eps3_ratio)*(epsilon3)
    eps3_adv = eps3_ratio/(1+eps3_ratio)*(epsilon3)"""
    loc, scale3_benign, scale3_adv = 0., Delta3_benign/(epsilon3*L), Delta3_adv/(epsilon3*L);
    ###
    #End Step 4#
    # Parameters Declarification
    W_conv1 = weight_variable('W_conv1', [5, 5, 1, 32], collect=[AECODER_VARIABLES]);
    b_conv1 = bias_variable('b_conv1', [32], collect=[AECODER_VARIABLES]);

    shape     = W_conv1.get_shape().as_list()
    w_t       = tf.reshape(W_conv1, [-1, shape[-1]])
    w         = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivity = tf.reduce_max(sing_vals)
    gamma = 2*(14*14 + 2)*25/(L*sensitivity)
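    # The lines above flatten W_conv1 into a 2-D matrix and take its largest
    # singular value (the spectral norm) as the layer's sensitivity; gamma is
    # the first-layer global sensitivity (14*14 + 2)*25 scaled by
    # 2/(L * sensitivity).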
    
    dp_epsilon=1.0 #0.1
    delta_r = fgsm_eps*(image_size**2);
    #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
    #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
    
    W_conv2 = weight_variable('W_conv2', [5, 5, 32, 64], collect=[CONV_VARIABLES]);
    b_conv2 = bias_variable('b_conv2', [64], collect=[CONV_VARIABLES]);

    W_fc1 = weight_variable('W_fc1', [4 * 4 * 64, hk], collect=[CONV_VARIABLES]);
    b_fc1 = bias_variable('b_fc1', [hk], collect=[CONV_VARIABLES]);

    W_fc2 = weight_variable('W_fc2', [hk, 10], collect=[CONV_VARIABLES]);
    b_fc2 = bias_variable('b_fc2', [10], collect=[CONV_VARIABLES]);

    """scale2 = tf.Variable(tf.ones([hk]))
    beta2 = tf.Variable(tf.zeros([hk]))
    tf.add_to_collections([CONV_VARIABLES], scale2)
    tf.add_to_collections([CONV_VARIABLES], beta2)"""

    params = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2]
    ###


    #Step 5: Create the model#
    noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);
    adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]);

    keep_prob = tf.placeholder(tf.float32);
    x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    x_image = tf.reshape(x, [-1,image_size,image_size,1]);

    #perturbFMx = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx = np.reshape(perturbFMx, [-1, 28, 28, 1]);

    # pretrain ###
    #Enc_Layer1 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    #pretrain = Enc_Layer1.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, epsilon = 2*epsilon2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise)
    ###########

    adv_x = tf.placeholder(tf.float32, [None, image_size*image_size]);
    adv_image = tf.reshape(adv_x, [-1,image_size,image_size,1]);

    #perturbFMx_adv = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28)
    #perturbFMx_adv = np.reshape(perturbFMx_adv, [-1, 28, 28, 1]);

    # pretrain adv ###
    #perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*32)
    #perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);
    FM_h = tf.placeholder(tf.float32, [None, 14, 14, 32]);
    Enc_Layer2 = EncLayer(inpt=adv_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_adv = Enc_Layer2.get_train_ops2(xShape = tf.shape(adv_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = adv_noise, perturbFM_h = FM_h)
    Enc_Layer3 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu)
    pretrain_benign = Enc_Layer3.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise, perturbFM_h = FM_h)
    ###########
    
    x_image += noise;
    x_image = tf.clip_by_value(x_image, -10, 10) #Clip the values of each input feature.
    
    adv_image += adv_noise;
    adv_image = tf.clip_by_value(adv_image, -10, 10) #Clip the values of each input feature.

    #perturbFM = np.random.laplace(0.0, scale3_benign, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    perturbFM = np.random.laplace(0.0, scale3_benign, hk * 10)
    perturbFM = np.reshape(perturbFM, [hk, 10]);
    
    y_conv = inference(x_image, perturbFM, hk, FM_h, params);
    softmax_y_conv = tf.nn.softmax(y_conv)
    #robust_mask = inference_robust_mask(y_conv, Delta2, L, epsilon2, robustness_T)

    #perturbFM = np.random.laplace(0.0, scale3_adv, hk)
    #perturbFM = np.reshape(perturbFM, [hk]);
    y_adv_conv = inference(adv_image, perturbFM, hk, FM_h, params);
    #adv_robust_mask = inference_robust_mask(y_adv_conv, Delta2, L, epsilon2, robustness_T)

    # test model
    perturbFM_test = np.random.laplace(0.0, 0, hk)
    perturbFM_test = np.reshape(perturbFM_test, [hk]);
    x_test = tf.reshape(x, [-1,image_size,image_size,1]);
    y_test = inference(x_test, perturbFM_test, hk, FM_h, params);
    #test_robust_mask = inference_robust_mask(y_test, Delta2, L, epsilon2, robustness_T)

    #Define a place holder for the output label#
    y_ = tf.placeholder(tf.float32, [None, 10]);
    adv_y_ = tf.placeholder(tf.float32, [None, 10]);
    #End Step 5#
    #############################

    #############################
    ##Define loss and Optimizer##
    #############################
    '''
        Computes differentially private sigmoid cross entropy given `logits`.
        
        Measures the probability error in discrete classification tasks in which each
        class is independent and not mutually exclusive.
        
        For brevity, let `x = logits`, `z = labels`.  The logistic loss is
        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
        = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
        = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
        = (1 - z) * x + log(1 + exp(-x))
        = x - x * z + log(1 + exp(-x))
        
        For x < 0, to avoid overflow in exp(-x), we reformulate the above
        
        x - x * z + log(1 + exp(-x))
        = log(exp(x)) - x * z + log(1 + exp(-x))
        = - x * z + log(1 + exp(x))
        
        Hence, to ensure stability and avoid overflow, the implementation uses this
        equivalent formulation
        
        max(x, 0) - x * z + log(1 + exp(-abs(x)))
        
        `logits` and `labels` must have the same type and shape. Let neg_abs_logits = -abs(y_conv) = -abs(h_fc1 * W_fc2). By applying a Taylor expansion, we have:
        
        Taylor = max(y_conv, 0) - y_conv * y_ + log(1 + exp(-abs(y_conv)));
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2)
        = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2)
        = F1 + F2
        where: F1 = max(h_fc1 * W_fc2, 0) + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) and F2 = - (y_ * h_fc1) * W_fc2
        
        To ensure that Taylor is differentially private, we need to perturb all the coefficients, including the term y_ * h_fc1 * W_fc2.
        Note that h_fc1 is differentially private, since its computation on top of the DP Affine transformation does not access the original data.
        Therefore, F1 should be differentially private. We need to preserve DP in F2, which reads the groundtruth label y_, as follows:
        
        By applying the Functional Mechanism, we perturb (y_ * h_fc1) * W_fc2 as ((y_ * h_fc1) + perturbFM) * W_fc2 = (y_ * h_fc1)*W_fc2 + (perturbFM * W_fc2):
        
        perturbFM = np.random.laplace(0.0, scale3, hk * 10)
        perturbFM = np.reshape(perturbFM/L, [hk, 10]);
        
        where scale3 = Delta3/(epsilon3) = 2*hk/(epsilon3);
        
        To allow computing gradients at zero, we define custom versions of max and abs functions [Tensorflow].
        
        Source: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/nn_impl.py @ TensorFlow
    '''
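    # A quick numerical sanity check of the 2nd-order expansion used below
    # (illustrative only): for t = neg_abs_logits = -0.5,
    #   exact:  log(1 + exp(-0.5))                      ~= 0.4741
    #   Taylor: log(2) + 0.5*(-0.5) + (1/8)*(-0.5)**2   ~= 0.4744
    # so replacing log1p(exp(t)) with log(2) + t/2 + t**2/8 is accurate for
    # small |t|, which is what Taylor_benign / Taylor_adv rely on.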
    ### Taylor for benign x
    zeros = array_ops.zeros_like(y_conv, dtype=y_conv.dtype)
    cond = (y_conv >= zeros)
    relu_logits = array_ops.where(cond, y_conv, zeros)
    neg_abs_logits = array_ops.where(cond, -y_conv, y_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_benign = math_ops.add(relu_logits - y_conv * y_, math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_benign = tf.abs(y_conv - y_)

    ### Taylor for adv_x
    zeros_adv = array_ops.zeros_like(y_adv_conv, dtype=y_conv.dtype)
    cond_adv = (y_adv_conv >= zeros_adv)
    relu_logits_adv = array_ops.where(cond_adv, y_adv_conv, zeros_adv)
    neg_abs_logits_adv = array_ops.where(cond_adv, -y_adv_conv, y_adv_conv)
    #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits)))
    Taylor_adv = math_ops.add(relu_logits_adv - y_adv_conv * adv_y_, math.log(2.0) + 0.5*neg_abs_logits_adv + 1.0/8.0*neg_abs_logits_adv**2) - tf.reduce_sum(perturbFM*W_fc2)
    #Taylor_adv = tf.abs(y_adv_conv - adv_y_)

    ### Adversarial training loss
    adv_loss = (1/(L + L*alpha))*(Taylor_benign + alpha * Taylor_adv)

    '''Sometimes, using learning rate decay can help to stabilize the training process. However, use it carefully, since it may affect the convergence speed.'''
    global_step = tf.Variable(0, trainable=False)
    pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
    train_var_list = tf.get_collection(CONV_VARIABLES)
    #print(pretrain_var_list)
    #print(train_var_list)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        pretrain_step = tf.train.AdamOptimizer(LR).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list);
        train_step = tf.train.AdamOptimizer(LR).minimize(adv_loss, global_step=global_step, var_list=train_var_list);
    sess = tf.InteractiveSession();

    # Define the correct prediction and accuracy
    # This needs to be changed to "Robust Prediction"
    correct_prediction_x = tf.equal(tf.argmax(y_test,1), tf.argmax(y_,1));
    accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32));

    #############
    # use these to get predictions wrt to robust conditions
    """robust_correct_prediction_x = tf.multiply(test_robust_mask, tf.cast(correct_prediction_x, tf.float32))
    accuracy_x_robust = tf.reduce_sum(robust_correct_prediction_x) / tf.reduce_sum(test_robust_mask)
    #certified_utility = 2/(1/accuracy_x_robust + 1/(tf.reduce_sum(test_robust_mask)/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))))
    certified_utility = (1.0*tf.reduce_sum(test_robust_mask))/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))"""
    #############

    # craft adversarial samples from x for training
    dynamic_eps = tf.placeholder(tf.float32);
    emsemble_L = int(L/3)
    softmax_y = tf.nn.softmax(y_test)
    #c_x_adv = fgsm(x, softmax_y, eps=fgsm_eps, clip_min=0.0, clip_max=1.0)
    c_x_adv = fgsm(x, softmax_y, eps=(dynamic_eps)/10, clip_min=-1.0, clip_max=1.0) # for I-FGSM
    x_adv = tf.reshape(c_x_adv, [emsemble_L,image_size*image_size]);

    #====================== attack =========================
    #attack_switch = {'randfgsm':True, 'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    #attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True}
    attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}
    #other possible attacks:
        # ElasticNetMethod
        # FastFeatureAdversaries
        # LBFGS
        # SaliencyMapMethod
        # VirtualAdversarialMethod

    # y_test = logits (before softmax)
    # softmax_y_test = preds (probs, after softmax)
    softmax_y_test = tf.nn.softmax(y_test)

    # create saver
    saver = tf.train.Saver(tf.all_variables())
    
    sess.run(W_conv1.initializer)
    _gamma = sess.run(gamma)
    _gamma_x = Delta2/L
    epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x)
    print(epsilon2_update/_gamma + epsilon2_update/_gamma_x)
    print(epsilon2_update)
    _sensitivityW = sess.run(sensitivity)
    delta_h = _sensitivityW*(14**2)
    dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon)
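    # dp_mult sums two ratios: the input-layer Laplace scale
    # Delta2/(L*epsilon2_update) over delta_r/dp_epsilon, and the hidden-layer
    # scale 2*Delta2/(L*epsilon2_update) over delta_h/dp_epsilon; the PixelDP
    # robustness size computed later is divided by dp_mult before being
    # compared with fgsm_eps.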
    #############################
    
    iterativeStep = 100
    
    # load the most recent models
    _global_step = 0
    ckpt = tf.train.get_checkpoint_state(os.path.join(os.getcwd(), 'tmp/train'))
    if ckpt and ckpt.model_checkpoint_path:
        print(ckpt.model_checkpoint_path);
        saver.restore(sess, ckpt.model_checkpoint_path)
        _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
        print('No checkpoint file found')

    start_time = time.time();

    # adv pretrain model (Auto encoder layer)
    cost = tf.reduce_sum(Enc_Layer2.cost);
    logfile.write("pretrain: \n")
    
    # define cleverhans abstract models for using cleverhans attacks
    ch_model_logits = CustomCallableModelWrapper(callable_fn=inference_test_input, output_layer='logits', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)
    ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise)

    # rand+fgsm
    # if attack_switch['randfgsm']:
    #     randfgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
    #     x_randfgsm_t = (fgsm_eps - rand_alpha) * randfgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0)
    #     x_rand_t = rand_alpha * tf.sign(tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=1.0))

    # define each attack method's tensor
    mu_alpha = tf.placeholder(tf.float32, [1]);
    attack_tensor_dict = {}
    # FastGradientMethod
    if attack_switch['fgsm']:
        print('creating attack tensor of FastGradientMethod')
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
        x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now
        attack_tensor_dict['fgsm'] = x_adv_test_fgsm

    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # default: eps_iter=0.05, nb_iter=10
    if attack_switch['ifgsm']:
        print('creating attack tensor of BasicIterativeMethod')
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

    # Deepfool
    if attack_switch['deepfool']:
        print('creating attack tensor of DeepFool')
        deepfool_obj = DeepFool(model=ch_model_logits, sess=sess)
        #x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['deepfool'] = x_adv_test_deepfool

    # MomentumIterativeMethod
    # default: eps_iter=0.06, nb_iter=10
    if attack_switch['mim']:
        print('creating attack tensor of MomentumIterativeMethod')
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['mim'] = x_adv_test_mim

    # SPSA
    # note: here epsilon is an infinity-norm bound instead of a percentage of the perturbation
    # Maybe exclude this method at first, since it seems to have some constraints on the data value range
    if attack_switch['spsa']:
        print('creating attack tensor of SPSA')
        spsa_obj = SPSA(model=ch_model_logits, sess=sess)
        #x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1, ord=2)
        x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1)
        attack_tensor_dict['spsa'] = x_adv_test_spsa

    # CarliniWagnerL2
    # confidence=0 is from their paper
    # it is said to be slow, so maybe exclude it at first
    if attack_switch['cwl2']:
        print('creating attack tensor of CarliniWagnerL2')
        cwl2_obj = CarliniWagnerL2(model=ch_model_logits, sess=sess)
        #x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['cwl2'] = x_adv_test_cwl2

    # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
    # default: eps_iter=0.01, nb_iter=40
    if attack_switch['madry']:
        print('creating attack tensor of MadryEtAl')
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
        x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0)
        attack_tensor_dict['madry'] = x_adv_test_madry

    # SpatialTransformationMethod
    # the params are pretty different from the paper,
    # so the defaults are used here
    # excluded since there's a bug
    if attack_switch['stm']:
        print('creating attack tensor of SpatialTransformationMethod')
        stm_obj = SpatialTransformationMethod(model=ch_model_probs, sess=sess)
        #x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6, ord=2)
        x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6)
        attack_tensor_dict['stm'] = x_adv_test_stm
    #====================== attack =========================
    
    sess.run(tf.initialize_all_variables());

    ##perturb h for training
    perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
    perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]);

    ##perturb h for testing
    perturbFM_h_test = np.random.laplace(0.0, 0, 14*14*32)
    perturbFM_h_test = np.reshape(perturbFM_h_test, [-1, 14, 14, 32]);

    '''for i in range(_global_step, _global_step + pre_T):
        d_eps = random.random();
        
        batch = mnist.train.next_batch(L); #Get a random batch.
        adv_images = sess.run(x_adv, feed_dict = {x:batch[0], y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})
        """batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]})
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)"""

        batch_2 = mnist.train.next_batch(L);
        pretrain_step.run(feed_dict={adv_x: np.append(adv_images, batch_2[0], axis = 0), adv_noise: AdvLnoise, FM_h: perturbFM_h});
        if i % int(5*step_for_epoch) == 0:
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            logfile.write("step \t %d \t %g \n"%(i, cost_value))
            print(cost_value)

    pre_train_finish_time = time.time()
    print('pre_train finished in: ' + parse_time(pre_train_finish_time - start_time))'''

    # train and test model with adv samples
    max_benign_acc = -1;
    max_robust_benign_acc = -1
    #max_adv_acc = -1;

    test_size = len(mnist.test.images)
    AdvLnoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    AdvLnoise_test = generateIdLMNoise(image_size, 0, epsilon2_update, test_size);

    Lnoise_empty = generateIdLMNoise(image_size, 0, epsilon2_update, L);
    BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
    last_eval_time = -1
    accum_time = 0
    accum_epoch = 0
    max_adv_acc_dict = {}
    max_robust_adv_acc_dict = {}
    #max_robust_adv_utility_dict = {}
    for atk in attack_switch.keys():
        if atk not in max_adv_acc_dict:
            max_adv_acc_dict[atk] = -1
            max_robust_adv_acc_dict[atk] = -1

    for i in range(_global_step, _global_step + T):
        # this batch is for generating adv samples
        batch = mnist.train.next_batch(emsemble_L); #Get a random batch.
        y_adv_batch = batch[1]
        # Print out the result every 10 epochs.
        if i % int(10*step_for_epoch) == 0 and i > int(10*step_for_epoch):
            cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32)
            print(cost_value)
            
            if last_eval_time < 0:
                last_eval_time = time.time()
            #===================benign samples=====================
            predictions_form_argmax = np.zeros([test_size, 10])
            #test_bach = mnist.test.next_batch(test_size)
            softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: BenignLNoise, FM_h: perturbFM_h})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
            for n_draws in range(0, 1):
                _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                for j in range(test_size):
                    pred = argmax_predictions[j]
                    predictions_form_argmax[j, pred] += 1;
                softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: (BenignLNoise + _BenignLNoise/2), FM_h: (perturbFM_h + _perturbFM_h/2)})
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_form_argmax;
            is_correct = []
            is_robust = []
            for j in range(test_size):
                is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                is_robust.append(robustness_from_argmax >= fgsm_eps)
            acc = np.sum(is_correct)*1.0/test_size
            robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
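            # note: robust_acc is 0/0 (NaN with a runtime warning) when no
            # test point is certified robust, i.e. np.sum(is_robust) == 0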
            robust_utility = np.sum(is_robust)*1.0/test_size
            max_benign_acc = max(max_benign_acc, acc)
            max_robust_benign_acc = max(max_robust_benign_acc, robust_acc*robust_utility)
            log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(i, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)
            #===================adv samples=====================
            #log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(i, total_eps)
            """adv_images_dict = {}
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_:mnist.test.labels})
            print("Done with the generating of Adversarial samples")"""
            #===================adv samples=====================
            adv_acc_dict = {}
            robust_adv_acc_dict = {}
            robust_adv_utility_dict = {}
            for atk in attack_switch.keys():
                if atk not in adv_acc_dict:
                    adv_acc_dict[atk] = -1
                    robust_adv_acc_dict[atk] = -1
                    robust_adv_utility_dict[atk] = -1
                if attack_switch[atk]:
                    adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_: mnist.test.labels, adv_noise: AdvLnoise_test, mu_alpha:[fgsm_eps]})
                    ### PixelDP Robustness ###
                    predictions_form_argmax = np.zeros([test_size, 10])
                    softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: perturbFM_h})
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    for n_draws in range(0, 2000):
                        if n_draws % 1000 == 0:
                            print(n_draws)
                        _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L);
                        _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32)
                        _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]);
                        for j in range(test_size):
                            pred = argmax_predictions[j]
                            predictions_form_argmax[j, pred] += 1;
                        softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (perturbFM_h + _perturbFM_h/2)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (BenignLNoise + _BenignLNoise/2), FM_h: perturbFM_h})
                        #softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (_perturbFM_h)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h})
                        argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    final_predictions = predictions_form_argmax;
                    is_correct = []
                    is_robust = []
                    for j in range(test_size):
                        is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j]))
                        robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult)
                        is_robust.append(robustness_from_argmax >= fgsm_eps)
                    adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_size
                    robust_adv_acc_dict[atk] = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
                    robust_adv_utility_dict[atk] = np.sum(is_robust)*1.0/test_size
                    ##############################
            for atk in attack_switch.keys():
                if attack_switch[atk]:
                    # added robust prediction
                    log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
                    max_adv_acc_dict[atk] = max(max_adv_acc_dict[atk], adv_acc_dict[atk])
                    max_robust_adv_acc_dict[atk] = max(max_robust_adv_acc_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk])
            print(log_str)
            logfile.write(log_str + '\n')

            # logfile.write("step \t %d \t %g \t %g \n"%(i, benign_acc, adv_acc))
            # print("step \t %d \t %g \t %g"%(i, benign_acc, adv_acc));

            # estimate end time
            """if i > 0 and i % int(10*step_for_epoch) == 0:
                current_time_interval = time.time() - last_eval_time
                last_eval_time = time.time()
                print('during last eval interval, {} epoch takes {}'.format(10, parse_time(current_time_interval)))
                accum_time += current_time_interval
                accum_epoch += 10
                estimate_time = ((_global_step + T - i) / step_for_epoch) * (accum_time / accum_epoch)
                print('estimate finish in: {}'.format(parse_time(estimate_time)))"""

            #print("step \t %d \t adversarial test accuracy \t %g"%(i, accuracy_x.eval(feed_dict={x: adv_images, y_: mnist.test.labels, noise: Lnoise_empty})));
            """checkpoint_path = os.path.join(os.getcwd() + '/tmp/train', 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=i);"""

        d_eps = random.random();
        y_adv = batch[1]
        adv_images = sess.run(attack_tensor_dict['ifgsm'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        """for iter in range(0, 9):
            adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})"""
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        batch = mnist.train.next_batch(emsemble_L)
        adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]})
        y_adv = np.append(y_adv, batch[1], axis = 0)
        train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)
        
        batch = mnist.train.next_batch(L); #Get a random batch.
        # train with benign and adv samples
        pretrain_step.run(feed_dict={adv_x: train_images, x: batch[0], adv_noise: AdvLnoise_test, noise: BenignLNoise, FM_h: perturbFM_h});
        train_step.run(feed_dict={x: batch[0], adv_x: train_images, y_: batch[1], adv_y_: y_adv, noise: BenignLNoise, adv_noise: AdvLnoise_test, FM_h: perturbFM_h});
    duration = time.time() - start_time;
    # print(parse_time(duration)); #print running time duration#

    max_acc_string = "max acc: benign: \t{:.4f} {:.4f}".format(max_benign_acc, max_robust_benign_acc)
    for atk in attack_switch.keys():
        if attack_switch[atk]:
            max_acc_string += " {}: \t{:.4f} {:.4f}".format(atk, max_adv_acc_dict[atk], max_robust_adv_acc_dict[atk])
    logfile.write(max_acc_string + '\n')
    logfile.write(str(duration) + '\n')
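
All of the examples above share the same cleverhans v2-era call pattern: wrap the model, build the attack tensor once with generate(), then feed batches through sess.run. The following minimal sketch distills that pattern; it assumes cleverhans 2.x with TensorFlow 1.x, and tiny_net is a hypothetical stand-in classifier, not a model from any example here.

import numpy as np
import tensorflow as tf
from cleverhans.attacks import DeepFool
from cleverhans.model import CallableModelWrapper

def tiny_net(x):
    # hypothetical stand-in: a single dense layer producing 10 logits;
    # AUTO_REUSE lets cleverhans call this function more than once
    with tf.variable_scope('tiny_net', reuse=tf.AUTO_REUSE):
        return tf.layers.dense(tf.layers.flatten(x), 10, name='fc')

sess = tf.Session()
x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
model = CallableModelWrapper(tiny_net, output_layer='logits')
attack = DeepFool(model, sess=sess)
adv_x = attack.generate(x, nb_candidate=10, overshoot=0.02,
                        max_iter=50, clip_min=0., clip_max=1.)
sess.run(tf.global_variables_initializer())
x_np = np.random.rand(4, 28, 28, 1).astype(np.float32)
adv_np = sess.run(adv_x, feed_dict={x: x_np})  # DeepFool-perturbed batch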
Code example #17
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64,
                   num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    deepfool_params = {'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        sess.run(init)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        s = []
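        # s collects, per test example, the gap between the largest and
        # second-largest predicted probabilities (a confidence margin)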
        for i in range(0, len(X_test), 1):
            pred = sess.run(preds, {x: X_test[i:i + 1]})
            print(pred)
            print(Y_test[i:i + 1])
            s.append(np.sort(pred)[0, -1] - np.sort(pred)[0, -2])
        #Draw a histogram
        def draw_hist(myList, Title, Xlabel, Ylabel):
            plt.hist(myList,
                     np.arange(0, 1, 0.01),
                     normed=True,
                     stacked=True,
                     facecolor='blue')
            plt.xlabel(Xlabel)
            plt.ylabel(Ylabel)
            plt.title(Title)
            plt.show()

        draw_hist(myList=s,
                  Title='legitimate',
                  Xlabel='difference between max and second largest',
                  Ylabel='Probability')

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the DeepFool attack object and graph
        deepfool = DeepFool(model, back='tf', sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        '''
        s = []
        for i in range(0,len(X_test),1):
            pred = sess.run(preds_adv, {x: X_test[i:i+1]})
            print(pred)
            print(Y_test[i:i+1])
            s.append(np.sort(pred)[0,-1]-np.sort(pred)[0,-2])
        
        #Draw a histogram
        def draw_hist(myList,Title,Xlabel,Ylabel):
            plt.hist(myList,np.arange(0,1,0.01),normed=True,stacked=True,facecolor='red')
            plt.xlabel(Xlabel)       
            plt.ylabel(Ylabel)
            plt.title(Title)
            plt.show()
        draw_hist(myList=s,Title='adversarial',Xlabel='difference between max and second largest',
               Ylabel='Probability')
        '''

        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc
        return report
Code example #18
File: adv_generate.py (project: wangjksjtu/PeerNets)
def adv_generate(nb_epochs=25,
                 batch_size=128,
                 learning_rate=0.001,
                 clean_train=True,
                 testing=False,
                 nb_filters=64,
                 num_threads=None,
                 data='cifar',
                 adv_attack='fgsm',
                 save_dir='data'):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    # set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    config = tf.ConfigProto(**config_args)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    if data == "mnist":
        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                      train_end=60000,
                                                      test_start=0,
                                                      test_end=10000)
    else:
        X_train, Y_train, X_test, Y_test = data_cifar10()

    # print (Y_test.shape)
    '''
    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        imsave(os.path.join("benign", str(i) + ".jpg"), img)

    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        benign_path = "benign_" + str(np.argmax(Y_test[i,:], axis=0))
        if not os.path.exists(benign_path):
            os.makedirs(benign_path)
        imsave(os.path.join(benign_path, str(i) + ".jpg"), img)
    '''
    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    if data == 'mnist':
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2018, 7, 18])

    if clean_train:
        if data == 'mnist':
            model = build_model(0.01, 1e-6)
        else:
            model = build_model_cifar(0.01, 1e-6)

        preds = model(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == 10000, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        if adv_attack == "FGSM":
            # Initialize the attack object and graph
            # FGSM
            print "FGSM ATTACK..."
            fgsm_params = {'eps': 0.1, 'clip_min': 0., 'clip_max': 1.}
            fgsm = FastGradientMethod(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            preds_adv = model(adv_x)
        elif adv_attack == "CWL2":
            # CWL2
            print "CWL2 ATTACK..."
            cwl2_params = {'batch_size': 8}
            cwl2 = CarliniWagnerL2(model, sess=sess)
            adv_x = cwl2.generate(x, **cwl2_params)
            preds_adv = model(adv_x)
        elif adv_attack == "JSMA":
            # JSMA
            print "JSMA ATTACK..."
            jsma = SaliencyMapMethod(model, back='tf', sess=sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.1,
                'clip_min': 0.,
                'clip_max': 1.
            }
            adv_x = jsma.generate(x, **jsma_params)
            preds_adv = model(adv_x)
        elif adv_attack == "DeepFool":
            # DeepFool
            print "DeepFool ATTACK..."
            deepfool = DeepFool(model, sess=sess)
            deepfool_params = {
                'nb_candidate': 10,
                'overshoot': 0.02,
                'max_iter': 50,
                'clip_min': 0.0,
                'clip_max': 1.0
            }
            adv_x = deepfool.generate(x, **deepfool_params)
            preds_adv = model(adv_x)
        elif adv_attack == "LBFGS":
            # LBFGS
            print "LBFGS ATTACK..."
            lbfgs_params = {'y_target': y, 'batch_size': 100}
            lbfgs = LBFGS(model, sess=sess)
            adv_x = lbfgs.generate(x, **lbfgs_params)
            preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        adv_imgs = []
        adv_imgs_test = []

        if adv_attack != "LBFGS":
            for i in range(5000):
                adv_imgs_train, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_train[i * 10:(i + 1) * 10]})
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)
            for i in range(1000):
                adv_imgs_tmp, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_test[i * 10:(i + 1) * 10]})
                adv_imgs_test.append(adv_imgs_tmp)
            adv_imgs_test = np.vstack(adv_imgs_test)
        else:
            for i in range(500):
                target = np_utils.to_categorical(
                    (np.argmax(Y_train[i * 100:(i + 1) * 100], axis=1) + 1) %
                    10, 10)
                adv_imgs_train, _ = sess.run([adv_x, preds_adv],
                                             feed_dict={
                                                 x: X_train[i * 100:(i + 1) *
                                                            100],
                                                 y: target
                                             })
                print('train image: %s' % str(i))
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)

            for i in range(100):
                target = np_utils.to_categorical(
                    (np.argmax(Y_test[i * 100:(i + 1) * 100], axis=1) + 1) %
                    10, 10)
                adv_imgs_tmp, _ = sess.run([adv_x, preds_adv],
                                           feed_dict={
                                               x: X_test[i * 100:(i + 1) *
                                                         100],
                                               y: target
                                           })
                adv_imgs_test.append(adv_imgs_tmp)
                print('test image: %s' % str(i))
            adv_imgs_test = np.vstack(adv_imgs_test)
        '''
        for i in range(6):
            target = np_utils.to_categorical((np.argmax(Y_train[i*10000: (i+1)*10000, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train, adv_labels_train = sess.run([adv_x, preds_adv], feed_dict={x: X_train[i*10000: (i+1)*10000,...],
                                                                                       y: target})
        for i in range(60000):
            target = np_utils.to_categorical((np.argmax(Y_train[i:i+1, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train = sess.run([adv_x], feed_dict={x: X_train[i:i+1,...], y: target})
            print (len(adv_imgs_train), adv_imgs_train[0].shape, adv_imgs_train[1])
        '''
        label_truth_train = np.argmax(Y_train, axis=1)
        label_truth_test = np.argmax(Y_test, axis=1)

        save_dir = os.path.join(save_dir, adv_attack)  # optionally append "eps_" + str(eps)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        print(adv_imgs.shape, adv_imgs_test.shape)
        provider.save_h5(adv_imgs, label_truth_train,
                         os.path.join(save_dir, "train_adv.h5"))
        provider.save_h5(adv_imgs_test, label_truth_test,
                         os.path.join(save_dir, "test_adv.h5"))
        # utils.save_h5(X_train, label_truth_train, "FGSM/train_benign.h5")
        # utils.save_h5(X_test, label_truth_test, "FGSM/test_benign.h5")
        '''
        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
            imsave(os.path.join("adv", str(i) + ".jpg"), img)

        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
	    adv_path = "adv_" + str(np.argmax(adv_labels[i,:], axis=0))
	    if not os.path.exists(adv_path):
	        os.makedirs(adv_path)
	    imsave(os.path.join(adv_path, str(i) + ".jpg"), img)
        '''

        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

    return report
Code example #19
def DF(torch_model, dataset, eps_list, opt, c, h, w, clip_min, clip_max):
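    """Run DeepFool against a PyTorch model through CleverHans' TF wrapper.

    opt: 'evaluate' returns a list of adversarial accuracies (one per entry
    in eps_list); 'generate' returns lists of successful adversarial inputs.
    (c, h, w): input shape fed to the TF placeholder.
    clip_min / clip_max: pixel bounds passed to DeepFool.
    """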

    if opt == 'evaluate':
        acclist = []
        for eps in eps_list:
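            # NOTE: DeepFool takes no eps parameter; each pass over eps_list
            # runs an identical attack, so the loop only repeats the evaluation.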
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(
                None,
                c,
                h,
                w,
            ))
            # Convert pytorch model to a tf_model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create a DeepFool attack
            atk_op = DeepFool(cleverhans_model, sess=sess)
            atk_params = {'clip_min': clip_min, 'clip_max': clip_max}
            adv_x_op = atk_op.generate(x_op, **atk_params)
            adv_preds_op = tf_model_fn(adv_x_op)

            # Run an evaluation of our model against DeepFool
            total = 0
            correct = 0
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                # Feed a NumPy array; TF cannot ingest CUDA tensors directly.
                adv_preds = sess.run(adv_preds_op,
                                     feed_dict={x_op: xs.cpu().numpy()})
                correct += (np.argmax(
                    adv_preds, axis=1) == ys.cpu().detach().numpy()).sum()
                total += ys.size(0)  # the last batch may be smaller

            acc = float(correct) / total
            print('Adv accuracy: {:.3f}%'.format(acc * 100))
            acclist.append(acc)
        return acclist

    elif opt == 'generate':
        advpacklist = []
        for eps in eps_list:
            advlist = []
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(
                None,
                c,
                h,
                w,
            ))
            # Convert pytorch model to a tf_model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create a DeepFool attack
            atk_op = DeepFool(cleverhans_model, sess=sess)
            atk_params = {'clip_min': clip_min, 'clip_max': clip_max}
            adv_x_op = atk_op.generate(x_op, **atk_params)

            # Run the attack and keep only the examples that fool the model
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                # Feed a NumPy array; TF cannot ingest CUDA tensors directly.
                adv = torch.from_numpy(
                    sess.run(adv_x_op, feed_dict={x_op: xs.cpu().numpy()}))
                # Assumes batch_size == 1: ys is compared against one label.
                if ys == np.argmax(torch_model(xs).data.cpu().numpy()):
                    pred = np.argmax(
                        torch_model(adv.to(device)).data.cpu().numpy())
                    if ys != pred:
                        advlist.append(adv.numpy())
            print('kept %d successful adversarial examples' % len(advlist))
            advpacklist.append(advlist)
        return advpacklist
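
A hypothetical usage sketch for DF in 'evaluate' mode; net and loader are
placeholders (any trained MNIST classifier and a torch DataLoader), and the
module-level device the function relies on is set explicitly. The 'generate'
branch additionally assumes the DataLoader uses batch_size=1.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = net.to(device).eval()  # placeholder: a trained 1x28x28 MNIST model

# DeepFool ignores eps, so a single dummy entry in eps_list is enough.
accs = DF(net, loader, eps_list=[0.0], opt='evaluate',
          c=1, h=28, w=28, clip_min=0.0, clip_max=1.0)
print(accs)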