예제 #1
0
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)

        model = InceptionModel(num_classes)

        salmap = SaliencyMapMethod(model)

        noisy_images = x_input + 0.05 * tf.sign(tf.random_normal(batch_shape))
        x_adv = salmap.generate(noisy_images, clip_min=-1., clip_max=1.)
        #x_adv = x_input + tf.clip_by_value(x_adv - x_input, -eps, eps)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)
예제 #2
0
def main(_):
  # Images for inception classifier are normalized to be in [-1, 1] interval,
  # eps is a difference between pixels so it should be in [0, 2] interval.
  # Renormalizing epsilon from [0, 255] to [0, 2].
  eps = 2.0 * FLAGS.max_epsilon / 255.0
  batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
  num_classes = 1001
  BUILD_MODEL = True

  from cleverhans.attacks_tf import jacobian_graph, jsma_batch

  tf.logging.set_verbosity(tf.logging.INFO)

  with tf.Graph().as_default() as d_graph:
    # Prepare graph

    x_input = tf.placeholder(tf.float32, shape=batch_shape)
    model = InceptionModel(num_classes)

    preds = model(x_input)
    #grads = jacobian_graph(preds, x_input, num_classes)
    saver = tf.train.Saver(slim.get_model_variables())

    # Run computation
    with tf.Session() as sess:
      #print("Session is closed:",sess._is_closed())
      saver.restore(sess, FLAGS.checkpoint_path)

      salmap = SaliencyMapMethod(model, sess = sess)
      x_adv = salmap.generate(x_input, eps=eps, clip_min=-1., clip_max=1.)

      for filenames, images in load_images(FLAGS.input_dir, batch_shape):

        adv_images = sess.run(x_adv, feed_dict={x_input: images})
        save_images(adv_images, filenames, FLAGS.output_dir)
예제 #3
0
class SaliencyMapAttack(AdversarialAttack):
    def __init__(self,
                 model,
                 theta=1.0,
                 gamma=1.0,
                 clip_min=-1.0,
                 clip_max=1.0,
                 targeted=False,
                 symbolic_impl=True):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._targeted = targeted
        self._theta = theta
        self._gamma = gamma
        self._symbolic_impl = symbolic_impl

        with self.graph.as_default():
            self._method = SaliencyMapMethod(self._model,
                                             sess=self.session,
                                             theta=self._theta,
                                             gamma=self._gamma,
                                             nb_classes=self._n_classes,
                                             clip_min=self._clip_min,
                                             clip_max=self._clip_max,
                                             symbolic_impl=self._symbolic_impl)

    def attack_method(self, labels):
        if self._targeted:
            return self._method.generate(x=self._x_clean, y_target=labels)
        return self._method.generate(x=self._x_clean)
def saliency_map_method(model,
                        X,
                        Y,
                        theta,
                        gamma,
                        clip_min=None,
                        clip_max=None):
    nb_classes = Y.shape[1]
    X_adv = np.zeros_like(X)
    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': theta,
        'gamma': gamma,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'y_target': None
    }
    for i in tqdm(range(len(X))):
        # Get the sample
        sample = X[i:(i + 1)]
        # First, record the current class of the sample
        current_class = int(np.argmax(Y[i]))
        # Randomly choose a target class
        target_class = np.random.choice(
            other_classes(nb_classes, current_class))
        # This call runs the Jacobian-based saliency map approach
        one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
        one_hot_target[0, target_class] = 1
        jsma_params['y_target'] = one_hot_target
        X_adv[i] = jsma.generate_np(sample, **jsma_params)

    return X_adv
예제 #5
0
def generate_jsma_examples(sess, model, x, y, X, Y, attack_params, verbose,
                           attack_log_fpath):
    """
    Targeted attack, with target classes in Y.
    """
    Y_target = Y

    nb_classes = Y.shape[1]

    jsma = SaliencyMapMethod(KerasModelWrapper(model), back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1.}

    adv_x_list = []

    with click.progressbar(range(0, len(X)),
                           file=sys.stderr,
                           show_pos=True,
                           width=40,
                           bar_template='  [%(bar)s] JSMA Attacking %(info)s',
                           fill_char='>',
                           empty_char='-') as bar:
        # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in bar:
            sample = X[sample_ind:(sample_ind + 1)]

            adv_x = jsma.generate_np(sample, **jsma_params)
            adv_x_list.append(adv_x)

    return np.vstack(adv_x_list)
def attack(model, x_input, input_img):
    wrap = KerasModelWrapper(model)
    jsma = SaliencyMapMethod(wrap, sess=sess)
    jsma_params = {
        'theta': 0.1,
        'gamma': 0.1,
        'clip_min': -1.,
        'clip_max': 1.,
        'y_target': None
    }
    adv_x = jsma.generate_np(input_img, **jsma_params)
    return adv_x
def _get_or_create_jsma(sess, x, classifier, in_x, save_file):
    if not os.path.exists(save_file):
        jsma = SaliencyMapMethod(classifier)
        adv = jsma.generate(x, clip_min=0., clip_max=1., gamma=FLAGS.gamma)
        jsma_adv = np.zeros(in_x.shape)
        for i in range(in_x.shape[0] // 10000):
            jsma_adv[i * 10000:(i + 1) * 10000] = adv.eval(
                feed_dict={x: in_x[i * 10000:(i + 1) * 10000]})
            print(f'done computing jsma axs for first {(i+1)*10000}')
        np.save(save_file, jsma_adv)
    else:
        return np.load(save_file)
예제 #8
0
def mnist_jsma_attack(sample, target, model, sess):
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 3,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }
    jsma_params['y_target'] = target

    adv = jsma.generate_np(sample, **jsma_params)
    return adv
예제 #9
0
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = DummyModel()
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)
예제 #10
0
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = DummyModel()
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)
예제 #11
0
class TestSaliencyMapMethod(CleverHansTest):
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()
        import tensorflow as tf
        import tensorflow.contrib.slim as slim

        def dummy_model(x):
            net = slim.fully_connected(x, 60)
            return slim.fully_connected(net, 10, activation_fn=None)

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = tf.make_template('dummy_model', dummy_model)
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(10, 1000)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((10, 10))
        feed_labs[np.arange(10), np.random.randint(0, 9, 10)] = 1
        x_adv = self.attack.generate_np(x_val,
                                        clip_min=-5., clip_max=5.,
                                        y_target=feed_labs)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        worked = np.mean(np.argmax(feed_labs, axis=1) == new_labs)
        self.assertTrue(worked > .9)
예제 #12
0
class TestSaliencyMapMethod(CleverHansTest):
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = DummyModel()
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(10, 1000)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((10, 10))
        feed_labs[np.arange(10), np.random.randint(0, 9, 10)] = 1
        x_adv = self.attack.generate_np(x_val,
                                        clip_min=-5., clip_max=5.,
                                        y_target=feed_labs)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        worked = np.mean(np.argmax(feed_labs, axis=1) == new_labs)
        self.assertTrue(worked > .9)
예제 #13
0
class TestSaliencyMapMethod(CleverHansTest):
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = DummyModel()
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(10, 1000)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((10, 10))
        feed_labs[np.arange(10), np.random.randint(0, 9, 10)] = 1
        x_adv = self.attack.generate_np(x_val,
                                        clip_min=-5.,
                                        clip_max=5.,
                                        y_target=feed_labs)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        worked = np.mean(np.argmax(feed_labs, axis=1) == new_labs)
        self.assertTrue(worked > .9)
예제 #14
0
    def setUp(self):
        super(TestSaliencyMapMethod, self).setUp()
        import tensorflow as tf
        import tensorflow.contrib.slim as slim

        def dummy_model(x):
            net = slim.fully_connected(x, 60)
            return slim.fully_connected(net, 10, activation_fn=None)

        self.sess = tf.Session()
        self.sess.as_default()
        self.model = tf.make_template('dummy_model', dummy_model)
        self.attack = SaliencyMapMethod(self.model, sess=self.sess)

        # initialize model
        with tf.name_scope('dummy_model'):
            self.model(tf.placeholder(tf.float32, shape=(None, 1000)))
        self.sess.run(tf.global_variables_initializer())

        self.attack = SaliencyMapMethod(self.model, sess=self.sess)
예제 #15
0
    def adv_jsma_train(self, epoch=5000):
        print('training {} for epoch={}'.format(self.MODEL_NAME, epoch))
        params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }
        x, y = self.cnn.make_inputs()
        probs = self.cnn.make_model(x)

        self.cnn.start_session()
        self.cnn.init_session_and_restore()

        jsma = SaliencyMapMethod(self.cnn, sess=self.cnn.sess)
        adv_x = jsma.generate(x, **params)
        adv_probs = self.cnn.make_model(adv_x)

        self.cnn.train(probs, x, y, epoch, self.dataset, adv_preds=adv_probs)
        # cnn.test(gtsrb)
        self.cnn.end_session()
예제 #16
0
def jsma_craft(surrogate_model, source_samples, target_info):
    import tensorflow as tf
    import numpy as np
    from dataset import Data
    from cleverhans.attacks import SaliencyMapMethod

    with tf.variable_scope(surrogate_model.scope):
        sess = tf.get_default_session()

        jsma = SaliencyMapMethod(surrogate_model.tf(), back='tf', sess=sess)

        one_hot_target = np.zeros((1, 10), dtype=np.float32)
        one_hot_target[0, target_info['target_class']] = 1
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': one_hot_target
        }

        # Loop over the samples we want to perturb into adversarial examples

        adv_xs = []
        for sample_ind in range(len(source_samples)):
            sample = source_samples.getx(sample_ind)

            adv_xs.append(jsma.generate_np(sample, **jsma_params))

            # TODO remove break
            if sample_ind == 2:
                break

        adv_ys = np.concatenate([one_hot_target] * len(adv_xs))

        adv_xs_d = Data(np.concatenate(adv_xs), adv_ys)

        return source_samples, adv_xs_d
예제 #17
0
    def __init__(self,
                 model,
                 theta=1.0,
                 gamma=1.0,
                 clip_min=-1.0,
                 clip_max=1.0,
                 targeted=False,
                 symbolic_impl=True):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._targeted = targeted
        self._theta = theta
        self._gamma = gamma
        self._symbolic_impl = symbolic_impl

        with self.graph.as_default():
            self._method = SaliencyMapMethod(self._model,
                                             sess=self.session,
                                             theta=self._theta,
                                             gamma=self._gamma,
                                             nb_classes=self._n_classes,
                                             clip_min=self._clip_min,
                                             clip_max=self._clip_max,
                                             symbolic_impl=self._symbolic_impl)
예제 #18
0
def generate(sess, model, data_feeder, source, target, adv_dump_dir, nb_samples, perturbation_step, max_perturbation):
    wrap = KerasModelWrapper(model.model)
    jsma = SaliencyMapMethod(wrap, sess=sess)
    jsma_params = {
        'gamma': max_perturbation,
        'theta': perturbation_step,
        'clip_min': 0.0,
        'clip_max': 1.0,
        'y_target': data_feeder.get_labels(target, nb_samples)
    }

    craft_data = data_feeder.get_evasion_craft_data(source_class=source, total_count=nb_samples)
    adv_data = jsma.generate_np(craft_data, **jsma_params)

    # Commit data
    adv_writer = AdversarialWriterEvasion(source_class=source,
                                          target_class=target,
                                          attack_params={
                                              'step': perturbation_step,
                                              'max_perturbation':max_perturbation},
                                          adversarial_data_path=adv_dump_dir)
    adv_writer.batch_put(craft_data, adv_data)
    adv_writer.commit()
예제 #19
0
def get_adversarial_attack_and_params(attack_name, wrap, sess):
    params = None
    stop_gradient = False

    if attack_name == "fgsm":
        attack = FastGradientMethod(wrap, sess=sess)
        params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        stop_gradient = True
    if attack_name == "deepfool":
        attack = DeepFool(wrap, sess=sess)
    if attack_name == "lbfgs":
        attack = LBFGS(wrap, sess=sess)
    if attack_name == "saliency":
        attack = SaliencyMapMethod(wrap, sess=sess)
    if attack_name == "bim":
        attack = BasicIterativeMethod(wrap, sess=sess)

    return attack, params, stop_gradient
예제 #20
0
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):

    size = 256
    eval_params = {'batch_size': 128}

    ############################################# Prepare the Data #####################################################

    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)

    ############################################# Prepare the Data #####################################################


    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:

        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []
        def modelBuilder(x, num_classes, dataset, type, sess, input_output):

            if len(input_output) == 0:

                reuse = False

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes,
                                            inputT=x, sess=sess,
                                            checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)
                else:

                    _, tf_model = \
                        prepare_Resnet(num_classes,
                                       inputT=x, sess=sess,
                                       checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)

            else:

                reuse = True

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse)
                else:
                    _, tf_model = \
                        prepare_Resnet(num_classes, inputT=x, reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)


            return tf_model.logits

        # create an attackable model for the cleverhans
        model = CallableModelWrapper(lambda placeholder: modelBuilder(placeholder, num_classes, dataset, type, sess, input_output), 'logits')

        # TODO: check the configurations
        if attack_type == "FGM": # pass
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {
                'eps' : 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "CWL2": # pass
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "DF": # pass
            attack = DeepFool(model, back='tf', sess=sess)
            params = {
            }
        elif attack_type == "ENM": # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "FFA": # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'eps_iter': 0.005,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VATM":
            attack = vatm(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        else:
            raise Exception("I don't recognize {} this attack type. I will use FGM instead.".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # notice that "adv_vals" may contain NANs because of the failure of the attack
        # also the input may not be perturbed at all because of the failure of the attack
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for nan
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation
            if np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # cleanings
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        print("{} out of {} adversarial examples are generated.".format(len(adv_vals_cleaned), size))

        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx])) for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), x_test[:size], y_test[:size],
                              args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), adv_vals_cleaned, y_cleaned,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
예제 #21
0
def JSMA_FGSM_BIM(train_start=0,
                  train_end=60000,
                  test_start=0,
                  test_end=10000,
                  nb_epochs=6,
                  batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  backprop_through_attack=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm, preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
예제 #22
0
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8/255,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8./255,
                  'eps_iter': 1./255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = BasicIterativeMethod(model,sess = sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps':8/255,
                  'eps_iter':1/255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MomentumIterativeMethod(model,sess = sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta':8/255,
                  'gamma':0.1,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = SaliencyMapMethod(model,sess = sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps':8/255,
                  'num_iterations':10,
                  'xi' :1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = VirtualAdversarialMethod(model,sess = sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = CarliniWagnerL2(model,sess = sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule":"EN",
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = ElasticNetMethod(model,sess = sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate":10,
            "overshoot":1e-3,
            "max_iter":100,
            "nb_classes":10,
            "clip_min":0,
            "clip_max":1
        }
        assert target is None
        method = DeepFool(model,sess = sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            'batch_size':128,
            "binary_search_steps":10,
            "max_iterations":1000,
            "initial_const":1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is not None
        params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = LBFGS(model,sess = sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps':8/255,
                  'eps_iter':1/255,
                  'nb_iter':10,
                  'ord':np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MadryEtAl(model, sess = sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon':1/255,
            'num_steps':10,
            'is_targeted':False,
            'early_stop_loss_threshold':None,
            'learning_rate':0.01,
            'delta':0.01,
            'batch_size':128,
            'spsa_iters':1,
            'is_debug':False
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
            params["is_targeted"] = True
        method = SPSA(model, sess = sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i*batch_size:(i+1)*batch_size]
        #y_feed = y_test[i*batch_size:(i+1)*batch_size]

        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
예제 #23
0
perturbations = np.zeros((1, source_samples), dtype=float)
grads = jacobian_graph(predictions , X_placeholder, 1)

X_adv = np.zeros((source_samples, X_test.shape[1]))

for sample_ind in range(0, source_samples):
    # We want to find an  adversarial  example  for  each  possible  target  class
    # (i.e. all  classes  that  differ  from  the  label  given  in the  dataset)
    current_class = int(np.argmax(Y_test[sample_ind]))
    
    # Target the benign class
    for target in [0]:
        if (current_class == 0):
            break
        
        # This call runs the Jacobian-based saliency map approac
        adv_x , res , percent_perturb = SaliencyMapMethod(sess, X_placeholder, predictions , grads,
                                             X_test[sample_ind: (sample_ind+1)],
                                             target , theta=1, gamma =0.1,
                                             increase=True ,
                                             clip_min=0, clip_max=1)
        
        X_adv[sample_ind] = adv_x
        results[target , sample_ind] = res
        perturbations[target , sample_ind] = percent_perturb


# ============== Evaluation of the model with adversarial instances ==============

print("Performance when using adversarial testing instances")
mlp_model_eval(X_adv, Y_test, history, 2)
예제 #24
0
def main():
    sess = tf.Session()
    train_size, test_size = 55000, 10000
    batch_size = 100
    lr = 0.05
    epochs = 100
    steps = epochs * int(train_size / batch_size)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    x = tf.placeholder(tf.float32, [None, 784])  # input for real images
    x_adv = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])  # groundtruth class label
    y_target = tf.placeholder(tf.float32, [None, 10])
    noise = tf.placeholder(tf.float32, [None, 100])

    tx = transformer(x, noise, 3)
    yx_2, z_norm = classifier_x(x)
    yx = classifier(x)
    ytx = classifier(tx)
    ytx_2, z_norm2 = classifier_x(tx)
    y_x_adv = classifier(x_adv)

    x_fgsm = attack.fgsm(x, yx_2, eps=0.1, clip_min=0, clip_max=1)
    y_x_fgsm = classifier(x_fgsm)

    jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)
    one_hot_target = np.zeros((100, 10), dtype=np.float32)
    one_hot_target[:, 1] = 1
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'nb_classes': 10,
        'clip_min': 0.,
        'clip_max': 1.,
        'targets': yx,
        'y_val': one_hot_target
    }

    perturb = {}
    perturb['tx'] = tf.reduce_mean(tf.norm(tx - x, axis=1))
    perturb['fgsm'] = tf.reduce_mean(tf.norm(x_fgsm - x, axis=1))

    loss = {}
    loss['cx'] = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=yx_2))
    loss['ctx'] = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=ytx_2))

    loss['cttx'] = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_target, logits=ytx))

    loss['classifier'] = loss['cx']  #+ loss['ctx']
    #loss['transformer'] = loss['cttx']
    loss['transformer'] = -loss['ctx']

    all_vars = tf.trainable_variables()
    c_vars = [var for var in all_vars if 'classifier' in var.name]
    t_vars = [var for var in all_vars if 'transformer' in var.name]

    train_op = {}
    train_op['classifier'] = GradientDescentOptimizer(learning_rate = lr) \
        .minimize(loss['classifier'], var_list = c_vars, global_step = global_step)
    train_op['transformer'] = GradientDescentOptimizer(learning_rate = lr) \
        .minimize(loss['transformer'], var_list = t_vars, global_step = global_step)

    acc = {}
    acc['x'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(yx, 1), tf.argmax(y_, 1)), tf.float32))
    acc['tx'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(ytx, 1), tf.argmax(y_, 1)), tf.float32))
    acc['x_fgsm'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(y_x_fgsm, 1), tf.argmax(y_, 1)),
                tf.float32))
    acc['x_adv'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(y_x_adv, 1), tf.argmax(y_, 1)), tf.float32))

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    x_adv_mnist_fsgm = np.load(os.path.join('data', 'x_fgsm_mnist.npy'))
    tf.set_random_seed(1024)

    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            y_tar = [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0] for i in range(batch_size)]
            y_tar = np.array(y_tar, dtype='float32')

            noise_sample = sample_Z(batch_size, 100)

            sess.run(train_op['classifier'],
                     feed_dict={
                         x: batch[0],
                         y_: batch[1],
                         noise: noise_sample,
                         y_target: y_tar
                     })
            sess.run(train_op['transformer'],
                     feed_dict={
                         x: batch[0],
                         y_: batch[1],
                         noise: noise_sample,
                         y_target: y_tar
                     })

            if t % int(train_size / batch_size) == 0:
                epoch = int(t / int(train_size / batch_size))

                noise_sample2 = sample_Z(10000, 100)
                test_batch = mnist.test.next_batch(10000)
                print(test_batch[0].shape)
                var_list = [acc, z_norm]
                res = sess.run(var_list, feed_dict = {x: test_batch[0], y_: test_batch[1], noise : noise_sample2, \
                    x_adv: x_adv_mnist_fsgm, y_target: y_tar})
                print(epoch)
                for r in res:
                    print(r)
예제 #25
0
        def do_jsma():
            print('Crafting ' + str(source_samples) + ' * ' +
                  str(nb_classes - 1) + ' adversarial examples')

            # Keep track of success (adversarial example classified in target)
            results = np.zeros((nb_classes, source_samples), dtype='i')

            # Rate of perturbed features for each test set example and target class
            perturbations = np.zeros((nb_classes, source_samples), dtype='f')

            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            # Instantiate a SaliencyMapMethod attack object
            jsma = SaliencyMapMethod(model, back='tf', sess=sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.1,
                'clip_min': 0.,
                'clip_max': 1.,
                'y_target': None
            }

            figure = None
            # Loop over the samples we want to perturb into adversarial examples
            for sample_ind in xrange(0, source_samples):
                print('--------------------------------------')
                print('Attacking input %i/%i' %
                      (sample_ind + 1, source_samples))
                sample = X_test[sample_ind:(sample_ind + 1)]

                # We want to find an adversarial example for each possible target class
                # (i.e. all classes that differ from the label given in the dataset)
                current_class = int(np.argmax(Y_test[sample_ind]))
                target_classes = other_classes(nb_classes, current_class)

                # For the grid visualization, keep original images along the diagonal
                grid_viz_data[current_class,
                              current_class, :, :, :] = np.reshape(
                                  sample, (img_rows, img_cols, channels))

                # Loop over all target classes
                for target in target_classes:
                    print('Generating adv. example for target class %i' %
                          target)

                    # This call runs the Jacobian-based saliency map approach
                    one_hot_target = np.zeros((1, nb_classes),
                                              dtype=np.float32)
                    one_hot_target[0, target] = 1
                    jsma_params['y_target'] = one_hot_target
                    adv_x = jsma.generate_np(sample, **jsma_params)

                    # Check if success was achieved
                    res = int(model_argmax(sess, x, preds, adv_x) == target)

                    # Computer number of modified features
                    adv_x_reshape = adv_x.reshape(-1)
                    test_in_reshape = X_test[sample_ind].reshape(-1)
                    nb_changed = np.where(
                        adv_x_reshape != test_in_reshape)[0].shape[0]
                    percent_perturb = float(nb_changed) / adv_x.reshape(
                        -1).shape[0]

                    # Display the original and adversarial images side-by-side
                    if FLAGS.viz_enabled:
                        figure = pair_visual(
                            np.reshape(sample, (img_rows, img_cols)),
                            np.reshape(adv_x, (img_rows, img_cols)), figure)

                    # Add our adversarial example to our grid data
                    grid_viz_data[target, current_class, :, :, :] = np.reshape(
                        adv_x, (img_rows, img_cols, channels))

                    # Update the arrays for later analysis
                    results[target, sample_ind] = res
                    perturbations[target, sample_ind] = percent_perturb

            print('--------------------------------------')

            # Compute the number of adversarial examples that were successfully found
            nb_targets_tried = ((nb_classes - 1) * source_samples)
            succ_rate = float(np.sum(results)) / nb_targets_tried
            print('Avg. rate of successful adv. examples {0:.4f}'.format(
                succ_rate))
            report.clean_train_adv_eval = 1. - succ_rate

            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(perturbations)
            print('Avg. rate of perturbed features {0:.4f}'.format(
                percent_perturbed))

            # Compute the average distortion introduced for successful samples only
            percent_perturb_succ = np.mean(perturbations * (results == 1))
            print('Avg. rate of perturbed features for successful '
                  'adversarial examples {0:.4f}'.format(percent_perturb_succ))
            if FLAGS.viz_enabled:
                import matplotlib.pyplot as plt
                plt.close(figure)
                _ = grid_visual(grid_viz_data)

            return report
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # do not log
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,verbose=False,
                rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn()
    preds_2 = model(x)

    # need this for constructing the array
    sess.run(tf.global_variables_initializer())

    # run this again
    # sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
         #'y': None,
         'max_iterations': 100,
         'learning_rate': 0.1,
         'batch_size': batch_size,
         'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate':10,
                       'overshoot':0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Base Iterative
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    #7
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # ==> generate 10 targeted classes for every train data regardless
    # This call runs the Jacobian-based saliency map approach
    # Loop over the samples we want to perturb into adversarial examples

    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # add normal sample in!!!!
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # saves the output so later no need to re-fun file
    np.savez("jsma_training_data.npz", x_train=X_train_data
             , y_train=Y_train_data)

    # >>> data = np.load('/tmp/123.npz')
    # >>> data['a']

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the function against 5 attacks
    # fgsm, base iterative, jsma, elastic net, and deepfool
    def evaluate_against_all():
            # 1 Clean Data
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                  args=eval_params)
            print('Legitimate accuracy: %0.4f' % accuracy)

            tmp = 'Legitimate accuracy: '+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 2 JSMA
            accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                                  Y_test, args=eval_params)

            print('JSMA accuracy: %0.4f' % accuracy)
            tmp = 'JSMA accuracy:'+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 3 FGSM
            accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                                  Y_test, args=eval_params)

            print('FGSM accuracy: %0.4f' % accuracy)
            tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 4 Base Iterative
            accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                                  Y_test, args=eval_params)

            print('Base Iterative accuracy: %0.4f' % accuracy)
            tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 5 Elastic Net
            accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                                  Y_test, args=eval_params)

            print('Elastic Net accuracy: %0.4f' % accuracy)
            tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 6 DeepFool
            accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                                  Y_test, args=eval_params)
            print('DeepFool accuracy: %0.4f' % accuracy)
            tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 7 C & W Attack
            accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                                  Y_test, args=eval_params)
            print('C & W accuracy: %0.4f' % accuracy)
            tmp = 'C & W  accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            # 8 Virtual Adversarial
            accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                                  Y_test, args=eval_params)
            print('VAT accuracy: %0.4f' % accuracy)
            tmp = 'VAT accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            print("*******End of Epoch***********\n\n")

        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net  + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                 predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, nb_classes=10, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = X_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, channels)),
                    np.reshape(adv_x, (img_rows, img_cols, channels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
예제 #28
0
def main(args):
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])

    dataset = ImageDataset(args.image_folder,
                           transform=transform,
                           return_paths=True)
    # n_images = len(dataset)
    dataloader = DataLoader(dataset,
                            shuffle=False,
                            batch_size=args.batch_size,
                            pin_memory=True,
                            num_workers=0)

    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()

    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        3,
        224,
        224,
    ))

    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # compute clip_min and clip_max suing a full black and a full white image
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord
    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False, **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}

    attack_name = args.attack + attack_name

    print('Running [{}]. Params: {}'.format(args.attack.upper(),
                                            attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:

        progress.set_description('ATTACK')

        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict={
                                       x_op: x,
                                       y: target
                                   })

        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]

        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]

        progress.set_postfix(
            {'Success': '{:3.2%}'.format(n_success / n_processed)})
        progress.set_description('SAVING')

        for p, a, s, d in zip(success_paths, success_adv_x, success_src,
                              success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
예제 #29
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
  """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {'allow_soft_placement' : True}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Get MNIST data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': batch_size}
  fgsm_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }
  jsma_params = {'theta': 1., 'gamma': 0.1,
                 'clip_min': 0., 'clip_max': 1.,
                 'y_target': None}
  rng = np.random.RandomState([2017, 8, 30])

  def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  if clean_train:
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
      do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng, var_list=model.get_params())

    # Calculate training error
    if testing:
      do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    jsma = SaliencyMapMethod(model, sess=sess)
    adv_jsma_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_logits(adv_jsma_x)

    # Generate JSMA adversarial examples and save to disk
    dir = 'images/jsma_rotated_adv/'
    if not os.path.exists('images'):
        os.mkdir('images')
    if not os.path.exists(dir):
        os.mkdir(dir)
    if not os.path.exists(dir+'train/'):
        os.mkdir(dir+'train/')
    if not os.path.exists(dir+'test/'):
        os.mkdir(dir+'test/')
    for index in range(len(y_test)):
        print('test '+str(index))
        x_ = x_test[index]
        label = np.argmax(y_test[index])
        raw_data = (jsma.generate_np(x_.reshape((1, 28, 28, 1)), **jsma_params).reshape((28, 28)) * 255).astype('uint8')
        im = Image.fromarray(raw_data, mode='P')
        rot = im.rotate(30)
        rot.save(dir + 'test/' + str(label)+'_'+str(uuid.uuid4())+'.png')
    for index in range(len(y_train)):
        print('train ' + str(index))
        x_ = x_train[index]
        label = np.argmax(y_train[index])
        raw_data = (jsma.generate_np(x_.reshape((1, 28, 28, 1)), **jsma_params).reshape((28, 28)) * 255).astype('uint8')
        im = Image.fromarray(raw_data, mode='P')
        rot = im.rotate(30)
        rot.save(dir + 'train/' + str(label)+'_'+str(uuid.uuid4())+'.png')



  return report
예제 #30
0
# model = loaded_model
# print("Loaded model from disk")


# Generate adversarial samples for all test datapoints
source_samples = X_test_scaled.shape[0]

# Jacobian-based Saliency Map Attack
results = np.zeros((FLAGS.nb_classes, source_samples), dtype='i')
perturbations = np.zeros((FLAGS.nb_classes, source_samples), dtype='f')
grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

X_adv = np.zeros((source_samples, X_test_scaled.shape[1]))

models = KerasModelWrapper(model)
jsma = SaliencyMapMethod(models, sess=sess)
jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None}

for sample_ind in range(0, source_samples):
	sample = X_test_scaled[sample_ind: (sample_ind+1)]
	# We want to find an adversarial example for each possible target class
	# (i.e. all classes that differ from the label given in the dataset)
	current_class = int(np.argmax(y_test[sample_ind]))

	# Only target the normal class
	for target in [0]:
		if current_class == 0:
			break

		print('Generating adv. example for target class {} for sample {}'.format(target, sample_ind), end='\r')
예제 #31
0
model_train(sess, x, y, predictions, X_train_scaled, y_train, evaluate=evaluate, args=train_params)

# Generate  adversarial  samples  for  all  test  datapoints
source_samples = X_test_scaled.shape[0]
# Jacobian -based  Saliency  Map
results = np.zeros((FLAGS.nb_classes, source_samples), dtype ='i')
perturbations = np.zeros((FLAGS.nb_classes, source_samples), dtype ='f')
grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

X_adv = np.zeros((source_samples, X_test_scaled.shape[1]))


print(type(model)) # <class 'keras.engine.sequential.Sequential'>
wrap = KerasModelWrapper(model)

jsma = SaliencyMapMethod(wrap, sess=sess)

"""
    Take in a dictionary of parameters and applies attack-specific checks
    before saving them as attributes.
    Attack-specific parameters:
    :param theta: (optional float) Perturbation introduced to modified
                  components (can be positive or negative)
    :param gamma: (optional float) Maximum percentage of perturbed features
    :param clip_min: (optional float) Minimum component value for clipping
    :param clip_max: (optional float) Maximum component value for clipping
    :param y_target: (optional) Target tensor if the attack is targeted
    """

# Other paper has theta = 1 and gamma = 0.1 
jsma_params = {'theta': 0.1, 'gamma': 0.01,
예제 #32
0
    if i > 0 and i < n - 1:
        z1[i] = tf.matmul(h1[i - 1], w[i]) + b[i]
        h1[i] = tf.nn.relu(z1[i])
    if i == n - 1:
        z1[i] = tf.matmul(h1[i - 1], w[i]) + b[i]
        h1[i] = z1[i]

y = classifier(x)
yn = classifier_n(x)
loss_cls = softmax_loss(label, yn)
all_vars = tf.trainable_variables()
c_vars = [var for var in all_vars if 'classifier' in var.name]
train_op_classifier = GradientDescentOptimizer(learning_rate = learning_rate) \
        .minimize(loss_cls, var_list = c_vars, global_step = global_step)

jsma = SaliencyMapMethod(classifier, back='tf', sess=sess)
x_fgsm = attack.fgsm(x, y, eps=0.2, clip_min=0, clip_max=1)


def main():
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    sess.run(tf.global_variables_initializer())
    with sess.as_default():
        acc = {}
        print('train classifier')
        for t in range(1, steps + 1):
            batch = mnist.train.next_batch(batch_size)
            f_dict = {x: batch[0], label: batch[1]}
            sess.run(train_op_classifier, feed_dict=f_dict)
            if t % 550 == 0:
                epoch = int(t / 550)
예제 #33
0
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x, y, x_train, y_train, args=train_params,
          rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report