Example #1
0
class TestBasicIterativeMethod(TestFastGradientMethod):
    """Tests for BasicIterativeMethod run against a tiny fixed-weight model."""

    def setUp(self):
        """Create the TF session, the toy model and the attack under test."""
        super(TestBasicIterativeMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            # NOTE(review): h1 is built but never consumed; the output below
            # is computed from x directly. Confirm whether W2 was meant to be
            # applied to h1 before changing it (the thresholds in the tests
            # depend on the current behaviour).
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.nn.softmax(tf.matmul(x, W2))
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        """
        Calling generate_np with a different nb_iter must rebuild the graph.

        The rebuild is detected by temporarily replacing tf.gradients with a
        wrapper that records whether it was invoked.
        """
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        ord=np.inf,
                                        clip_min=-5.0,
                                        clip_max=5.0,
                                        nb_iter=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        # One-element list so the nested function can mutate the flag.
        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)

        tf.gradients = fn
        try:
            x_adv = self.attack.generate_np(x_val,
                                            eps=1.0,
                                            ord=np.inf,
                                            clip_min=-5.0,
                                            clip_max=5.0,
                                            nb_iter=11)

            orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
            self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)
        finally:
            # Always undo the monkeypatch, even when the attack or an
            # assertion fails, so later tests see the real tf.gradients.
            tf.gradients = old_grads

        self.assertTrue(ok[0])
Example #2
0
class TestBasicIterativeMethod(TestFastGradientMethod):
    """Tests for BasicIterativeMethod run against SimpleModel."""

    def setUp(self):
        """Create the TF session, the model and the attack under test."""
        super(TestBasicIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not passing clip_min and
        clip_max to fgm), this attack fails by
        np.mean(orig_labels == new_labels) == .39.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        """
        Calling generate_np with a different nb_iter must rebuild the graph.

        The rebuild is detected by temporarily replacing tf.gradients with a
        wrapper that records whether it was invoked.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        # One-element list so the nested function can mutate the flag.
        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)

        tf.gradients = fn
        try:
            x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                            clip_min=-5.0, clip_max=5.0,
                                            nb_iter=11)

            orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
            self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)
        finally:
            # Always undo the monkeypatch, even when the attack or an
            # assertion fails, so later tests see the real tf.gradients.
            tf.gradients = old_grads

        self.assertTrue(ok[0])
Example #3
0
def bim_attack(train_data, model, sess):
    """
    Craft Basic Iterative Method adversarial examples in batches of 100.

    :param train_data: array of inputs; the first axis indexes samples
    :param model: Keras model, wrapped with KerasModelWrapper for the attack
    :param sess: TensorFlow session handed to the attack
    :return: array of adversarial examples, one per input sample (including
             the trailing samples that do not fill a complete batch)
    """
    batch_size = 100

    wrap = KerasModelWrapper(model)
    bim = BasicIterativeMethod(wrap, sess=sess)
    bim_params = {'eps_iter': 0.01,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.}

    nb_samples = train_data.shape[0]
    if nb_samples == 0:
        # Nothing to attack: hand back an empty array with the input's
        # trailing dimensions instead of a bare list.
        return np.asarray(train_data[:0])

    # Step through every sample (the last slice may be shorter than
    # batch_size) and concatenate once at the end rather than re-copying the
    # accumulated result on each iteration.
    batches = [
        bim.generate_np(train_data[start:start + batch_size], **bim_params)
        for start in range(0, nb_samples, batch_size)
    ]
    return np.concatenate(batches)
Example #4
0
def bim_attack(train_data, model, sess):
    """
    Craft Basic Iterative Method adversarial examples in a single call.

    :param train_data: inputs passed unchanged to the attack's generate_np
    :param model: Keras model, wrapped with KerasModelWrapper for the attack
    :param sess: TensorFlow session handed to the attack
    :return: whatever generate_np returns for train_data
    """
    attack = BasicIterativeMethod(KerasModelWrapper(model), sess=sess)
    return attack.generate_np(train_data,
                              eps_iter=0.01,
                              nb_iter=10,
                              clip_min=0.,
                              clip_max=1.)
def generate_bim_examples(sess, model, x, y, X, Y, attack_params, verbose, attack_log_fpath):
    """
    Run an untargeted Basic Iterative Method attack on X. Y is not needed.

    The default attack settings below are passed through override_params
    together with attack_params before being handed to generate_np.
    x, Y, verbose and attack_log_fpath are accepted but not used here.
    """
    attack = BasicIterativeMethod(model, back='tf', sess=sess)

    # 'y' is y for an untargeted attack (it would be Y for a targeted one).
    defaults = dict(eps=0.1, eps_iter=0.05, nb_iter=10, y=y,
                    ord=np.inf, clip_min=0, clip_max=1)
    effective_params = override_params(defaults, attack_params)

    return attack.generate_np(X, **effective_params)
Example #6
0
def generate_bim_examples(sess, model, x, y, X, Y, attack_params, verbose,
                          attack_log_fpath):
    """
    Run an untargeted Basic Iterative Method attack on X. Y is not needed.

    The Keras model is wrapped with KerasModelWrapper and attacked with fixed
    settings; x, y, Y, attack_params, verbose and attack_log_fpath are
    accepted but not used here.
    """
    wrapped = KerasModelWrapper(model)
    attack = BasicIterativeMethod(wrapped, back='tf', sess=sess)
    return attack.generate_np(X,
                              eps=0.1,
                              eps_iter=0.05,
                              nb_iter=10,
                              clip_min=0,
                              clip_max=1)
def get_BIM_samples(loaded_model, samples, nb_iter):
    """
    Craft Basic Iterative Method adversarial examples with the backend session.

    :param loaded_model: Keras model, wrapped with KerasModelWrapper
    :param samples: inputs passed unchanged to the attack's generate_np
    :param nb_iter: number of attack iterations
    :return: whatever generate_np returns for samples
    """
    # Fetch the session first, then build the wrapper, as the attack needs
    # both at construction time.
    session = backend.get_session()
    wrapped = KerasModelWrapper(loaded_model)
    attack = BasicIterativeMethod(wrapped, sess=session)

    return attack.generate_np(samples,
                              eps_iter=0.05,
                              nb_iter=nb_iter,
                              clip_min=0.,
                              clip_max=1.)
Example #8
0
# - Step 4 - Set up the untargeted Basic Iterative Method attack
wrap = KerasModelWrapper(model)
bim = BasicIterativeMethod(wrap, sess=sess)
bim_params = dict(eps=0.1, eps_iter=0.01, clip_min=0., clip_max=1.)

# - Step 5 - Take the first 10 test images of every Fashion-MNIST category and
# craft adversarial examples for them with 5 and with 10 iterations.
orig_images = np.empty([0, 28, 28, 1])
orig_labels = np.empty([0])
for clz in range(nb_classes):
    idxs = np.where(test_labels == clz)[0][:10]
    orig_images = np.concatenate((orig_images, test_images2[idxs]), axis=0)
    orig_labels = np.concatenate((orig_labels, test_labels[idxs]), axis=0)

# The same parameter dict is reused; only the iteration count changes.
bim_params.update(nb_iter=5)
adv_images5 = bim.generate_np(orig_images, **bim_params)
bim_params.update(nb_iter=10)
adv_images10 = bim.generate_np(orig_images, **bim_params)

# Average per-image L2 norm of the perturbation (despite the variable name,
# this is a norm, not a percentage).
percent_perturbed = np.mean(
    ((adv_images5 - orig_images)**2).sum(axis=(1, 2, 3))**.5)
print('Avg. L_2 norm of perturbations (5 iterations) {0:.4f}'.format(
    percent_perturbed))
percent_perturbed = np.mean(
    ((adv_images10 - orig_images)**2).sum(axis=(1, 2, 3))**.5)
print('Avg. L_2 norm of perturbations (10 iterations) {0:.4f}'.format(
    percent_perturbed))

# Score the model on the 5-iteration adversarial images
adv_loss, adv_acc5 = model.evaluate(adv_images5, orig_labels)
Example #9
0
class TestBasicIterativeMethod(TestFastGradientMethod):
    """Tests for BasicIterativeMethod run against SimpleModel."""

    def setUp(self):
        """Create the TF session, the model and the attack under test."""
        TestFastGradientMethod.setUp(self)

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example_linfinity(self):
        # np.inf rather than the np.infty alias, which NumPy 2.0 removed.
        self.help_generate_np_gives_adversarial_example(ord=np.inf,
                                                        eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l1(self):
        self.help_generate_np_gives_adversarial_example(ord=1,
                                                        eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l2(self):
        self.help_generate_np_gives_adversarial_example(ord=2,
                                                        eps=.5,
                                                        nb_iter=20)

    def test_do_not_reach_lp_boundary(self):
        """
        Make sure that iterative attack don't reach boundary of Lp
        neighbourhood if nb_iter * eps_iter is relatively small compared to
        epsilon.
        """
        # Loop variable deliberately not called `ord`, to avoid shadowing
        # the builtin.
        for norm_order in [1, 2, np.inf]:
            _, _, delta = self.generate_adversarial_examples_np(
                ord=norm_order,
                eps=.5,
                nb_iter=10,
                eps_iter=.01)
            self.assertTrue(np.max(0.5 - delta) > 0.25)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not passing clip_min and
        clip_max to fgm), this attack fails by
        np.mean(orig_labels == new_labels) == .39.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        ord=np.inf,
                                        clip_min=0.5,
                                        clip_max=0.7,
                                        nb_iter=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        """
        Calling generate_np with a different nb_iter must rebuild the graph.

        The rebuild is detected by temporarily replacing tf.gradients with a
        wrapper that records whether it was invoked.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        ord=np.inf,
                                        clip_min=-5.0,
                                        clip_max=5.0,
                                        nb_iter=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        # One-element list so the nested function can mutate the flag.
        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)

        tf.gradients = fn
        try:
            x_adv = self.attack.generate_np(x_val,
                                            eps=1.0,
                                            ord=np.inf,
                                            clip_min=-5.0,
                                            clip_max=5.0,
                                            nb_iter=11)

            orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
            self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)
        finally:
            # Always undo the monkeypatch, even when the attack or an
            # assertion fails, so later tests see the real tf.gradients.
            tf.gradients = old_grads

        self.assertTrue(ok[0])
def mnist_tutorial_bim(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED,
                      noise_output=NOISE_OUTPUT):
  """
  Fashion-MNIST tutorial for the Basic Iterative Method attack
  :param train_start: index of first training set example (not forwarded to
                      the Fashion-MNIST loader, which is called without
                      bounds)
  :param train_end: index of last training set example (see train_start)
  :param test_start: index of first test set example; only used in the
                     sanity check on the size of the test set
  :param test_end: index of last test set example (see test_start)
  :param viz_enabled: (boolean) save a grid image of adversarial examples;
                      requires source_samples == number of classes
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param attack_iterations: accepted but currently unused; the attack runs
                            with a fixed nb_iter of 50
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :param noise_output: (boolean) when visualizing, plot the perturbation
                       (adversarial minus original) instead of the
                       adversarial image
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get Fashion MNIST data, add a channel axis, one-hot encode the labels
  # and scale pixel values to [0, 1]
  fashion = keras.datasets.fashion_mnist
  (x_train, y_train), (x_test, y_test) = fashion.load_data()
  x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
  x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
  y_train = np_utils.to_categorical(y_train, 10)
  y_test = np_utils.to_categorical(y_test, 10)
  x_train = x_train.astype('float32')
  x_test = x_test.astype('float32')
  x_train /= 255
  x_test /= 255

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  nb_filters = 64

  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train a model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }

  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  if os.path.exists(model_path + ".meta"):
    tf_model_load(sess, model_path)
  else:
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, model_path)

  # Evaluate the accuracy of the model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using Basic Iterative Method's approach
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # Instantiate a BIM attack object
  bim = BasicIterativeMethod(model, sess=sess)

  if viz_enabled:
    assert source_samples == nb_classes
    # First test example of each class
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]
  if targeted:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, 1, img_rows, img_cols,
                    nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = np.array(
          [[instance] * nb_classes for instance in x_test[idxs]],
          dtype=np.float32)
    else:
      adv_inputs = np.array(
          [[instance] * nb_classes for
           instance in x_test[:source_samples]], dtype=np.float32)

    # Every source sample is repeated nb_classes times and paired with each
    # one-hot target in turn.
    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_inputs = adv_inputs.reshape(
        (source_samples * nb_classes, img_rows, img_cols, nchannels))
    adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape((source_samples *
                                                 nb_classes, nb_classes))
  else:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = x_test[idxs]
    else:
      adv_inputs = x_test[:source_samples]

    adv_ys = None

  bim_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                'nb_iter': 50,
                'eps_iter': .01}
  if targeted:
    # Without the target labels the attack would run untargeted while being
    # scored against adv_ys below.
    bim_params['y_target'] = adv_ys

  adv = bim.generate_np(adv_inputs,
                        **bim_params)

  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    # Success = the adversarial example is classified as its target.
    adv_accuracy = model_eval(
        sess, x, y, preds, adv, adv_ys, args=eval_params)
  else:
    # Success = the adversarial example is no longer classified as its true
    # label, i.e. one minus the accuracy on the true labels.
    if viz_enabled:
      true_label_acc = model_eval(sess, x, y, preds, adv, y_test[idxs],
                                  args=eval_params)
    else:
      true_label_acc = model_eval(sess, x, y, preds, adv,
                                  y_test[:source_samples], args=eval_params)
    adv_accuracy = 1 - true_label_acc

  if viz_enabled:
    # In targeted mode each source sample owns nb_classes consecutive rows of
    # adv, so sample i starts at i * nb_classes. In untargeted mode there is
    # exactly one row per sample; using the targeted stride there would index
    # past the end of adv.
    stride = nb_classes if targeted else 1
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * stride] - adv_inputs[i * stride]
      else:
        image = adv[i * stride]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.clean_train_adv_eval = 1. - adv_accuracy

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

  # Close TF session
  sess.close()

  def save_visual(data, path):
    """
    Modified version of cleverhans.plot.pyplot

    Plots data[col, row] on a grid with data.shape[0] columns and
    data.shape[1] rows, saves the figure to path and returns it.
    """
    figure = plt.figure()

    # Add the images to the plot
    num_cols = data.shape[0]
    num_rows = data.shape[1]
    num_channels = data.shape[4]
    for row in range(num_rows):
      for col in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (col + 1) + (row * num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(data[col, row, :, :, 0], cmap='gray')
        else:
          plt.imshow(data[col, row, :, :, :])

    # Draw the plot and return
    plt.savefig(path)
    return figure

  # Finally, save a grid of all the adversarial examples
  if viz_enabled:
    if noise_output:
      image_name = "output/bim_fashion_mnist_noise.png"
    else:
      image_name = "output/bim_fashion_mnist.png"
    _ = save_visual(grid_viz_data, image_name)

  return report
# Fraction of clean test predictions that match the labels
acc = np.equal(pred, y_test).mean()

print("The Test accuracy is: {}".format(acc))

# ---------------------------- Adversarial Attack (BIM) ----------------------------
wrap = KerasModelWrapper(keras_model)
bim = BasicIterativeMethod(wrap, sess=sess)
bim_params = dict(eps=0.9,
                  eps_iter=0.6,
                  nb_iter=10,
                  ord=np.inf,
                  clip_min=0.,
                  clip_max=1.)
adv_x = bim.generate_np(x_test, **bim_params)

# Re-run the classifier on the adversarial inputs and score it the same way
adv_conf = keras_model.predict(adv_x)
adv_pred = np.argmax(adv_conf, axis=1)
adv_acc = np.equal(adv_pred, y_test).mean()

print("The adversarial  accuracy is: {}".format(adv_acc))

# ---------------------------- Original Image ----------------------------
x_sample = x_test[7010].reshape(28, 28)
plt.imshow(x_sample, cmap='Blues')
plt.show()

# ---------------------------- Adversarial Image ----------------------------
# Same test index as above, taken from the adversarial batch
adv_x_sample = adv_x[7010].reshape(28, 28)
plt.imshow(adv_x_sample, cmap='Blues')
plt.show()
Example #12
0
def mnist_tutorial_adv_train(train_start=0,
                             train_end=60000,
                             test_start=0,
                             test_end=10000,
                             viz_enabled=VIZ_ENABLED,
                             nb_epochs=NB_EPOCHS,
                             batch_size=BATCH_SIZE,
                             source_samples=SOURCE_SAMPLES,
                             learning_rate=LEARNING_RATE,
                             attack_iterations=ATTACK_ITERATIONS,
                             model_path=MODEL_PATH,
                             targeted=TARGETED,
                             noise_output=NOISE_OUTPUT):
    """
    MNIST tutorial for Adversarial Training

    Trains (or loads) a clean model, attacks it with FGSM, BIM and MIM, then
    adversarially trains a second model with FGSM and evaluates that model on
    clean test data and on the adversarial examples crafted earlier.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) attack the first test example of each class
                        instead of the first source_samples test examples;
                        requires source_samples == number of classes. No
                        plot is produced by this function.
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: accepted but currently unused; BIM and MIM run
                              with a fixed nb_iter of 50
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :param noise_output: accepted but currently unused
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using FGSM - BIM - MIM approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate the FGSM, BIM and MIM attack objects
    fgsm = FastGradientMethod(model, sess=sess)
    bim = BasicIterativeMethod(model, sess=sess)
    mim = MomentumIterativeMethod(model, sess=sess)

    # Pick the clean inputs to attack, together with their true labels
    if viz_enabled:
        assert source_samples == nb_classes
        # First test example of each class
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        source_x = x_test[idxs]
        source_y = y_test[idxs]
    else:
        source_x = x_test[:source_samples]
        source_y = y_test[:source_samples]

    if targeted:
        # Every source sample is repeated nb_classes times and paired with
        # each one-hot target in turn.
        adv_inputs = np.array([[instance] * nb_classes
                               for instance in source_x],
                              dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        # Scored against the targets; passed to the attacks as y_target so
        # that they actually run targeted.
        eval_labels = adv_ys
        target_kwargs = {'y_target': adv_ys}
    else:
        adv_inputs = source_x
        # Scored against the true labels; no labels are fed to the attacks.
        eval_labels = source_y
        target_kwargs = {}

    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    bim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }
    mim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }

    # The target labels are merged into copies of the parameter dicts so that
    # fgsm_params stays label-free for adversarial training below.
    adv_fgsm = fgsm.generate_np(adv_inputs,
                                **dict(fgsm_params, **target_kwargs))
    adv_bim = bim.generate_np(adv_inputs,
                              **dict(bim_params, **target_kwargs))
    adv_mim = mim.generate_np(adv_inputs,
                              **dict(mim_params, **target_kwargs))

    def success_rate(logits, adv_examples, args):
        """Return the attack success rate of adv_examples against logits.

        Targeted: fraction classified as the target label. Untargeted: one
        minus the accuracy on the true labels.
        """
        matched = model_eval(sess, x, y, logits, adv_examples, eval_labels,
                             args=args)
        return matched if targeted else 1 - matched

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    adv_fgsm_accuracy = success_rate(preds, adv_fgsm, eval_params)
    adv_bim_accuracy = success_rate(preds, adv_bim, eval_params)
    adv_mim_accuracy = success_rate(preds, adv_mim, eval_params)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. (FGSM) examples {0:.4f}'.format(
        adv_fgsm_accuracy))
    report.clean_train_adv_fgsm_eval = 1. - adv_fgsm_accuracy
    print('Avg. rate of successful adv. (BIM) examples {0:.4f}'.format(
        adv_bim_accuracy))
    report.clean_train_adv_bim_eval = 1. - adv_bim_accuracy
    print('Avg. rate of successful adv. (MIM) examples {0:.4f}'.format(
        adv_mim_accuracy))
    report.clean_train_adv_mim_eval = 1. - adv_mim_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed_fgsm = np.mean(
        np.sum((adv_fgsm - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (FGSM) perturbations {0:.4f}'.format(
        percent_perturbed_fgsm))
    percent_perturbed_bim = np.mean(
        np.sum((adv_bim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (BIM) perturbations {0:.4f}'.format(
        percent_perturbed_bim))
    percent_perturbed_mim = np.mean(
        np.sum((adv_mim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (MIM) perturbations {0:.4f}'.format(
        percent_perturbed_mim))

    ###########################################################################
    # Adversarial Training
    ###########################################################################

    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

    # The attack used during training must target the model being trained.
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack_fgsm(x):
        # Perturb the batch handed in by the loss, not a fixed set of inputs.
        return fgsm2.generate(x, **fgsm_params)

    preds2 = model2.get_logits(x)
    loss2_fgsm = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)

    train(sess, loss2_fgsm, x_train, y_train, args=train_params, rng=rng)

    # Clean accuracy of the adversarially trained model. Stored under its own
    # attribute so the clean model's accuracy recorded above is not clobbered.
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy of the adversarially trained model on legitimate '
          'test examples: {0}'.format(accuracy))
    report.adv_train_clean_eval = accuracy

    # Re-score the adversarial examples crafted against the clean model on
    # the adversarially trained model.
    adv_fgsm_accuracy = success_rate(preds2, adv_fgsm, eval_params)
    adv_bim_accuracy = success_rate(preds2, adv_bim, eval_params)
    adv_mim_accuracy = success_rate(preds2, adv_mim, eval_params)
    report.adv_train_adv_fgsm_eval = 1. - adv_fgsm_accuracy
    report.adv_train_adv_bim_eval = 1. - adv_bim_accuracy
    report.adv_train_adv_mim_eval = 1. - adv_mim_accuracy

    # Close TF session
    sess.close()

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=False, label_smoothing=0.1,
                   adv_dir="/home/dinhtv/code/adversarial-images/igsm/"
                           "adversarial",
                   nb_adv_saved=100):
  """
  MNIST CleverHans tutorial.

  Trains a Keras CNN (or restores it from a checkpoint), evaluates it on
  legitimate inputs and on inputs perturbed by the Basic Iterative Method,
  then writes adversarial versions of the first training images to disk as
  TIFF files. Adversarial training is not performed, so the ``adv_train_*``
  fields of the returned report are never set here.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, training-set accuracies are also calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :param adv_dir: directory the adversarial TIFF images are written to
                  (created if it does not exist)
  :param nb_adv_saved: maximum number of adversarial images to generate and
                       write; fewer are written when fewer training examples
                       were loaded
  :return: an AccuracyReport object
  """
  tf.keras.backend.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be "
                              "configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session()
  keras.backend.set_session(sess)

  # Get MNIST data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholders
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Define TF model graph
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds = model(x)
  print("Defined TensorFlow model graph.")

  # Shared by every model_eval call below
  eval_params = {'batch_size': batch_size}

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = ckpt.model_checkpoint_path if ckpt is not None else None
  wrap = KerasModelWrapper(model)

  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate()
  else:
    print("Model was not loaded, training from scratch.")
    loss = CrossEntropy(wrap, smoothing=label_smoothing)
    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng)

  # Calculate training error
  if testing:
    acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
    report.train_clean_train_clean_eval = acc

  # Initialize the Iterative Gradient Sign Method (IGSM) attack object and graph
  igsm = BasicIterativeMethod(wrap, sess=sess)
  igsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}
  adv_x = igsm.generate(x, **igsm_params)

  # Consider the attack to be constant
  adv_x = tf.stop_gradient(adv_x)
  preds_adv = model(adv_x)

  # Evaluate the accuracy of the MNIST model on adversarial examples
  acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_params)
  print('Test accuracy on adversarial examples: %0.4f\n' % acc)
  report.clean_train_adv_eval = acc

  # Calculating train error
  if testing:
    acc = model_eval(sess, x, y, preds_adv, x_train,
                     y_train, args=eval_params)
    report.train_clean_train_adv_eval = acc

  # Generate adversarial versions of the first training images and save them.
  # Only the images that will be written are attacked, and the slice copes
  # with training sets smaller than nb_adv_saved.
  x_new = x_train[:max(nb_adv_saved, 0)]
  if len(x_new) > 0:
    import tiffile

    adv_new = igsm.generate_np(x_new, **igsm_params)

    if not os.path.isdir(adv_dir):
      os.makedirs(adv_dir)
    for i, adv_image in enumerate(adv_new):
      tiffile.imsave(os.path.join(adv_dir, "adv_%d.tif" % i), adv_image)

  return report
Example #14
0
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial.

    Restores a trained CNN from ``FLAGS.model_path``, reports its accuracy on
    the CIFAR10 test set, then runs the Basic Iterative Method with a target
    label on low-amplitude random noise once per class and plots the ten
    resulting images in a single row.

    :param argv: unused
    :return: None
    """
    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    sess = tf.Session()
    # Everything that uses the session lives inside the try so the session
    # is released even when loading, attacking or plotting raises.
    try:
        set_log_level(logging.DEBUG)

        # Get CIFAR10 data
        X_train, Y_train, X_test, Y_test = data_cifar10()
        assert Y_train.shape[1] == nb_classes

        # Define input TF placeholders
        x = tf.placeholder(tf.float32,
                           shape=(None, img_rows, img_cols, channels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        model_path = FLAGS.model_path

        from cnn_models import make_basic_cnn
        model = make_basic_cnn('fp_',
                               input_shape=(None, img_rows, img_cols,
                                            channels),
                               nb_filters=FLAGS.nb_filters)

        preds = model(x)
        print("Defined TensorFlow model graph with %d parameters"
              % model.n_params)

        model_load(sess, model_path)
        print('Restored model from %s' % model_path)

        # Evaluate the restored model on legitimate test examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: {0}'.format(
            accuracy))

        class_names = [
            'airplane', 'auto', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
            'ship', 'truck'
        ]

        # Seed the global NumPy RNG that draws the starting noise below
        np.random.seed(0)
        # One-hot target row, rewritten in place for every class
        labels = np.zeros((1, nb_classes))

        attack_params = {
            'eps': 1,
            'eps_iter': FLAGS.eps_iter,
            'nb_iter': FLAGS.nb_iter,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': labels
        }
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess)

        # Horizontal extent (axes coords) used to centre the class captions
        left, width = .25, .5
        right = left + width
        # Leave headroom above the images only when captions are drawn
        top = 0.6 if FLAGS.annot else 1.0
        fig, axes = plt.subplots(1, 10, squeeze=True, figsize=(8, 1.25))

        # generate unrecognizable adversarial examples
        for i in range(nb_classes):

            print("Generating %s" % class_names[i])
            # Draw some noise from a uniform distribution. These settings
            # are fairly arbitrary, feel free to tune the knobs.
            # You may also want to try:
            #   adv_img = np.clip(np.random.normal(
            #       loc=0.5, scale=0.25,
            #       size=(1, img_rows, img_cols, channels)), 0, 1)
            adv_img = 0.5 + \
                np.random.rand(1, img_rows, img_cols, channels) / 10

            labels[0, :] = 0
            labels[0, i] = 1

            adv_img = attacker.generate_np(adv_img, **attack_params)

            ax = axes[i]
            ax.imshow(adv_img.reshape(img_rows, img_cols, channels))
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            if FLAGS.annot:
                ax.text(0.5 * (left + right),
                        1.0,
                        class_names[i],
                        horizontalalignment='center',
                        verticalalignment='bottom',
                        rotation=30,
                        transform=ax.transAxes,
                        size='larger')
            plt.tight_layout(pad=0)

        plt.subplots_adjust(left=0,
                            bottom=0,
                            right=1.0,
                            top=top,
                            wspace=0.2,
                            hspace=0.2)
        plt.show()
    finally:
        sess.close()
Example #15
0
class TestBasicIterativeMethod(TestFastGradientMethod):
    """Tests for BasicIterativeMethod run against a SimpleModel."""

    def setUp(self):
        super(TestBasicIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def _assert_most_labels_change(self, x_val, x_adv):
        """
        Assert that the model's predicted label is unchanged by the attack
        for fewer than 10% of the inputs.
        """
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_adversarial_example_linfinity(self):
        self.help_generate_np_gives_adversarial_example(ord=np.inf, eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l1(self):
        self.help_generate_np_gives_adversarial_example(ord=1, eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l2(self):
        self.help_generate_np_gives_adversarial_example(ord=2, eps=.5,
                                                        nb_iter=20)

    def test_do_not_reach_lp_boundary(self):
        """
        Make sure that iterative attack don't reach boundary of Lp
        neighbourhood if nb_iter * eps_iter is relatively small compared to
        epsilon.
        """
        for norm in (1, 2, np.inf):
            _, _, delta = self.generate_adversarial_examples_np(
                ord=norm, eps=.5, nb_iter=10, eps_iter=.01)
            self.assertGreater(np.max(0.5 - delta), 0.25)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not passing clip_min and
        clip_max to fgm), this attack fails by
        np.mean(orig_labels == new_labels) == .39.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        self._assert_most_labels_change(x_val, x_adv)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        """
        Calling generate_np again with a different nb_iter must call
        tf.gradients again rather than reuse the graph built for the first
        nb_iter.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=10)
        self._assert_most_labels_change(x_val, x_adv)

        # One-element list so the nested function can record the call
        # without needing `nonlocal`.
        gradients_called = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            gradients_called[0] = True
            return old_grads(*x, **y)

        tf.gradients = fn
        try:
            x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                            clip_min=-5.0, clip_max=5.0,
                                            nb_iter=11)
            self._assert_most_labels_change(x_val, x_adv)
        finally:
            # Always undo the monkeypatch, even when the attack or an
            # assertion fails, so later tests see the real tf.gradients.
            tf.gradients = old_grads

        self.assertTrue(gradients_called[0])
Example #16
0
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
  """
  MNIST tutorial crafting targeted adversarial examples.

  Despite the function name, the attack used here is the Basic Iterative
  Method, not the Jacobian-based saliency map approach (JSMA). For each of
  the first `source_samples` test inputs, one adversarial example is crafted
  per class other than the input's true class, saved as a PNG, and checked
  against the target class.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
  # Imported once here rather than on every iteration of the attack loop
  import matplotlib.pyplot as plt
  from cleverhans.attacks import BasicIterativeMethod

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get MNIST data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]
  img_shape = (img_rows, img_cols, nchannels)

  # Define input TF placeholders
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  nb_filters = 64
  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  sess.run(tf.global_variables_initializer())
  rng = np.random.RandomState([2017, 8, 30])
  train(sess, loss, x_train, y_train, args=train_params, rng=rng)

  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft targeted adversarial examples using the Basic Iterative Method
  ###########################################################################
  print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
        ' adversarial examples')

  # Keep track of success (adversarial example classified in target)
  results = np.zeros((nb_classes, source_samples), dtype='i')

  # Rate of perturbed features for each test set example and target class
  perturbations = np.zeros((nb_classes, source_samples), dtype='f')

  # Initialize our array for grid visualization
  grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
  grid_viz_data = np.zeros(grid_shape, dtype='f')

  # Instantiate a BasicIterativeMethod attack object. Only parameters this
  # attack understands are listed; the JSMA-specific theta/gamma are not
  # BasicIterativeMethod arguments.
  bim = BasicIterativeMethod(model, sess=sess)
  bim_params = {'clip_min': 0., 'clip_max': 1.}

  figure = None
  # Loop over the samples we want to perturb into adversarial examples
  for sample_ind in range(source_samples):
    print('--------------------------------------')
    print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
    sample = x_test[sample_ind:(sample_ind + 1)]
    original_flat = x_test[sample_ind].reshape(-1)

    # We want to find an adversarial example for each possible target class
    # (i.e. all classes that differ from the label given in the dataset)
    current_class = int(np.argmax(y_test[sample_ind]))
    target_classes = other_classes(nb_classes, current_class)

    # For the grid visualization, keep original images along the diagonal
    grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        sample, img_shape)

    # Loop over all target classes; image_no numbers the saved files from 1
    for image_no, target in enumerate(target_classes, 1):
      print('Generating adv. example for target class %i' % target)

      # Run the attack towards `target`. The one-hot target has to be
      # handed to generate_np, otherwise the attack is untargeted and the
      # success check below compares against a class it never aimed for.
      one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
      one_hot_target[0, target] = 1
      adv_x = bim.generate_np(sample, y_target=one_hot_target, **bim_params)

      # Check if success was achieved
      res = int(model_argmax(sess, x, preds, adv_x) == target)

      # Compute the fraction of modified features
      adv_flat = adv_x.reshape(-1)
      nb_changed = np.count_nonzero(adv_flat != original_flat)
      percent_perturb = float(nb_changed) / adv_flat.shape[0]

      adv_image = np.reshape(adv_x, img_shape)
      # NOTE(review): file names depend only on image_no, so every sample
      # overwrites the images saved for the previous one - confirm intended.
      plt.imsave('./content/drive/My Drive/bim/' + str(image_no) + '.png',
                 adv_image.squeeze())

      # Display the original and adversarial images side-by-side
      if viz_enabled:
        figure = pair_visual(np.reshape(sample, img_shape), adv_image,
                             figure)

      # Add our adversarial example to our grid data
      grid_viz_data[target, current_class, :, :, :] = adv_image

      # Update the arrays for later analysis
      results[target, sample_ind] = res
      perturbations[target, sample_ind] = percent_perturb

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  nb_targets_tried = ((nb_classes - 1) * source_samples)
  succ_rate = float(np.sum(results)) / nb_targets_tried
  print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
  report.clean_train_adv_eval = 1. - succ_rate

  # Compute the average distortion introduced by the algorithm. Divide by
  # the number of attacks actually run: the row of each sample's own class
  # is never attacked and would otherwise drag the mean down with zeros.
  percent_perturbed = float(np.sum(perturbations)) / nb_targets_tried
  print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

  # Compute the average distortion introduced for successful samples only,
  # i.e. average over the successful entries rather than over all entries.
  succeeded = results == 1
  if succeeded.any():
    percent_perturb_succ = float(np.mean(perturbations[succeeded]))
  else:
    percent_perturb_succ = 0.
  print('Avg. rate of perturbed features for successful '
        'adversarial examples {0:.4f}'.format(percent_perturb_succ))

  # Close TF session
  sess.close()

  # Finally, block & display a grid of all the adversarial examples
  if viz_enabled:
    plt.close(figure)
    _ = grid_visual(grid_viz_data)

  return report