Example #1
    def load_mnist(self):
        """Load the training data (MNIST).
        """
        # Get MNIST data
        train_start, train_end = 0, 60000
        test_start, test_end = 0, 10000
        mnist = MNIST(train_start=train_start,
                      train_end=train_end,
                      test_start=test_start,
                      test_end=test_end)

        self._x_train, self._y_train = mnist.get_set('train')
        self._x_test, self._y_test = mnist.get_set('test')

        # Use Image Parameters
        self._img_rows, self._img_cols, self._nchannels = \
             self._x_train.shape[1:4]
        self._nb_classes = self._y_train.shape[1]

        print(f"len(train): {len(self._x_train)} / {len(self._y_train)}")
        print(f"len(test):  {len(self._x_test)} / {len(self._y_test)}")
        print(
            f"img_rows x img_cols x nchannels: {self._img_rows} x {self._img_cols} x {self._nchannels}"
        )
        print(f"nb_classes: {self._nb_classes}")
Example #2
def model_test(attack):
    """
    Evaluates the performance of the model on the original MNIST test set and on the augmented set.

    Parameters
    ----------
    attack: str
        The augmented dataset used (either "jsma", "wjsma" or "tjsma").
    """

    mnist = MNIST(train_start=TRAIN_START, train_end=TRAIN_END, test_start=TEST_START, test_end=TEST_END)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    print("ORIGINAL MNIST TEST")

    model_testing("mnist_defense_" + attack + ".joblib", x_train, y_train, x_test, y_test)

    x_add = np.load("defense/augmented/" + attack + "_x.npy")[:AUGMENT_SIZE]
    y_add = np.load("defense/augmented/" + attack + "_y.npy")[:AUGMENT_SIZE]

    x_train = np.concatenate((x_train, x_add.reshape(x_add.shape + (1,))), axis=0).astype(np.float32)
    y_train = np.concatenate((y_train, y_add), axis=0).astype(np.float32)

    print("====================")
    print("AUGMENTED MNIST TEST")

    model_testing("mnist_defense_" + attack + ".joblib", x_train, y_train, x_test, y_test)
Example #3
    def get_data(self, train_start, train_end, test_start, test_end):
        mnist = MNIST(train_start=train_start, train_end=train_end,
                      test_start=test_start, test_end=test_end)
        self.x_train, self.y_train = mnist.get_set('train')
        self.x_test, self.y_test = mnist.get_set('test')
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        self.nb_classes = self.y_train.shape[1]
Example #4
    def get_data(self, train_start, train_end, test_start, test_end, s0):
        mnist = MNIST(train_start=train_start, train_end=train_end,
                      test_start=test_start, test_end=test_end)
        self.x_train, self.y_train = mnist.get_set('train')
        self.x_test, self.y_test = mnist.get_set('test')
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        self.nb_classes = self.y_train.shape[1]
        self.x_sub = self.x_test[:s0]
        self.y_sub = np.argmax(self.y_test[:s0], axis=1)

        self.x_test = self.x_test[s0:]
        self.y_test = self.y_test[s0:]
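The np.argmax(..., axis=1) call above converts one-hot rows into integer class ids for the substitute set; for example:

import numpy as np

y = np.array([[0, 0, 1],
              [1, 0, 0]])          # one-hot labels
print(np.argmax(y, axis=1))        # [2 0]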
Example #5
def model_test(file_name=FILE_NAME):
    """
    Evaluates the performance of the model on the MNIST dataset.

    Parameters
    ----------
    file_name: str, optional
        The name of the joblib file.
    """

    mnist = MNIST(train_start=0, train_end=60000, test_start=0, test_end=10000)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    model_testing(file_name, x_train, y_train, x_test, y_test)
Example #6
def __test():
    # report = AccuracyReport()
    tf.set_random_seed(1234)
    sess = tf.Session()
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=0, train_end=60000, test_start=0, test_end=10000)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")
    # Train an MNIST model
    train_params = {
        'nb_epochs': NB_EPOCHS,
        'batch_size': BATCH_SIZE,
        'learning_rate': LEARNING_RATE,
        'filename': os.path.split(MODEL_PATH)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(MODEL_PATH + ".meta"):
        tf_model_load(sess, MODEL_PATH)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, MODEL_PATH)
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': BATCH_SIZE}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == 10000, x_test.shape  # test_start=0, test_end=10000 above
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
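CrossEntropy(model, smoothing=0.1) softens the one-hot targets before computing the loss; a numpy sketch of the usual label-smoothing formula (which should match what cleverhans applies):

import numpy as np

smoothing = 0.1
y = np.array([0., 0., 0., 1.])                        # one-hot label, 4 classes
y_smooth = y * (1. - smoothing) + smoothing / y.size  # soften the target
print(y_smooth)                                       # [0.025 0.025 0.025 0.925]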
Example #7
def train_ae(num_epochs=NUM_EPOCHS,
             batch_size=BATCH_SIZE,
             testing=False,
             learning_rate=LEARNING_RATE):

    # can use gpu
    config = tf.ConfigProto(device_count={'GPU': 1, 'CPU': 1})

    # Create TF session and set Keras backend session as TF
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST()
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain image params
    n_rows, n_cols, n_channels = x_train.shape[1:4]
    n_classes = y_train.shape[1]

    # define TF model graph
    model = DenoisingAutoencoder((n_rows, n_cols, n_channels))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate), loss='mse')

    # Train an MNIST model
    model.fit(x_train,
              x_train,
              batch_size=batch_size,
              epochs=num_epochs,
              validation_data=(x_test, x_test),
              verbose=1)

    # Reconstruct the test set with the trained autoencoder
    x_test_recon = model.predict(x_test, batch_size=batch_size, verbose=0)

    # Save model locally
    keras.models.save_model(model,
                            f"{MODEL_PATH}/autoencoder.hdf5",
                            overwrite=True,
                            include_optimizer=True)
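x_test_recon is computed above but never inspected; a one-line check of the mean squared reconstruction error (a hypothetical addition, not part of the original script) would be:

import numpy as np

mse = np.mean((x_test_recon - x_test) ** 2)  # average per-pixel squared error
print(f"test reconstruction MSE: {mse:.6f}")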
Example #8
def model_train(file_name=FILE_NAME):
    """
    Creates the joblib file of LeNet-5 trained over the MNIST dataset.

    Parameters
    ----------
    file_name: str, optional
        The name of the joblib file.
    """

    layers = [
        Conv2D(20, (5, 5), (1, 1), "VALID"),
        ReLU(),
        MaxPooling2D((2, 2), (2, 2), "VALID"),
        Conv2D(50, (5, 5), (1, 1), "VALID"),
        ReLU(),
        MaxPooling2D((2, 2), (2, 2), "VALID"),
        Flatten(),
        Linear(500),
        ReLU(),
        Linear(10),
        Softmax()
    ]

    model = MLP(layers, (None, 28, 28, 1))

    mnist = MNIST(train_start=0, train_end=60000, test_start=0, test_end=10000)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    model_training(model,
                   file_name,
                   x_train,
                   y_train,
                   x_test,
                   y_test,
                   nb_epochs=20,
                   batch_size=128,
                   learning_rate=0.001)
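As a sanity check on the layer stack above, the spatial sizes under VALID padding work out as follows:

# input        28 x 28 x 1
# conv 5x5/1   24 x 24 x 20   (28 - 5 + 1)
# pool 2x2/2   12 x 12 x 20
# conv 5x5/1    8 x  8 x 50   (12 - 5 + 1)
# pool 2x2/2    4 x  4 x 50
# flatten      800            (4 * 4 * 50) -> Linear(500) -> Linear(10) -> Softmax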
Example #9
def model_train(attack):
    """
    Creates the joblib file of LeNet-5 trained over the augmented MNIST dataset.

    Parameters
    ----------
    attack: str
        The augmented dataset used (either "jsma", "wjsma" or "tjsma").
    """

    layers = [
        Conv2D(20, (5, 5), (1, 1), "VALID"),
        ReLU(),
        MaxPooling2D((2, 2), (2, 2), "VALID"),
        Conv2D(50, (5, 5), (1, 1), "VALID"),
        ReLU(),
        MaxPooling2D((2, 2), (2, 2), "VALID"),
        Flatten(),
        Linear(500),
        ReLU(),
        Linear(10),
        Softmax()
    ]

    model = MLP(layers, (None, 28, 28, 1))

    mnist = MNIST(train_start=TRAIN_START, train_end=TRAIN_END, test_start=TEST_START, test_end=TEST_END)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    x_add = np.load("defense/augmented/" + attack + "_x.npy")[:AUGMENT_SIZE]
    y_add = np.load("defense/augmented/" + attack + "_y.npy")[:AUGMENT_SIZE]

    x_train = np.concatenate((x_train, x_add.reshape(x_add.shape + (1,))), axis=0).astype(np.float32)
    y_train = np.concatenate((y_train, y_add), axis=0).astype(np.float32)

    model_training(model, "mnist_defense_" + attack + ".joblib", x_train, y_train, x_test, y_test, nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE)
Example #10
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {'allow_soft_placement': True}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_fgsm_x = fgsm.generate(x, **fgsm_params)
        preds_adv_fgsm = model.get_logits(adv_fgsm_x)

        # Generate fgsm adversarial examples and save to disk
        out_dir = 'images/fgsm_adv/'  # renamed from `dir` to avoid shadowing the builtin
        os.makedirs(out_dir + 'train/', exist_ok=True)
        os.makedirs(out_dir + 'test/', exist_ok=True)
        for index in range(len(y_test)):
            print('test ' + str(index))
            x_ = x_test[index]
            label = np.argmax(y_test[index])
            raw_data = (fgsm.generate_np(x_.reshape(
                (1, 28, 28, 1)), **fgsm_params).reshape(
                    (28, 28)) * 255).astype('uint8')
            im = Image.fromarray(raw_data, mode='P')
            im.save(out_dir + 'test/' + str(label) + '_' + str(uuid.uuid4()) +
                    '.png')
        for index in range(len(y_train)):
            print('train ' + str(index))
            x_ = x_train[index]
            label = np.argmax(y_train[index])
            raw_data = (fgsm.generate_np(x_.reshape(
                (1, 28, 28, 1)), **fgsm_params).reshape(
                    (28, 28)) * 255).astype('uint8')
            im = Image.fromarray(raw_data, mode='P')
            im.save(out_dir + 'train/' + str(label) + '_' + str(uuid.uuid4()) +
                    '.png')

    return report
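fgsm_params above fixes eps=0.3 with pixels clipped to [0, 1]; for reference, FGSM computes x_adv = clip(x + eps * sign(grad_x J(x, y)), 0, 1). A numpy sketch with a stand-in gradient:

import numpy as np

eps = 0.3
x = np.random.rand(28, 28, 1).astype(np.float32)      # stand-in image in [0, 1]
grad = np.random.randn(28, 28, 1).astype(np.float32)  # stand-in loss gradient
x_adv = np.clip(x + eps * np.sign(grad), 0.0, 1.0)    # one FGSM step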
Example #11
def mnist_tutorial_fgsm(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE,
                        attack_iterations=ATTACK_ITERATIONS,
                        model_path=MODEL_PATH,
                        targeted=TARGETED,
                        noise_output=NOISE_OUTPUT):
  """
  MNIST tutorial for Fast Gradient Method's attack
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param attack_iterations: number of attack iterations (unused by FGSM,
                            kept for interface parity)
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :param noise_output: (boolean) plot the perturbation instead of the
                       adversarial example
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  nb_filters = 64

  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }

  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  if os.path.exists(model_path + ".meta"):
    tf_model_load(sess, model_path)
  else:
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, model_path)

  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using the Fast Gradient Sign Method (FGSM)
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # Instantiate a FGSM attack object
  fgsm = FastGradientMethod(model, sess=sess)

  if viz_enabled:
    assert source_samples == nb_classes
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]
  if targeted:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, 1, img_rows, img_cols,
                    nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = np.array(
          [[instance] * nb_classes for instance in x_test[idxs]],
          dtype=np.float32)
    else:
      adv_inputs = np.array(
          [[instance] * nb_classes for
           instance in x_test[:source_samples]], dtype=np.float32)

    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_inputs = adv_inputs.reshape(
        (source_samples * nb_classes, img_rows, img_cols, nchannels))
    adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape((source_samples *
                                                 nb_classes, nb_classes))
    yname = "y_target"
  else:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = x_test[idxs]
    else:
      adv_inputs = x_test[:source_samples]

    adv_ys = None
    yname = "y"

  if targeted:
    fgsm_params_batch_size = source_samples * nb_classes
  else:
    fgsm_params_batch_size = source_samples
  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}

  adv = fgsm.generate_np(adv_inputs, **fgsm_params)

  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    adv_accuracy = model_eval(
        sess, x, y, preds, adv, adv_ys, args=eval_params)
  else:
    if viz_enabled:
      err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params)
      adv_accuracy = 1 - err
    else:
      err = model_eval(sess, x, y, preds, adv, y_test[:source_samples],
                       args=eval_params)
      adv_accuracy = 1 - err

  if viz_enabled:
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * nb_classes] - adv_inputs[i * nb_classes]
      else:
        image = adv[i * nb_classes]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.clean_train_adv_eval = 1. - adv_accuracy

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))


  ###########################################################################
  # Adversarial Training
  ###########################################################################

  model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

  fgsm2 = FastGradientMethod(model2, sess=sess)

  def attack_fgsm(x):
    # Perturb whatever batch is passed in; the original returned
    # fgsm2.generate(adv_inputs, ...), which ignored the argument.
    return fgsm2.generate(x, **fgsm_params)

  preds2 = model2.get_logits(x)
  loss2 = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)

  train(sess, loss2, x_train, y_train, args=train_params, rng=rng)
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy of the adversarially trained model on clean test '
        'examples: {0}'.format(accuracy))
  report.adv_train_clean_eval = accuracy

  # Evaluate the adversarially trained model (preds2) on the adversarial
  # examples; the original re-evaluated the undefended model here.
  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    adv_accuracy = model_eval(
        sess, x, y, preds2, adv, adv_ys, args=eval_params)
  else:
    if viz_enabled:
      err = model_eval(sess, x, y, preds2, adv, y_test[idxs], args=eval_params)
      adv_accuracy = 1 - err
    else:
      err = model_eval(sess, x, y, preds2, adv, y_test[:source_samples],
                       args=eval_params)
      adv_accuracy = 1 - err

  if viz_enabled:
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * nb_classes] - adv_inputs[i * nb_classes]
      else:
        image = adv[i * nb_classes]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the success rate of the attack against the adversarially
  # trained model
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.adv_train_adv_eval = 1. - adv_accuracy

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

  # Close TF session
  sess.close()
  def save_visual(data, path):
    """
    Modified version of cleverhans.plot.pyplot
    """
    figure = plt.figure()
    # figure.canvas.set_window_title('Cleverhans: Grid Visualization')

    # Add the images to the plot
    num_cols = data.shape[0]
    num_rows = data.shape[1]
    num_channels = data.shape[4]
    for y in range(num_rows):
      for x in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (x + 1) + (y * num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(data[x, y, :, :, 0], cmap='gray')
        else:
          plt.imshow(data[x, y, :, :, :])

    # Draw the plot and return
    plt.savefig(path)
    return figure

  # Finally, block & display a grid of all the adversarial examples
  if viz_enabled:
    # _ = grid_visual(grid_viz_data)
    # cleverhans_image.save("output", grid_viz_data)
    if noise_output:
      image_name = "output/fgsm_mnist_noise.png"
    else:
      image_name = "output/fgsm_mnist.png"
    _ = save_visual(grid_viz_data, image_name)

  return report
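The "Avg. L_2 norm of perturbations" reported above is the mean per-example L2 distance between adversarial and original inputs; a tiny numpy check of that bookkeeping:

import numpy as np

adv = np.ones((2, 2, 2, 1), dtype=np.float32)    # stand-in adversarial batch
adv_inputs = np.zeros_like(adv)                  # stand-in originals
l2 = np.sum((adv - adv_inputs) ** 2, axis=(1, 2, 3)) ** .5
print(l2)         # each example differs by 1 in 4 pixels -> L2 norm 2.0
print(l2.mean())  # 2.0, the reported average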
Example #12
def evaluate_model(filepath,
                   train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   batch_size=128,
                   testing=False,
                   num_threads=None):
    """
  Run evaluation on a saved model
  :param filepath: path to model to evaluate
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param batch_size: size of evaluation batches
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)
    preds = model.get_logits(x)

    # Evaluate the accuracy of the MNIST model on legitimate and adversarial
    # test examples
    do_eval(preds, x_test, y_test, 'train_clean_train_clean_eval', False)
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
Example #13
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS,
                      batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED):
    """
  MNIST tutorial for Carlini and Wagner's attack
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param attack_iterations: number of CW optimization iterations
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    if targeted:
        cw_params_batch_size = source_samples * nb_classes
    else:
        cw_params_batch_size = source_samples
    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            err = model_eval(sess,
                             x,
                             y,
                             preds,
                             adv,
                             y_test[idxs],
                             args=eval_params)
            adv_accuracy = 1 - err
        else:
            err = model_eval(sess,
                             x,
                             y,
                             preds,
                             adv,
                             y_test[:source_samples],
                             args=eval_params)
            adv_accuracy = 1 - err

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
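In the targeted branch above, each source image is repeated once per target class and flattened into a single attack batch; a small numpy sketch of that layout (toy sizes):

import numpy as np

nb_classes = 3
x = np.zeros((2, 28, 28, 1), dtype=np.float32)            # two source images
adv_inputs = np.array([[img] * nb_classes for img in x])  # (2, 3, 28, 28, 1)
adv_inputs = adv_inputs.reshape((2 * nb_classes, 28, 28, 1))
one_hot = np.eye(nb_classes, dtype=np.float32)            # one row per target class
adv_ys = np.array([one_hot] * 2).reshape((2 * nb_classes, nb_classes))
print(adv_inputs.shape, adv_ys.shape)                     # (6, 28, 28, 1) (6, 3)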
Example #14
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
  """
  MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session and set as Keras backend session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  nb_filters = 64
  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  sess.run(tf.global_variables_initializer())
  rng = np.random.RandomState([2017, 8, 30])
  train(sess, loss, x_train, y_train, args=train_params, rng=rng)

  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using the Jacobian-based saliency map approach
  ###########################################################################
  print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
        ' adversarial examples')

  # Keep track of success (adversarial example classified in target)
  results = np.zeros((nb_classes, source_samples), dtype='i')

  # Rate of perturbed features for each test set example and target class
  perturbations = np.zeros((nb_classes, source_samples), dtype='f')

  # Initialize our array for grid visualization
  grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
  grid_viz_data = np.zeros(grid_shape, dtype='f')

  # Instantiate a SaliencyMapMethod attack object
  jsma = SaliencyMapMethod(model, sess=sess)
  jsma_params = {'theta': 1., 'gamma': 0.1,
                 'clip_min': 0., 'clip_max': 1.,
                 'y_target': None}

  figure = None
  # Loop over the samples we want to perturb into adversarial examples
  for sample_ind in range(0, source_samples):  # range: Python 3 (was xrange)
    print('--------------------------------------')
    print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
    sample = x_test[sample_ind:(sample_ind + 1)]

    # We want to find an adversarial example for each possible target class
    # (i.e. all classes that differ from the label given in the dataset)
    current_class = int(np.argmax(y_test[sample_ind]))
    target_classes = other_classes(nb_classes, current_class)

    # For the grid visualization, keep original images along the diagonal
    grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        sample, (img_rows, img_cols, nchannels))

    # Loop over all target classes
    for target in target_classes:
      print('Generating adv. example for target class %i' % target)

      # This call runs the Jacobian-based saliency map approach
      one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
      one_hot_target[0, target] = 1
      jsma_params['y_target'] = one_hot_target
      adv_x = jsma.generate_np(sample, **jsma_params)

      # Check if success was achieved
      res = int(model_argmax(sess, x, preds, adv_x) == target)

      # Compute the number of modified features
      adv_x_reshape = adv_x.reshape(-1)
      test_in_reshape = x_test[sample_ind].reshape(-1)
      nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
      percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

      # Display the original and adversarial images side-by-side
      if viz_enabled:
        figure = pair_visual(
            np.reshape(sample, (img_rows, img_cols, nchannels)),
            np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

      # Add our adversarial example to our grid data
      grid_viz_data[target, current_class, :, :, :] = np.reshape(
          adv_x, (img_rows, img_cols, nchannels))

      # Update the arrays for later analysis
      results[target, sample_ind] = res
      perturbations[target, sample_ind] = percent_perturb

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  nb_targets_tried = ((nb_classes - 1) * source_samples)
  succ_rate = float(np.sum(results)) / nb_targets_tried
  print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
  report.clean_train_adv_eval = 1. - succ_rate

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(perturbations)
  print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

  # Compute the average distortion introduced for successful samples only
  percent_perturb_succ = np.mean(perturbations * (results == 1))
  print('Avg. rate of perturbed features for successful '
        'adversarial examples {0:.4f}'.format(percent_perturb_succ))

  # Close TF session
  sess.close()

  # Finally, block & display a grid of all the adversarial examples
  if viz_enabled:
    import matplotlib.pyplot as plt
    plt.close(figure)
    _ = grid_visual(grid_viz_data)

  return report
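The perturbation rate tracked above counts how many input features JSMA changed; a minimal numpy version of that bookkeeping:

import numpy as np

original = np.array([0.0, 0.5, 1.0, 0.3])
adversarial = np.array([0.0, 0.7, 1.0, 0.9])
nb_changed = np.where(adversarial != original)[0].shape[0]
print(nb_changed / original.size)  # 0.5 -- half of the features were perturbed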
Example #15
#### Fetch Data ####
tf.reset_default_graph()

tf.set_random_seed(1234)
sess = tf.Session()

train_start = 0
train_end = 1000
test_start = 1001
test_end = 1200

mnist = MNIST(train_start=train_start,
              train_end=train_end,
              test_start=test_start,
              test_end=test_end)
x_train, y_train = mnist.get_set('train')
x_test, y_test = mnist.get_set('test')
img_rows, img_cols, nchannels = x_train.shape[1:4]

x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
y = tf.placeholder(tf.float32, shape=(None, 10))

nb_classes = 10  # equals y_train.shape[1]
nb_filters = 64

model = ResNet(scope="model1", nb_classes=nb_classes, nb_filters=nb_filters)
preds = model.get_logits(x)
loss = CrossEntropy(model, smoothing=0.1)
print("Defined TensorFlow model graph.")

Example #16
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=False,
                   label_smoothing=0.1):
    """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only use No.0 GPU
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows,
                      img_cols=img_cols,
                      channels=nchannels,
                      nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)
        saver = tf.train.Saver(max_to_keep=1)
        saver.save(sess,
                   '{}/mnist.ckpt'.format(train_dir),
                   global_step=NB_EPOCHS)
        print("model has been saved")

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the L-BFGS attack object and graph
    lbfgs = LBFGS(wrap, sess=sess)

    # targeted attack, targeted class is 1
    y_target = np.ones(128)
    y_target = keras.utils.to_categorical(y_target, num_classes=10)
    y_target = tf.Variable(y_target)
    sess.run(tf.global_variables_initializer())
    lbfgs_params = {'y_target': y_target, 'batch_size': 128}

    adv_x = lbfgs.generate(x, **lbfgs_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    start_time = time.time()
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    end_time = time.time()
    print("L-BFGS attack time is {}".format(end_time - start_time))
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv,
                         x_train,
                         y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    gc.collect()

    return report
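The y_target construction in this example points every attack at class 1; a standalone sketch of that encoding (assumes keras.utils.to_categorical, as used above):

import numpy as np
from keras.utils import to_categorical

y_target = to_categorical(np.ones(4), num_classes=10)
print(y_target[0])  # [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.], one-hot for class 1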
Example #17
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=False,
                   label_smoothing=0.1):
    """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
    tf.keras.backend.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != 'channels_last':
        raise NotImplementedError(
            "this tutorial requires keras to be configured to channels_last format"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows,
                      img_cols=img_cols,
                      channels=nchannels,
                      nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv,
                         x_train,
                         y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(img_rows=img_rows,
                        img_cols=img_cols,
                        channels=nchannels,
                        nb_filters=64,
                        nb_classes=nb_classes)
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              x_test,
                              y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              x_test,
                              y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess,
          loss_2,
          x_train,
          y_train,
          evaluate=evaluate_2,
          args=train_params,
          rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              x_train,
                              y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              x_train,
                              y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
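
Every example in this collection builds on the same Fast Gradient Sign Method update, so it is worth stating it once outside the CleverHans wrappers. The following is a minimal NumPy sketch of the untargeted step with the eps=0.3 and [0, 1] clipping used above; `grad` is assumed to already hold the gradient of the loss with respect to the input, and this is an illustration of the formula rather than the library's implementation.

import numpy as np

def fgsm_step(x, grad, eps=0.3, clip_min=0., clip_max=1.):
    # One untargeted FGSM step: move each pixel eps along the sign of
    # the loss gradient, then clip back to the valid image range
    x_adv = x + eps * np.sign(grad)
    return np.clip(x_adv, clip_min, clip_max)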
Example #18
def run_mnist_adv(
        num_epochs=NUM_EPOCHS,
        batch_size=BATCH_SIZE,
        learning_rate=LEARNING_RATE,
        test_ae=False,  # Test CNN with AE preprocessing
        test_cae=False,  # Test CNN with CAE preprocessing
        test_dae=False,  # Test CNN with DAE preprocessing
        test_stacked_dae=False,  # Test CNN with stacked DAE preprocessing
        v_noises=(0.1, 0.2, 0.3, 0.4, 0.5),  # tuples avoid mutable defaults
        lambdas=(1e-5, 1e-4, 1e-3, 1e-2, 1e-1),
        num_stacks=3):

    # ======================================================================
    # General Setup
    # ======================================================================

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Allow TensorFlow to use at most one GPU and one CPU
    config = tf.ConfigProto(device_count={'GPU': 1, 'CPU': 1})

    # Create TF session and set Keras backend session as TF
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST()
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain image params
    n_rows, n_cols, n_channels = x_train.shape[1:4]
    n_classes = y_train.shape[1]

    cnn_name = "cnn"

    # ======================================================================
    # Test with AE
    # ======================================================================
    if test_ae:
        ae_model = DenoisingAutoencoder((n_rows, n_cols, n_channels))
        ae_model.load_weights(f"{MODEL_PATH}/autoencoder.hdf5", by_name=False)

        final_out = ConvNet((n_rows, n_cols, n_channels),
                            n_classes,
                            concat=True,
                            concat_layer=ae_model.output)

        combined_model = Model(inputs=ae_model.input, outputs=final_out)

        cnn_model = ConvNet((n_rows, n_cols, n_channels), n_classes)
        cnn_model.load_weights(f"{MODEL_PATH}/{cnn_name}.hdf5", by_name=False)

        num_ae_layers = len(ae_model.layers)
        num_cnn_layers = len(cnn_model.layers)
        for i in range(len(combined_model.layers)):
            if i < num_ae_layers:
                weights = ae_model.layers[i].get_weights()
                combined_model.layers[i].set_weights(weights)
            else:
                weights = cnn_model.layers[i - num_ae_layers + 1].get_weights()
                combined_model.layers[i].set_weights(weights)

        # Call the model once so its graph is built before wrapping it
        combined_model(combined_model.input)
        wrap = KerasModelWrapper(combined_model)
        fgsm = FastGradientMethod(wrap, sess=sess)
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        adv_acc_metric = get_adversarial_acc_metric(combined_model, fgsm,
                                                    fgsm_params)
        combined_model.compile(optimizer=keras.optimizers.Adam(learning_rate),
                               loss='categorical_crossentropy',
                               metrics=['accuracy', adv_acc_metric])

        _, acc, adv_acc = combined_model.evaluate(x_test,
                                                  y_test,
                                                  batch_size=batch_size,
                                                  verbose=0)

        print('Test accuracy on legitimate examples: %0.4f' % acc)
        print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

        np.savetxt("ae_accuracies_whitebox.npy", np.array([acc, adv_acc]))

    # ======================================================================
    # Test with CAE
    # ======================================================================
    if test_cae:
        cae_adv_accuracies = []
        cae_accuracies = []
        for lam in lambdas:
            cae_model = ContractiveAutoencoder((n_rows, n_cols, n_channels))
            cae_model.load_weights(
                f"{MODEL_PATH}/contractive_autoencoder_{lam}.hdf5",
                by_name=False)

            final_out = ConvNet((n_rows, n_cols, n_channels),
                                n_classes,
                                concat=True,
                                concat_layer=cae_model.output)

            combined_model = Model(inputs=cae_model.input, outputs=final_out)

            cnn_model = ConvNet((n_rows, n_cols, n_channels), n_classes)
            cnn_model.load_weights(f"{MODEL_PATH}/{cnn_name}.hdf5",
                                   by_name=False)

            num_cae_layers = len(cae_model.layers)
            num_cnn_layers = len(cnn_model.layers)
            for i in range(len(combined_model.layers)):
                if i < num_cae_layers:
                    weights = cae_model.layers[i].get_weights()
                    combined_model.layers[i].set_weights(weights)
                else:
                    weights = cnn_model.layers[i - num_cae_layers +
                                               1].get_weights()
                    combined_model.layers[i].set_weights(weights)

            combined_model(combined_model.input)
            wrap = KerasModelWrapper(combined_model)
            fgsm = FastGradientMethod(wrap, sess=sess)
            fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
            adv_acc_metric = get_adversarial_acc_metric(
                combined_model, fgsm, fgsm_params)
            combined_model.compile(
                optimizer=keras.optimizers.Adam(learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy', adv_acc_metric])

            _, acc, adv_acc = combined_model.evaluate(x_test,
                                                      y_test,
                                                      batch_size=batch_size,
                                                      verbose=0)

            cae_accuracies.append(acc)
            cae_adv_accuracies.append(adv_acc)
            print(f"Lambda = {lam}")
            print('Test accuracy on legitimate examples: %0.4f' % acc)
            print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

        np.savetxt("cae_accuracies_whitebox.npy",
                   np.array([cae_accuracies, cae_adv_accuracies]))

    # ======================================================================
    # Test with DAE
    # ======================================================================
    if test_dae:
        dae_adv_accuracies = []
        dae_accuracies = []
        for v_noise in v_noises:
            dae_model = DenoisingAutoencoder((n_rows, n_cols, n_channels))
            dae_model.load_weights(
                f"{MODEL_PATH}/denoising_autoencoder_{v_noise}.hdf5",
                by_name=False)

            final_out = ConvNet((n_rows, n_cols, n_channels),
                                n_classes,
                                concat=True,
                                concat_layer=dae_model.output)

            combined_model = Model(inputs=dae_model.input, outputs=final_out)

            cnn_model = ConvNet((n_rows, n_cols, n_channels), n_classes)
            cnn_model.load_weights(f"{MODEL_PATH}/{cnn_name}.hdf5",
                                   by_name=False)

            num_dae_layers = len(dae_model.layers)
            num_cnn_layers = len(cnn_model.layers)
            for i in range(len(combined_model.layers)):
                if i < num_dae_layers:
                    weights = dae_model.layers[i].get_weights()
                    combined_model.layers[i].set_weights(weights)
                else:
                    weights = cnn_model.layers[i - num_dae_layers +
                                               1].get_weights()
                    combined_model.layers[i].set_weights(weights)

            combined_model(combined_model.input)
            wrap = KerasModelWrapper(combined_model)
            fgsm = FastGradientMethod(wrap, sess=sess)
            fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
            adv_acc_metric = get_adversarial_acc_metric(
                combined_model, fgsm, fgsm_params)
            combined_model.compile(
                optimizer=keras.optimizers.Adam(learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy', adv_acc_metric])

            _, acc, adv_acc = combined_model.evaluate(x_test,
                                                      y_test,
                                                      batch_size=batch_size,
                                                      verbose=0)

            dae_accuracies.append(acc)
            dae_adv_accuracies.append(adv_acc)
            print(f"V_noise = {v_noise}")
            print('Test accuracy on legitimate examples: %0.4f' % acc)
            print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

        np.savetxt("dae_accuracies_whitebox.npy",
                   np.array([dae_accuracies, dae_adv_accuracies]))

    # ======================================================================
    # Test with Stacked DAE
    # ======================================================================
    if test_stacked_dae:
        stacked_dae_adv_accuracies = []
        stacked_dae_accuracies = []
        for v_noise in v_noises:
            stacked_dae_model = StackedDenoisingAutoencoder(
                (n_rows, n_cols, n_channels), num_stacks)
            stacked_dae_model.load_weights(
                f"{MODEL_PATH}/stacked_denoising_autoencoder_{num_stacks}_{v_noise}.hdf5",
                by_name=False)

            final_out = ConvNet((n_rows, n_cols, n_channels),
                                n_classes,
                                concat=True,
                                concat_layer=stacked_dae_model.output)

            combined_model = Model(inputs=stacked_dae_model.input,
                                   outputs=final_out)

            cnn_model = ConvNet((n_rows, n_cols, n_channels), n_classes)
            cnn_model.load_weights(f"{MODEL_PATH}/{cnn_name}.hdf5",
                                   by_name=False)

            num_stacked_dae_layers = len(stacked_dae_model.layers)
            num_cnn_layers = len(cnn_model.layers)
            for i in range(len(combined_model.layers)):
                if i < num_stacked_dae_layers:
                    weights = stacked_dae_model.layers[i].get_weights()
                    combined_model.layers[i].set_weights(weights)
                else:
                    weights = cnn_model.layers[i - num_stacked_dae_layers +
                                               1].get_weights()
                    combined_model.layers[i].set_weights(weights)

            combined_model(combined_model.input)
            wrap = KerasModelWrapper(combined_model)
            fgsm = FastGradientMethod(wrap, sess=sess)
            fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
            adv_acc_metric = get_adversarial_acc_metric(
                combined_model, fgsm, fgsm_params)
            combined_model.compile(
                optimizer=keras.optimizers.Adam(learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy', adv_acc_metric])

            _, acc, adv_acc = combined_model.evaluate(x_test,
                                                      y_test,
                                                      batch_size=batch_size,
                                                      verbose=0)

            stacked_dae_accuracies.append(acc)
            stacked_dae_adv_accuracies.append(adv_acc)
            print(f"V_noise = {v_noise}")
            print('Test accuracy on legitimate examples: %0.4f' % acc)
            print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

        np.savetxt(
            "stacked_dae_accuracies_whitebox.npy",
            np.array([stacked_dae_accuracies, stacked_dae_adv_accuracies]))

    return report
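
All four branches above follow the same stitching pattern: rebuild the autoencoder and the classifier, chain them into one Model, and copy weights layer by layer, offsetting the CNN indices by one so its Input layer is skipped. Below is a stripped-down sketch of that pattern with toy Dense models; the 784/64/10 sizes are placeholders, not the ConvNet and autoencoder architectures used above.

from keras.layers import Dense, Input
from keras.models import Model

# Stand-in "autoencoder": 784 -> 64 -> 784
inp = Input(shape=(784,))
ae_out = Dense(784, activation='sigmoid')(Dense(64, activation='relu')(inp))
ae_model = Model(inp, ae_out)

# Stand-in "classifier": 784 -> 10
cls_inp = Input(shape=(784,))
cnn_model = Model(cls_inp, Dense(10, activation='softmax')(cls_inp))

# Chain them: a fresh classifier head on top of the autoencoder output
combined_model = Model(ae_model.input,
                       Dense(10, activation='softmax')(ae_model.output))

# Copy weights layer by layer; CNN indices are offset by one to skip
# its Input layer, exactly as in the loops above
num_ae_layers = len(ae_model.layers)
for i, layer in enumerate(combined_model.layers):
    if i < num_ae_layers:
        layer.set_weights(ae_model.layers[i].get_weights())
    else:
        layer.set_weights(
            cnn_model.layers[i - num_ae_layers + 1].get_weights())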
Example #19
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, testing=False,
                   label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param testing: if true, training error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)
  # Force TensorFlow to use single thread to improve reproducibility
  config = tf.ConfigProto(intra_op_parallelism_threads=1,
                          inter_op_parallelism_threads=1)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("This tutorial requires Keras to be configured"
                              " to the channels_last image format.")

  # Create TF session and set as Keras backend session
  sess = tf.Session(config=config)
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Label smoothing
  y_train -= label_smoothing * (y_train - 1. / nb_classes)

  # Define Keras model
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  print("Defined Keras model.")

  # To be able to call the model in the custom loss, we need to call it once
  # before, see https://github.com/tensorflow/tensorflow/issues/23769
  model(model.input)

  # Initialize the Fast Gradient Sign Method (FGSM) attack object
  wrap = KerasModelWrapper(model)
  fgsm = FastGradientMethod(wrap, sess=sess)
  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}

  adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params)
  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate),
      loss='categorical_crossentropy',
      metrics=['accuracy', adv_acc_metric]
  )

  # Train an MNIST model
  model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=nb_epochs,
            validation_data=(x_test, y_test),
            verbose=2)

  # Evaluate the accuracy on legitimate and adversarial test examples
  _, acc, adv_acc = model.evaluate(x_test, y_test,
                                   batch_size=batch_size,
                                   verbose=0)
  report.clean_train_clean_eval = acc
  report.clean_train_adv_eval = adv_acc
  print('Test accuracy on legitimate examples: %0.4f' % acc)
  print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

  # Calculate training error
  if testing:
    _, train_acc, train_adv_acc = model.evaluate(x_train, y_train,
                                                 batch_size=batch_size,
                                                 verbose=0)
    report.train_clean_train_clean_eval = train_acc
    report.train_clean_train_adv_eval = train_adv_acc

  print("Repeating the process, using adversarial training")
  # Redefine Keras model
  model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
  model_2(model_2.input)
  wrap_2 = KerasModelWrapper(model_2)
  fgsm_2 = FastGradientMethod(wrap_2, sess=sess)

  # Use a loss function based on legitimate and adversarial examples
  adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params)
  adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params)
  model_2.compile(
      optimizer=keras.optimizers.Adam(learning_rate),
      loss=adv_loss_2,
      metrics=['accuracy', adv_acc_metric_2]
  )

  # Train an MNIST model
  model_2.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=nb_epochs,
              validation_data=(x_test, y_test),
              verbose=2)

  # Evaluate the accuracy on legitimate and adversarial test examples
  _, acc, adv_acc = model_2.evaluate(x_test, y_test,
                                     batch_size=batch_size,
                                     verbose=0)
  report.adv_train_clean_eval = acc
  report.adv_train_adv_eval = adv_acc
  print('Test accuracy on legitimate examples: %0.4f' % acc)
  print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

  # Calculate training error
  if testing:
    _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train,
                                                   batch_size=batch_size,
                                                   verbose=0)
    report.train_adv_train_clean_eval = train_acc
    report.train_adv_train_adv_eval = train_adv_acc

  return report
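
The in-place update y_train -= label_smoothing * (y_train - 1. / nb_classes) above pulls every one-hot label toward the uniform distribution. A quick worked check with nb_classes = 10 and smoothing 0.1: the correct class moves from 1 to 1 - 0.1 * (1 - 0.1) = 0.91, every other class from 0 to 0.01, and each row still sums to 1.

import numpy as np

y = np.zeros(10, dtype=np.float32)
y[3] = 1.                       # one-hot label for class 3
y -= 0.1 * (y - 1. / 10)        # the same update used in the tutorial
print(y[3], y[0], y.sum())      # 0.91 0.01 1.0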
Example #20
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=False,
                   label_smoothing=0.1):
    """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    stream = generate_cipher_stream(KEY)
    x_train_defense = x_train.copy()
    x_test_defense = x_test.copy()
    for i in range(len(x_train)):
        x_train_defense[i] = xor(x_train[i], stream)
    for i in range(len(x_test)):
        x_test_defense[i] = xor(x_test[i], stream)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows,
                      img_cols=img_cols,
                      channels=nchannels,
                      nb_filters=64,
                      nb_classes=nb_classes)

    model_defense = cnn_model(img_rows=img_rows,
                              img_cols=img_cols,
                              channels=nchannels,
                              nb_filters=64,
                              nb_classes=nb_classes)

    preds = model(x)
    preds_defense = model_defense(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    def evaluate_defense():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_defense,
                         x_test_defense,
                         y_test,
                         args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)
    wrap_defense = KerasModelWrapper(model_defense)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)

    # Train the defense model on the XOR-transformed data
    train_params_defense = {
        'nb_epochs': 10,
        'batch_size': batch_size,
        'learning_rate': 0.001,
        'train_dir': train_dir,
        'filename': filename
    }
    print("Defense model is trained.")
    loss_defense = CrossEntropy(wrap_defense, smoothing=label_smoothing)
    train(sess,
          loss_defense,
          x_train_defense,
          y_train,
          evaluate=evaluate_defense,
          args=train_params_defense,
          rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.2, 'clip_min': 0., 'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    print("Evaluate the accuracy of target model on adversarial examples. ")
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Evaluate the accuracy of the MNIST defense model on adversarial examples
    print("Evaluate the accuracy of defense model on adversarial examples. ")
    eval_par = {'batch_size': batch_size}
    adv_x_trans = tf.py_func(tensor_xor, [adv_x, stream], tf.float32)
    preds_adv_defense = model_defense(adv_x_trans)
    acc = model_eval(sess,
                     x,
                     y,
                     preds_adv_defense,
                     x_test,
                     y_test,
                     args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)

    return report
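
The defense above hides the model behind a secret XOR stream: inputs are masked with a keyed stream before the defense model ever sees them, so perturbations crafted against the undefended model no longer line up with the defense model's training distribution. The helpers generate_cipher_stream, xor and tensor_xor are defined elsewhere in the original script; the sketch below is only one plausible reading of them, assuming a fixed binary mask per key and a "soft" XOR that reduces to the bitwise operation on binarized pixels.

import numpy as np

def generate_cipher_stream_sketch(key, shape=(28, 28, 1)):
    # Hypothetical stand-in: derive a fixed binary mask from the key
    rng = np.random.RandomState(key)
    return rng.randint(0, 2, size=shape).astype(np.float32)

def xor_sketch(image, stream):
    # Soft XOR: equal to bitwise XOR on {0, 1} pixels, and stays in
    # [0, 1] for grayscale values in between
    return image + stream - 2. * image * stream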
Example #21
def mnist_blackbox(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_classes=NB_CLASSES,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   nb_epochs=NB_EPOCHS,
                   holdout=HOLDOUT,
                   data_aug=DATA_AUG,
                   nb_epochs_s=NB_EPOCHS_S,
                   lmbda=LMBDA,
                   aug_batch_size=AUG_BATCH_SIZE):
    """
  MNIST tutorial for the black-box attack from arxiv.org/abs/1602.02697
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :return: a dictionary with:
           * black-box model accuracy on test set
           * substitute model accuracy on test set
           * black-box model accuracy on adversarial examples transferred
             from the substitute model
  """

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Dictionary used to keep track and return key accuracies
    accuracies = {}

    # Perform tutorial setup
    assert setup_tutorial()

    # Create TF session
    sess = tf.compat.v1.Session()

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Initialize substitute training set reserved for adversary
    x_sub = x_test[:holdout]
    y_sub = np.argmax(y_test[:holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    x_test = x_test[holdout:]
    y_test = y_test[holdout:]

    # Obtain Image parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.compat.v1.placeholder(tf.float32,
                                 shape=(None, img_rows, img_cols, nchannels))
    y = tf.compat.v1.placeholder(tf.float32, shape=(None, nb_classes))

    # Seed random number generator so tutorial is reproducible
    rng = np.random.RandomState([2017, 8, 30])

    # Simulate the black-box model locally
    # You could replace this with a remote labeling API, for instance
    print("Preparing the black-box model.")
    prep_bbox_out = prep_bbox(sess, x, y, x_train, y_train, x_test, y_test,
                              nb_epochs, batch_size, learning_rate, rng,
                              nb_classes, img_rows, img_cols, nchannels)
    model, bbox_preds, accuracies['bbox'] = prep_bbox_out

    # Train substitute using method from https://arxiv.org/abs/1602.02697
    print("Training the substitute model.")
    train_sub_out = train_sub(sess, x, y, bbox_preds, x_sub, y_sub, nb_classes,
                              nb_epochs_s, batch_size, learning_rate, data_aug,
                              lmbda, aug_batch_size, rng, img_rows, img_cols,
                              nchannels)
    model_sub, preds_sub = train_sub_out

    # Evaluate the substitute model on clean test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_sub, x_test, y_test, args=eval_params)
    accuracies['sub'] = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    fgsm_par = {'eps': 0.3, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_sub, sess=sess)

    # Craft adversarial examples using the substitute
    eval_params = {'batch_size': batch_size}
    x_adv_sub = fgsm.generate(x, **fgsm_par)

    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess,
                          x,
                          y,
                          model.get_logits(x_adv_sub),
                          x_test,
                          y_test,
                          args=eval_params)
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute: ' + str(accuracy))
    accuracies['bbox_on_sub_adv_ex'] = accuracy

    return accuracies
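
train_sub above implements the substitute-training loop of the Papernot et al. black-box attack: in each data-augmentation round the substitute's Jacobian points toward new inputs, which are then labeled by querying the black-box oracle. A schematic NumPy sketch of one round, assuming substitute_grads(x, y) returns the gradient of the substitute's class-y logit with respect to x and oracle_label(x) returns the oracle's predicted class indices (both hypothetical helpers):

import numpy as np

def augment_round(x_sub, y_sub, substitute_grads, oracle_label, lmbda=0.1):
    # One Jacobian-based dataset augmentation step
    x_new = np.clip(x_sub + lmbda * np.sign(substitute_grads(x_sub, y_sub)),
                    0., 1.)
    x_sub = np.concatenate([x_sub, x_new], axis=0)
    y_sub = np.concatenate([y_sub, oracle_label(x_new)], axis=0)
    return x_sub, y_sub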
Example #22
def mnist_tutorial_adv_train(train_start=0,
                             train_end=60000,
                             test_start=0,
                             test_end=10000,
                             viz_enabled=VIZ_ENABLED,
                             nb_epochs=NB_EPOCHS,
                             batch_size=BATCH_SIZE,
                             source_samples=SOURCE_SAMPLES,
                             learning_rate=LEARNING_RATE,
                             attack_iterations=ATTACK_ITERATIONS,
                             model_path=MODEL_PATH,
                             targeted=TARGETED,
                             noise_output=NOISE_OUTPUT):
    """
  MNIST tutorial for Adversarial Training
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the FGSM, BIM and MIM approaches
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate the FGSM, BIM and MIM attack objects
    fgsm = FastGradientMethod(model, sess=sess)
    bim = BasicIterativeMethod(model, sess=sess)
    mim = MomentumIterativeMethod(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    bim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }
    mim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }

    adv_fgsm = fgsm.generate_np(adv_inputs, **fgsm_params)
    adv_bim = bim.generate_np(adv_inputs, **bim_params)
    adv_mim = mim.generate_np(adv_inputs, **mim_params)
    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_fgsm_accuracy = model_eval(sess,
                                       x,
                                       y,
                                       preds,
                                       adv_fgsm,
                                       adv_ys,
                                       args=eval_params)
        adv_bim_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_bim,
                                      adv_ys,
                                      args=eval_params)
        adv_mim_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_mim,
                                      adv_ys,
                                      args=eval_params)

    else:
        if viz_enabled:
            err_fgsm = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_fgsm,
                                  y_test[idxs],
                                  args=eval_params)
            err_bim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_bim,
                                 y_test[idxs],
                                 args=eval_params)
            err_mim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_mim,
                                 y_test[idxs],
                                 args=eval_params)
            adv_fgsm_accuracy = 1 - err_fgsm
            adv_bim_accuracy = 1 - err_bim
            adv_mim_accuracy = 1 - err_mim
        else:
            err_fgsm = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_fgsm,
                                  y_test[:source_samples],
                                  args=eval_params)
            err_bim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_bim,
                                 y_test[:source_samples],
                                 args=eval_params)
            err_mim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_mim,
                                 y_test[:source_samples],
                                 args=eval_params)

            adv_fgsm_accuracy = 1 - err_fgsm
            adv_bim_accuracy = 1 - err_bim
            adv_mim_accuracy = 1 - err_mim

    print('--------------------------------------')

    # Compute the rate at which the adversarial examples succeeded; in both
    # branches above adv_*_accuracy holds the attack success rate, not the
    # model accuracy (model_eval itself returns accuracy)
    print('Avg. rate of successful adv. (FGSM) examples {0:.4f}'.format(
        adv_fgsm_accuracy))
    report.clean_train_adv_fgsm_eval = 1. - adv_fgsm_accuracy
    print('Avg. rate of successful adv. (BIM) examples {0:.4f}'.format(
        adv_bim_accuracy))
    report.clean_train_adv_bim_eval = 1. - adv_bim_accuracy
    print('Avg. rate of successful adv. (MIM) examples {0:.4f}'.format(
        adv_mim_accuracy))
    report.clean_train_adv_mim_eval = 1. - adv_mim_accuracy

    # Compute the average L2 distortion introduced by each attack
    l2_fgsm = np.mean(
        np.sum((adv_fgsm - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (FGSM) perturbations {0:.4f}'.format(l2_fgsm))
    l2_bim = np.mean(
        np.sum((adv_bim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (BIM) perturbations {0:.4f}'.format(l2_bim))
    l2_mim = np.mean(
        np.sum((adv_mim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (MIM) perturbations {0:.4f}'.format(l2_mim))

    ###########################################################################
    # Adversarial Training
    ###########################################################################

    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

    # Craft training-time adversarial examples against the new model
    fgsm2 = FastGradientMethod(model2, sess=sess)

    # bim2 = BasicIterativeMethod(model2, sess=sess)
    # mim2 = MomentumIterativeMethod(model2, sess=sess)

    def attack_fgsm(x):
        # Perturb the batch handed in by the training loop,
        # not a fixed set of precomputed inputs
        return fgsm2.generate(x, **fgsm_params)

    # def attack_bim(x):
    #     return bim2.generate(x, **bim_params)
    # def attack_mim(x):
    #     return mim2.generate(x, **mim_params)

    preds2 = model2.get_logits(x)
    loss2_fgsm = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)
    # loss2_bim = CrossEntropy(model2, smoothing=0.1, attack=attack_bim)
    # loss2_mim = CrossEntropy(model2, smoothing=0.1, attack=attack_mim)

    train(sess, loss2_fgsm, x_train, y_train, args=train_params, rng=rng)
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy of the adversarially trained model on legitimate '
          'test examples: {0}'.format(accuracy))
    report.adv_train_clean_eval = accuracy

    # Evaluate the adversarially trained model on the adversarial examples
    # crafted earlier against the original model (a transfer setting);
    # adv_ys is only defined for targeted attacks, so fall back to the
    # true labels otherwise
    adv_labels = adv_ys if targeted else (
        y_test[idxs] if viz_enabled else y_test[:source_samples])
    adv_fgsm_accuracy = model_eval(sess, x, y, preds2, adv_fgsm, adv_labels,
                                   args=eval_params)
    adv_bim_accuracy = model_eval(sess, x, y, preds2, adv_bim, adv_labels,
                                  args=eval_params)
    adv_mim_accuracy = model_eval(sess, x, y, preds2, adv_mim, adv_labels,
                                  args=eval_params)
    report.adv_train_adv_fgsm_eval = adv_fgsm_accuracy
    report.adv_train_adv_bim_eval = adv_bim_accuracy
    report.adv_train_adv_mim_eval = adv_mim_accuracy

    # Close TF session
    sess.close()

    return report
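
The BasicIterativeMethod used above (nb_iter=50, eps_iter=0.01) is just the FGSM step repeated, with a projection back into the eps-ball around the original input after each step. A minimal single-image NumPy sketch, assuming loss_grad(x) returns the gradient of the loss with respect to x:

import numpy as np

def bim(x, loss_grad, eps=0.3, eps_iter=0.01, nb_iter=50):
    x_adv = x.copy()
    for _ in range(nb_iter):
        x_adv = x_adv + eps_iter * np.sign(loss_grad(x_adv))
        x_adv = np.clip(x_adv, x - eps, x + eps)   # stay in the eps-ball
        x_adv = np.clip(x_adv, 0., 1.)             # stay a valid image
    return x_adv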
Example #23
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=False, label_smoothing=0.1,
                   adversarial_training=ADVERSARIAL_TRAINING,
                   attacking=ATTACKING, origin_method=ORIGIN_METHOD,
                   save_model=SAVE_MODEL, model_type=MODEL_TYPE):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
  keras.layers.core.K.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if not hasattr(backend, "tf"):
    raise RuntimeError("This tutorial requires keras to be configured"
                       " to use the TensorFlow backend.")

  if keras.backend.image_dim_ordering() != 'tf':
    keras.backend.set_image_dim_ordering('tf')
    print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
          "'th', temporarily setting to 'tf'")

  # Create TF session and set as Keras backend session
  os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only use No.0 GPU
  config = tf.ConfigProto()
  config.allow_soft_placement=True
  config.gpu_options.allow_growth = True
  sess = tf.Session(config=config)
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))


  # Define TF model graph
  if model_type == 'a':
      the_model = modelA
  elif model_type == 'b':
      the_model = modelB
  elif model_type == 'c':
      the_model = modelC
  else:
      exit("The model type must be 'a', 'b' or 'c'.")
  model = the_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  wrap = KerasModelWrapper(model)
  preds = model(x)

  # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
  if origin_method == 'fgsm':
      att_method = FastGradientMethod(wrap, sess=sess)
      att_method_params = {'eps': 0.2,
                           'clip_min': 0.,
                           'clip_max': 1.}
  elif origin_method == 'bim':
      att_method = BasicIterativeMethod(wrap, sess=sess)
      att_method_params = {'eps': 0.2,
                           'eps_iter': 0.06,
                           'nb_iter': 10,
                           'clip_min': 0.,
                           'clip_max': 1.}
  elif origin_method == 'mifgsm':
      att_method = MomentumIterativeMethod(wrap, sess=sess)
      att_method_params = {'eps': 0.2,
                           'eps_iter': 0.08,
                           'nb_iter': 10,
                           'decay_factor': 0.4,
                           'clip_min': 0.,
                           'clip_max': 1.}
  else:
      exit("The attack method must be 'fgsm', 'bim' or 'mifgsm'.")
  # Evaluate the accuracy of the MNIST model on adversarial examples
  print(att_method_params)
  adv_x = att_method.generate(x, **att_method_params)
  # Consider the attack to be constant
  adv_x = tf.stop_gradient(adv_x)
  preds_adv = model(adv_x)
  def attack(x):
      return att_method.generate(x, **att_method_params)

  def evaluate2():
      # Evaluate the accuracy of the MNIST model on legitimate test examples
      eval_params = {'batch_size': batch_size}
      acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
      report.clean_train_clean_eval = acc
      print('AT Test accuracy on legitimate examples: %0.4f' % acc)

      # Accuracy of the adversarially trained model on adversarial examples
      accuracy = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_params)
      print('AT Test accuracy on adversarial examples: %0.4f' % accuracy)
      report.adv_train_adv_eval = accuracy

  # Keep per-model/per-attack checkpoints in their own subdirectory, and
  # extend train_dir before building train_params so both point to the
  # same location
  train_dir = train_dir + '/' + model_type + '/' + origin_method
  if not os.path.exists(train_dir):
    os.makedirs(train_dir)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path


  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate2()
  else:
    print("Model was not loaded, training from scratch.")
    loss2 = CrossEntropy(wrap, smoothing=label_smoothing, attack=attack)
    train(sess, loss2, x_train, y_train, evaluate=evaluate2,
          args=train_params, rng=rng)
    if save_model:
        saver = tf.train.Saver(max_to_keep=1)
        saver.save(sess, '{}/{}.ckpt'.format(train_dir, origin_method),
                   global_step=NB_EPOCHS)
        keras.models.save_model(
            model, '{}/{}_mnist.h5'.format(train_dir, origin_method))
        print("Model has been saved.")


  # >>> evaluate the trained model against the other attack methods >>>
  if adversarial_training:
      for attacking in ['fgsm', 'bim', 'mifgsm']:
          if attacking == 'fgsm':
              att_method = FastGradientMethod(wrap, sess=sess)
              att_method_params = {'eps': 0.2,
                                   'clip_min': 0.,
                                   'clip_max': 1.}
          elif attacking == 'bim':
              att_method = BasicIterativeMethod(wrap, sess=sess)
              att_method_params = {'eps': 0.2,
                                   'eps_iter': 0.06,
                                   'nb_iter': 10,
                                   'clip_min': 0.,
                                   'clip_max': 1.}
          else:  # 'mifgsm'
              att_method = MomentumIterativeMethod(wrap, sess=sess)
              att_method_params = {'eps': 0.2,
                                   'eps_iter': 0.08,
                                   'nb_iter': 10,
                                   'decay_factor': 0.4,
                                   'clip_min': 0.,
                                   'clip_max': 1.}
          # Evaluate the accuracy of the MNIST model on adversarial examples
          print(att_method_params)
          adv_x = att_method.generate(x, **att_method_params)
          # Consider the attack to be constant
          adv_x = tf.stop_gradient(adv_x)
          preds_adv = model(adv_x)

          eval_par = {'batch_size': batch_size}
          start_time = time.time()
          acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
          print('Test accuracy on adversarial examples: %0.4f' % acc)
          end_time = time.time()
          print("{} attack time is {}\n".format(attacking,end_time-start_time))
          report.clean_train_adv_eval = acc

  gc.collect()

  return report
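
The MomentumIterativeMethod configured above (eps_iter=0.08, nb_iter=10, decay_factor=0.4) differs from BIM only in accumulating an L1-normalized gradient with momentum before taking the sign, which stabilizes the update direction across iterations. A single-image sketch under the same assumption that loss_grad(x) returns the input gradient:

import numpy as np

def mim(x, loss_grad, eps=0.2, eps_iter=0.08, nb_iter=10, decay_factor=0.4):
    x_adv = x.copy()
    g = np.zeros_like(x)
    for _ in range(nb_iter):
        grad = loss_grad(x_adv)
        # Momentum accumulation of the L1-normalized gradient
        g = decay_factor * g + grad / max(np.sum(np.abs(grad)), 1e-12)
        x_adv = np.clip(x_adv + eps_iter * np.sign(g), x - eps, x + eps)
        x_adv = np.clip(x_adv, 0., 1.)
    return x_adv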
Example #24
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS,
                      batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      model_path_cls=MODEL_PATH,
                      targeted=TARGETED):

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)
    nb_latent_size = 100
    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))
    z = tf.placeholder(tf.float32, shape=(None, nb_latent_size))
    z_t = tf.placeholder(tf.float32, shape=(None, nb_latent_size))

    #nb_filters = 64
    nb_layers = 500

    # Define TF model graph
    model = ModelBasicAE('model', nb_layers, nb_latent_size)
    cl_model = ModelCls('cl_model')
    #preds = model.get_logits(x)
    recons = model.get_layer(x, 'RECON')
    loss = SquaredError(model)
    print("Defined TensorFlow model graph.")

    loss_cls = CrossEntropy(cl_model)
    y_logits = cl_model.get_layer(z, 'LOGITS')
    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }
    train_params_cls = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path_cls)[-1]
    }
    rng = np.random.RandomState([2017, 8, 30])
    # Checkpoint reuse is disabled here, so the autoencoder is always
    # retrained from scratch:
    # if os.path.exists(model_path + ".meta"):
    #     tf_model_load(sess, model_path)
    # else:
    eval_params_cls = {'batch_size': batch_size}

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(model, cl_model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * (nb_classes - 1)
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)

            #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])

            adv_input_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes - 1):
                    targ.append(y_test[idxs[curr_num]])
                adv_input_y.append(targ)

            adv_input_y = np.array(adv_input_y)

            adv_target_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(y_test[idxs[id]])
                adv_target_y.append(targ)

            adv_target_y = np.array(adv_target_y)

            #print("adv_input_y: \n", adv_input_y)
            #print("adv_target_y: \n", adv_target_y)

            # The adversarial reconstruction targets: images of every other class
            adv_input_targets = np.array(
                [[x_test[idxs[i]] for i in range(nb_classes) if i != curr_num]
                 for curr_num in range(nb_classes)])

            adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1),
                                             img_rows, img_cols, nchannels))
            adv_input_targets = adv_input_targets.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_y = adv_input_y.reshape(
                source_samples * (nb_classes - 1), nb_classes)
            adv_target_y = adv_target_y.reshape(
                source_samples * (nb_classes - 1), nb_classes)

            #print("adv_input_y: \n", adv_input_y)
            #print("adv_target_y: \n", adv_target_y)

        # Identity matrix: one one-hot row per class
        one_hot = np.eye(nb_classes)

    train_ae(sess,
             loss,
             x_train,
             x_train,
             args=train_params,
             rng=rng,
             var_list=model.get_params())
    saver = tf.train.Saver()
    saver.save(sess, model_path)

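    # Pre-compute the latent codes once so the classifier can be trained
    # directly on them instead of re-encoding every batch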
    x_train_lat = model.get_layer(x_train, 'LATENT')
    x_test_lat = model.get_layer(x_test, 'LATENT')
    x_train_lat = sess.run(x_train_lat)
    x_test_lat = sess.run(x_test_lat)

    def do_eval_cls(preds, x_set, y_set, x_tar_set, report_key, is_adv=None):
        acc = model_eval(sess,
                         z,
                         y,
                         preds,
                         z_t,
                         x_set,
                         y_set,
                         x_tar_set,
                         args=eval_params_cls)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    def eval_cls():
        do_eval_cls(y_logits, x_test_lat, y_test, x_test_lat,
                    'clean_train_clean_eval', False)

    #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params())
    train_cls_lat(sess,
                  loss_cls,
                  x_train_lat,
                  y_train,
                  evaluate=eval_cls,
                  args=train_params_cls,
                  rng=rng,
                  var_list=cl_model.get_params())
    saver.save(sess, model_path_cls)

    #adv_input_y = cl_model.get_layer(adv_inputs, 'LOGITS')
    #adv_target_y = cl_model.get_layer(adv_input_targets, 'LOGITS')

    adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
        (source_samples * nb_classes, nb_classes))
    yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)

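    # More binary-search steps tighten the constant that trades perturbation
    # size against attack success; initial_const seeds that search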
    cw_params = {
        'binary_search_steps': 10,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)

    #print("shaep of adv: ", np.shape(adv))
    recon_orig = model.get_layer(adv_inputs, 'RECON')
    lat_adv = model.get_layer(adv, 'LATENT')
    recon_adv = model.get_layer(adv, 'RECON')
    lat_orig = model.get_layer(x, 'LATENT')
    lat_orig_recon = model.get_layer(recons, 'LATENT')
    #pred_adv_recon = cl_model.get_layer(recon_adv, 'LOGITS')
    pred_adv_recon = cl_model.get_layer(lat_adv, 'LOGITS')

    #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
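    # 90 presumably equals source_samples * (nb_classes - 1) under the default
    # source_samples = 10, so one batch covers every crafted example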
    eval_params = {'batch_size': 90}
    if targeted:
        noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
            sess,
            x,
            x_t,
            recons,
            adv_inputs,
            adv_input_targets,
            adv,
            recon_adv,
            lat_orig,
            lat_orig_recon,
            args=eval_params)
        acc = model_eval(sess,
                         x,
                         y,
                         pred_adv_recon,
                         x_t,
                         adv_inputs,
                         adv_target_y,
                         adv_input_targets,
                         args=eval_params_cls)
        print("noise: ", noise)
        print("classifier acc: ", acc)

    recon_adv = sess.run(recon_adv)
    recon_orig = sess.run(recon_orig)
    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    if i == j:
                        # Diagonal: the clean image and its reconstruction
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    elif j > i:
                        grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1]
                        grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j - 1]
                    else:
                        grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j]
                        grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j]

        #print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the average L2 distortion introduced by the attack
    mean_l2_perturbation = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_perturbation))

    # The TF session is still needed below, so it is not closed here
    #sess.close()

    # Finally, block & display a grid of all the adversarial examples

    if viz_enabled:
        _ = grid_visual(grid_viz_data)
        _ = grid_visual(grid_viz_data_1)

    #return report

    # Adversarial training
    if adv_train:

        print("starting adversarial training")
        #sess1 = tf.Session()
        adv_input_set = []
        adv_input_target_set = []

        # Build 20 rounds of source/target pairs from reshuffled training data
        for _ in range(20):

            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]

            idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2 = np.array([[instance] * (nb_classes - 1)
                                     for instance in x_train[idxs]],
                                    dtype=np.float32)
            adv_input_targets_2 = np.array(
                [[x_train[idxs[i]] for i in range(nb_classes) if i != curr_num]
                 for curr_num in range(nb_classes)])

            adv_inputs_2 = adv_inputs_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))
            adv_input_targets_2 = adv_input_targets_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_set.append(adv_inputs_2)
            adv_input_target_set.append(adv_input_targets_2)

        adv_input_set = np.array(adv_input_set)
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        # Collapse the (rounds, batch) leading axes into a single batch axis
        adv_input_set = np.reshape(adv_input_set,
                                   (-1,) + np.shape(adv_input_set)[2:])
        adv_input_target_set = np.reshape(
            adv_input_target_set, (-1,) + np.shape(adv_input_target_set)[2:])

        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

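        # Train the autoencoder to map each adversarial example (x_train_app)
        # back to its clean source image (x_train_aim)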
        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        model_adv_trained = ModelBasicAE('model_adv_trained', nb_layers,
                                         nb_latent_size)
        recons_2 = model_adv_trained.get_layer(x, 'RECON')
        loss_2 = SquaredError(model_adv_trained)
        train_ae(sess,
                 loss_2,
                 x_train_app,
                 x_train_aim,
                 args=train_params,
                 rng=rng,
                 var_list=model_adv_trained.get_params())
        saver = tf.train.Saver()
        saver.save(sess, model_path)

        cw2 = CarliniWagnerAE(model_adv_trained, cl_model, sess=sess)

        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        #print("shaep of adv: ", np.shape(adv))
        recon_orig = model_adv_trained.get_layer(adv_inputs, 'RECON')
        recon_adv = model_adv_trained.get_layer(adv_2, 'RECON')
        lat_orig = model_adv_trained.get_layer(x, 'LATENT')
        # presumably recons_2 (the adversarially trained AE's reconstruction,
        # defined above but otherwise unused) was intended here
        lat_orig_recon = model_adv_trained.get_layer(recons_2, 'LATENT')
        pred_adv_recon = cl_model.get_layer(recon_adv, 'LOGITS')

        #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        eval_params = {'batch_size': 90}
        if targeted:
            noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
                sess,
                x,
                x_t,
                recons_2,
                adv_inputs,
                adv_input_targets,
                adv_2,
                recon_adv,
                lat_orig,
                lat_orig_recon,
                args=eval_params)
            acc = model_eval(sess,
                             x,
                             y,
                             pred_adv_recon,
                             x_t,
                             adv_inputs,
                             adv_target_y,
                             adv_input_targets,
                             args=eval_params_cls)
            print("noise: ", noise)
            #print("d1: ", d1)
            #print("d2: ", d2)
            #print("d1-d2: ", dist_diff)
            #print("Avg_dist_lat: ", avg_dist_lat)
            print("classifier acc: ", acc)

        recon_adv = sess.run(recon_adv)
        recon_orig = sess.run(recon_orig)
        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    if i == j:
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    elif j > i:
                        grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1]
                        grid_viz_data_1[i, j] = adv_2[i * (nb_classes - 1) + j - 1]
                    else:
                        grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j]
                        grid_viz_data_1[i, j] = adv_2[i * (nb_classes - 1) + j]

            #print(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the average L2 distortion introduced by the attack
        mean_l2_perturbation = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(
            mean_l2_perturbation))

        # Close TF session
        sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if viz_enabled:
            _ = grid_visual(grid_viz_data)
            _ = grid_visual(grid_viz_data_1)

        return report


    # Binarization / mean-filtering defenses
    if binarization_defense or mean_filtering:

        #adv = sess.run(adv)
        # print(adv[0])
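        # Binarization is a feature-squeezing defense: snapping pixels to
        # {0, 1} destroys the low-amplitude structure the attack relies on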
        if binarization_defense:
            adv[adv > 0.5] = 1.0
            adv[adv <= 0.5] = 0.0
        else:
            #radius = 2
            #adv_list = [mean(adv[i,:,:,0], disk(radius)) for i in range(0, np.shape(adv)[0])]
            #adv = np.array(adv_list)
            #adv = np.expand_dims(adv, axis = 3)
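            # uniform_filter (presumably scipy.ndimage) replaces each pixel
            # with the mean of its 2x2 neighborhood, smoothing the perturbation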
            adv = uniform_filter(adv, 2)
            #adv = median_filter(adv, 2)
        #print("after bin ")
        #print(adv[0])

        recon_orig = model.get_layer(adv_inputs, 'RECON')
        recon_adv = model.get_layer(adv, 'RECON')
        lat_adv = model.get_layer(adv, 'LATENT')
        lat_orig = model.get_layer(x, 'LATENT')
        lat_orig_recon = model.get_layer(recons, 'LATENT')
        pred_adv_recon = cl_model.get_layer(lat_adv, 'LOGITS')

        #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        eval_params = {'batch_size': 90}
        if targeted:
            noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
                sess,
                x,
                x_t,
                recons,
                adv_inputs,
                adv_input_targets,
                adv,
                recon_adv,
                lat_orig,
                lat_orig_recon,
                args=eval_params)
            acc1 = model_eval(sess,
                              x,
                              y,
                              pred_adv_recon,
                              x_t,
                              adv_inputs,
                              adv_target_y,
                              adv_input_targets,
                              args=eval_params_cls)
            acc2 = model_eval(sess,
                              x,
                              y,
                              pred_adv_recon,
                              x_t,
                              adv_inputs,
                              adv_input_y,
                              adv_input_targets,
                              args=eval_params_cls)
            print("noise: ", noise)
            print("classifier acc for target class: ", acc1)
            print("classifier acc for true class: ", acc2)

        recon_adv = sess.run(recon_adv)
        recon_orig = sess.run(recon_orig)
        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    if i == j:
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    elif j > i:
                        grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1]
                        grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j - 1]
                    else:
                        grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j]
                        grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j]
            sess.close()

            _ = grid_visual(grid_viz_data)
            _ = grid_visual(grid_viz_data_1)
Example #25
def SNNL_example(train_start=0,
                 train_end=60000,
                 test_start=0,
                 test_end=10000,
                 nb_epochs=NB_EPOCHS,
                 batch_size=BATCH_SIZE,
                 learning_rate=LEARNING_RATE,
                 nb_filters=NB_FILTERS,
                 SNNL_factor=SNNL_FACTOR,
                 output_dir=OUTPUT_DIR):
    """
  A simple model trained to minimize Cross Entropy and Maximize Soft Nearest
  Neighbor Loss at each internal layer. This outputs a TSNE of the sign of
  the adversarial gradients of a trained model. A model with a negative
  SNNL_factor will show little or no class clusters, while a model with a
  0 SNNL_factor will have class clusters in the adversarial gradient direction.
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param SNNL_factor: multiplier for Soft Nearest Neighbor Loss
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        print('Test accuracy on legitimate examples: %0.4f' % (acc))

    model = ModelBasicCNN('model', nb_classes, nb_filters)
    preds = model.get_logits(x)
    cross_entropy_loss = CrossEntropy(model)
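    # SNNLCrossEntropy adds a soft-nearest-neighbor term over the hidden
    # layers on top of plain cross-entropy; a zero factor reduces to plain CE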
    if not SNNL_factor:
        loss = cross_entropy_loss
    else:
        loss = SNNLCrossEntropy(model,
                                factor=SNNL_factor,
                                optimize_temperature=False)

    def evaluate():
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval')

    train(sess,
          loss,
          x_train,
          y_train,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=model.get_params())

    do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    def imscatter(points, images, ax=None, zoom=1, cmap="hot"):
        if ax is None:
            ax = plt.gca()
        artists = []
        if not isinstance(cmap, list):
            cmap = [cmap] * len(points)
        for i, (x0, y0) in enumerate(points):
            transformed = (images[i] - np.min(images[i])) / \
                (np.max(images[i]) - np.min(images[i]))
            im = OffsetImage(transformed[:, :, 0], zoom=zoom, cmap=cmap[i])
            ab = AnnotationBbox(im, (x0, y0), xycoords='data', frameon=False)
            artists.append(ax.add_artist(ab))
        ax.update_datalim(np.column_stack(np.transpose(points)))
        ax.autoscale()
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        return artists

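    # The sign of the loss gradient w.r.t. the input is the FGSM perturbation
    # direction; the TSNE below embeds these signed gradients in 2-D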
    adv_grads = tf.sign(tf.gradients(cross_entropy_loss.fprop(x, y), x))
    feed_dict = {x: x_test[:batch_size], y: y_test[:batch_size]}
    adv_grads_val = sess.run(adv_grads, feed_dict=feed_dict)
    adv_grads_val = np.reshape(adv_grads_val,
                               (batch_size, img_rows * img_cols))

    X_embedded = TSNE(n_components=2, verbose=0).fit_transform(adv_grads_val)
    plt.figure(num=None,
               figsize=(50, 50),
               dpi=40,
               facecolor='w',
               edgecolor='k')
    plt.title(
        "TSNE of Sign of Adv Gradients, SNNLCrossEntropy Model, factor:" +
        str(SNNL_factor),
        fontsize=42)
    imscatter(X_embedded, x_test[:batch_size], zoom=2, cmap="Purples")
    plt.savefig(output_dir + 'adversarial_gradients_SNNL_factor_' +
                str(SNNL_factor) + '.png')
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=NB_FILTERS, num_threads=None,
                   attack_string=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example.
    :param train_end: index of last training set example.
    :param test_start: index of first test set example.
    :param test_end: index of last test set example.
    :param nb_epochs: number of epochs to train model.
    :param batch_size: size of training batches.
    :param learning_rate: learning rate for training.
    :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate.
    :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
    :param nb_filters: number of filters in the CNN used for training.
    :param num_threads: number of threads used for running the process.
    :param attack_string: attack name for crafting adversarial attacks and
                          adversarial training, in string format.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    X_train, Y_train = mnist.get_set('train')
    X_test, Y_test = mnist.get_set('test')

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)
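    # i.e. the true class keeps probability 0.9 and every wrong class gets
    # 0.1 / 9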

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }

    # Initialize the attack object
    attack_class = attack_selection(attack_string)
    attack_params = {'eps': 0.3, 'clip_min': 0.,
                     'clip_max': 1.}

    rng = np.random.RandomState([2018, 6, 18])
    if clean_train:
        model = ModelBasicCNNTFE(nb_filters=nb_filters)

        def evaluate_clean():
            """
            Evaluate the accuracy of the MNIST model on legitimate test
            examples
            """
            eval_params = {'batch_size': batch_size}
            acc = model_eval(model, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        train(model, X_train, Y_train, evaluate=evaluate_clean,
              args=train_params, rng=rng, var_list=model.get_params())

        if testing:
            # Calculate training error
            eval_params = {'batch_size': batch_size}
            acc = model_eval(model, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        attack = attack_class(model)
        acc = model_eval(
            model, X_test, Y_test, args=eval_par,
            attack=attack, attack_args=attack_params)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(
                model, X_train, Y_train, args=eval_par,
                attack=attack, attack_args=attack_params)
            print('Train accuracy on adversarial examples: %0.4f\n' % acc)
            report.train_clean_train_adv_eval = acc

        attack = None
        print("Repeating the process, using adversarial training")

    model_adv_train = ModelBasicCNNTFE(nb_filters=nb_filters)
    attack = attack_class(model_adv_train)

    def evaluate_adv():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(
            model_adv_train, X_test, Y_test,
            args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy
        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(
            model_adv_train, X_test, Y_test,
            args=eval_params, attack=attack,
            attack_args=attack_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(model_adv_train, X_train, Y_train, evaluate=evaluate_adv,
          args=train_params, rng=rng,
          var_list=model_adv_train.get_params(),
          attack=attack, attack_args=attack_params)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(
            model_adv_train, X_train, Y_train, args=eval_params,
            attack=None, attack_args=None)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(
            model_adv_train, X_train, Y_train, args=eval_params,
            attack=attack, attack_args=attack_params)
        report.train_adv_train_adv_eval = accuracy
    return report
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=True,
                   label_smoothing=0.1):
    """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
    tf.keras.backend.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != 'channels_last':
        raise NotImplementedError(
            "this tutorial requires keras to be configured to channels_last format"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows,
                      img_cols=img_cols,
                      channels=nchannels,
                      nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()

    eval_params = {'batch_size': batch_size}

    def eval_on(x_input):
        acc = model_eval(sess, x, y, preds, x_input, y_test, args=eval_params)
        print(acc)

    eval_on(x_test)
    # Load the separately trained denoising autoencoder; `load_modell` is
    # presumably an alias of keras.models.load_model, since the name
    # `load_model` is already taken by this function's parameter
    autoencoder = load_modell("./autoencoder_bim.h5")
    x_test_decoded = autoencoder.predict(x_test)
    eval_on(x_test_decoded)

    _, bim_test = pickle.load(open("/data/mnist/bim.pkl", "rb"))
    bim_test = bim_test.astype('float32') / 255.
    bim_test = np.mean(bim_test, axis=3)
    bim_test = np.expand_dims(bim_test, axis=3)
    bim_test_decoded = autoencoder.predict(bim_test)
    eval_on(bim_test)
    eval_on(bim_test_decoded)
Example #28
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=True, label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
  tf.keras.backend.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session()
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  [x_adv_train, x_adv_test] = pickle.load(open(PKLDATA + '.pkl', 'rb'))
  x_train, y_train = mnist.get_set('train')
  x_train = x_adv_train
  x_test, y_test = mnist.get_set('test')
  x_test = x_adv_test

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Define TF model graph
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds = model(x)
  print("Defined TensorFlow model graph.")

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on adversarial examples: %0.4f' % acc)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
  wrap = KerasModelWrapper(model)

  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate()
  else:
    print("Model was not loaded, training from scratch.")
    loss = CrossEntropy(wrap, smoothing=label_smoothing)
    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng)

  # Calculate training error
  if testing:
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
    report.train_clean_train_clean_eval = acc

  # Build the binary-filter graph that squeezes each pixel to 0 or 1
  filter_x = binary_filter(x)

  batch = 1000

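  # Push inputs through the filter graph in chunks of `batch` samples to keep
  # each feed_dict (and session call) a manageable size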
  def convert(x_input):
    x_output = None
    for i in tqdm(range(int(len(x_input) / batch))):
      tmp = sess.run(filter_x, feed_dict={x: x_input[i * batch:(i + 1) * batch]})
      if x_output is None:
        x_output = tmp
      else:
        x_output = np.concatenate((x_output, tmp))
    return x_output

  x_filter_test = convert(x_test)
  x_filter_train = convert(x_train)


  def evaluate_adv():
    # Evaluate the accuracy of the MNIST model on binary-filtered test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_filter_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on filtered examples: %0.4f' % acc)

  evaluate_adv()
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
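    # eps = 0.3 is the customary MNIST FGSM budget: each pixel may shift by up
    # to 0.3 in [0, 1] space, with clipping keeping results valid images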
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = make_basic_picklable_cnn()
        # Tag the model so that when it is saved to disk, future scripts will
        # be able to tell what data it was trained on
        model.dataset_factory = mnist.get_factory()
        preds = model.get_logits(x)
        assert len(model.get_params()) > 0
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        with sess.as_default():
            save("clean_model.joblib", model)

            print("Now that the model has been saved, you can evaluate it in a"
                  " separate process using `evaluate_pickled_model.py`. "
                  "You should get exactly the same result for both clean and "
                  "adversarial accuracy as you get within this program.")

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = make_basic_picklable_cnn()
    # Tag the model so that when it is saved to disk, future scripts will
    # be able to tell what data it was trained on
    model2.dataset_factory = mnist.get_factory()
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          x_train,
          y_train,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    with sess.as_default():
        save("adv_model.joblib", model2)
        print(
            "Now that the model has been saved, you can evaluate it in a "
            "separate process using "
            "`python evaluate_pickled_model.py adv_model.joblib`. "
            "You should get exactly the same result for both clean and "
            "adversarial accuracy as you get within this program."
            " You can also move beyond the tutorials directory and run the "
            " real `compute_accuracy.py` script (make sure cleverhans/scripts "
            "is in your PATH) to see that this FGSM-trained "
            "model is actually not very robust---it's just a model that trains "
            " quickly so the tutorial does not take a long time")

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
Example #30
def run_mnist_adv(num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE,
                  testing=False, learning_rate=LEARNING_RATE):

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # set random seed
    tf.set_random_seed(42)

    # Allow the session to use a GPU if one is available
    config = tf.ConfigProto(device_count={'GPU': 1, 'CPU': 1})

    # Create TF session and set Keras backend session as TF
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST()
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain image params
    n_rows, n_cols, n_channels = x_train.shape[1:4]
    n_classes = y_train.shape[1]

    # define TF model graph
    model = ConvNet((n_rows, n_cols, n_channels), n_classes)
    model(model.input)

    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.
    }
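    # get_adversarial_acc_metric presumably wraps model accuracy on
    # FGSM-perturbed inputs as an extra Keras metric tracked during training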
    adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy', adv_acc_metric]
    )

    # Train an MNIST model
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=num_epochs,
              validation_data=(x_test, y_test),
              verbose=1)

    # Evaluate the accuracy on legitimate and adversarial test examples
    _, acc, adv_acc = model.evaluate(x_test, y_test,
                                     batch_size=batch_size,
                                     verbose=0)
    report.clean_train_clean_eval = acc
    report.clean_train_adv_eval = adv_acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

    # Calculate training error
    if testing:
        _, train_acc, train_adv_acc = model.evaluate(x_train, y_train,
                                                     batch_size=batch_size,
                                                     verbose=0)
        report.train_clean_train_clean_eval = train_acc
        report.train_clean_train_adv_eval = train_adv_acc

    print("Repeating the process, using adversarial training")
    # Redefine Keras model
    model_2 = ConvNet((n_rows, n_cols, n_channels), n_classes)
    model_2(model_2.input)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm_2 = FastGradientMethod(wrap_2, sess=sess)

    # Use a loss function based on legitimate and adversarial examples
    adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params)
    adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params)
    model_2.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss=adv_loss_2,
        metrics=['accuracy', adv_acc_metric_2]
    )

    # Train an MNIST model
    model_2.fit(x_train, y_train,
                batch_size=batch_size,
                epochs=num_epochs,
                validation_data=(x_test, y_test),
                verbose=1)

    # Evaluate the accuracy on legitimate and adversarial test examples
    _, acc, adv_acc = model_2.evaluate(x_test, y_test,
                                       batch_size=batch_size,
                                       verbose=0)
    report.adv_train_clean_eval = acc
    report.adv_train_adv_eval = adv_acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

    # Calculate training error
    if testing:
        _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train,
                                                       batch_size=batch_size,
                                                       verbose=0)
        report.train_adv_train_clean_eval = train_acc
        report.train_adv_train_adv_eval = train_adv_acc

    return report