예제 #1
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64, num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds_adv, X_train,
                             Y_train, args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(model_2, sess=sess)
    adv_x_2 = fgsm2.generate(x, **fgsm_params)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #2
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, testing=False,
                   label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param testing: if true, training error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)
  # Force TensorFlow to use single thread to improve reproducibility
  config = tf.ConfigProto(intra_op_parallelism_threads=1,
                          inter_op_parallelism_threads=1)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session(config=config)
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Label smoothing
  y_train -= label_smoothing * (y_train - 1. / nb_classes)

  # Define Keras model
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  print("Defined Keras model.")

  # To be able to call the model in the custom loss, we need to call it once
  # before, see https://github.com/tensorflow/tensorflow/issues/23769
  model(model.input)

  # Initialize the Fast Gradient Sign Method (FGSM) attack object
  wrap = KerasModelWrapper(model)
  fgsm = FastGradientMethod(wrap, sess=sess)
  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}

  adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params)
  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate),
      loss='categorical_crossentropy',
      metrics=['accuracy', adv_acc_metric]
  )

  # Train an MNIST model
  model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=nb_epochs,
            validation_data=(x_test, y_test),
            verbose=2)

  # Evaluate the accuracy on legitimate and adversarial test examples
  _, acc, adv_acc = model.evaluate(x_test, y_test,
                                   batch_size=batch_size,
                                   verbose=0)
  report.clean_train_clean_eval = acc
  report.clean_train_adv_eval = adv_acc
  print('Test accuracy on legitimate examples: %0.4f' % acc)
  print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

  # Calculate training error
  if testing:
    _, train_acc, train_adv_acc = model.evaluate(x_train, y_train,
                                                 batch_size=batch_size,
                                                 verbose=0)
    report.train_clean_train_clean_eval = train_acc
    report.train_clean_train_adv_eval = train_adv_acc

  print("Repeating the process, using adversarial training")
  # Redefine Keras model
  model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
  model_2(model_2.input)
  wrap_2 = KerasModelWrapper(model_2)
  fgsm_2 = FastGradientMethod(wrap_2, sess=sess)

  # Use a loss function based on legitimate and adversarial examples
  adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params)
  adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params)
  model_2.compile(
      optimizer=keras.optimizers.Adam(learning_rate),
      loss=adv_loss_2,
      metrics=['accuracy', adv_acc_metric_2]
  )

  # Train an MNIST model
  model_2.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=nb_epochs,
              validation_data=(x_test, y_test),
              verbose=2)

  # Evaluate the accuracy on legitimate and adversarial test examples
  _, acc, adv_acc = model_2.evaluate(x_test, y_test,
                                     batch_size=batch_size,
                                     verbose=0)
  report.adv_train_clean_eval = acc
  report.adv_train_adv_eval = adv_acc
  print('Test accuracy on legitimate examples: %0.4f' % acc)
  print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

  # Calculate training error
  if testing:
    _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train,
                                                   batch_size=batch_size,
                                                   verbose=0)
    report.train_adv_train_clean_eval = train_acc
    report.train_adv_train_adv_eval = train_adv_acc

  return report
def baseline_jsma(train_start=0, train_end=60000, test_start=0,
                  test_end=10000, nb_epochs=6, batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    # assert Y_train.shape[1] == 10
    # label_smooth = .1
    # Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        #
        # HERE already trained model, thus we need a new one (model_2)
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the JSMA attack object and
        # graph
        jsma = SaliencyMapMethod(model, sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds_adv, X_train,
                             Y_train, args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    jsma2 = SaliencyMapMethod(model_2, sess=sess)
    adv_x_2 = jsma2.generate(x, **jsma_params)
    preds_2_adv = model_2(adv_x_2)

    #
    # let's generate FGSM examples for model_2
    #
    fgsm = FastGradientMethod(model_2, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_2_fgsm = model_2(adv_x_fgsm)

    # DON'T WANT TO TRAIN on FGSM adv examples yet

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on JSMA adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

        # Accuracy of the JSMA adv trained model on FGSM adv examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2_fgsm, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on SaliencyMapMethod adversarial examples: %0.4f' % accuracy)

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #4
0
    def test_run_single_gpu_fgsm(self):
        """
        Test the basic single GPU performance by comparing to the FGSM
        tutorial.
        """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {'train_start': 0,
                 'train_end': 5000,
                 'test_start': 0,
                 'test_end': 333,
                 'nb_epochs': 5,
                 'testing': True}
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update({'batch_size': 128, 'adam_lrn': 0.001,
                      'dataset': 'mnist', 'only_adv_train': False,
                      'eval_iters': 1, 'ngpu': 1, 'fast_tests': False,
                      'attack_type_train': '',
                      'save_dir': None, 'save_steps': 10000,
                      'attack_nb_iter_train': None, 'save': False,
                      'model_type': 'basic', 'attack_type_test': 'FGSM'})

        flags.update({'adv_train': False})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict['train']
        report_2.clean_train_clean_eval = report_dict['test']
        report_2.clean_train_adv_eval = report_dict['FGSM']

        # Run the multi-gpu trainer for adversarial training
        flags.update({'adv_train': True,
                      'attack_type_train': 'FGSM',
                      })
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict['train']
        report_2.adv_train_clean_eval = report_dict['test']
        report_2.adv_train_adv_eval = report_dict['FGSM']

        self.assertClose(report.train_clean_train_clean_eval,
                         report_2.train_clean_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report.clean_train_clean_eval,
                         report_2.clean_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.clean_train_adv_eval,
                         report_2.clean_train_adv_eval,
                         atol=5e-2)
        self.assertClose(report.train_adv_train_clean_eval,
                         report_2.train_adv_train_clean_eval,
                         atol=1e-1)
        self.assertClose(report.adv_train_clean_eval,
                         report_2.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.adv_train_adv_eval,
                         report_2.adv_train_adv_eval,
                         atol=1e-1)
예제 #5
0
def mnist_tutorial(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    train_dir=TRAIN_DIR,
    filename=FILENAME,
    load_model=LOAD_MODEL,
    testing=False,
    label_smoothing=0.1,
):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    tf.keras.backend.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != "channels_last":
        raise NotImplementedError(
            "this tutorial requires keras to be configured to channels_last format"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end,
    )
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(
        img_rows=img_rows,
        img_cols=img_cols,
        channels=nchannels,
        nb_filters=64,
        nb_classes=nb_classes,
    )
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {"batch_size": batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print("Test accuracy on legitimate examples: %0.4f" % acc)

    # Train an MNIST model
    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "train_dir": train_dir,
        "filename": filename,
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(
            sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng
        )

    # Calculate training error
    if testing:
        eval_params = {"batch_size": batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {"eps": 0.3, "clip_min": 0.0, "clip_max": 1.0}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {"batch_size": batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print("Test accuracy on adversarial examples: %0.4f\n" % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {"batch_size": batch_size}
        acc = model_eval(sess, x, y, preds_adv, x_train, y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(
        img_rows=img_rows,
        img_cols=img_cols,
        channels=nchannels,
        nb_filters=64,
        nb_classes=nb_classes,
    )
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {"batch_size": batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_test, y_test, args=eval_params)
        print("Test accuracy on legitimate examples: %0.4f" % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, x_test, y_test, args=eval_params)
        print("Test accuracy on adversarial examples: %0.4f" % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(
        sess, loss_2, x_train, y_train, evaluate=evaluate_2, args=train_params, rng=rng
    )

    # Calculate training errors
    if testing:
        eval_params = {"batch_size": batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_train, y_train, args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(
            sess, x, y, preds_2_adv, x_train, y_train, args=eval_params
        )
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #6
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, train_dir="train_dir",
                   filename="mnist.ckpt", load_model=False,
                   testing=False, label_smoothing=0.1):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
#        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = LossCrossEntropy(wrap, smoothing=label_smoothing)
        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, save=True, rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, x_train,
                         y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols,
                        channels=nchannels, nb_filters=64,
                        nb_classes=nb_classes)
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = LossCrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_test, y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, x_test,
                              y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess, loss_2, x, y, x_train, y_train, evaluate=evaluate_2,
          args=train_params, save=False, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_train, y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, x_train,
                              y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, train_dir="/tmp",
                   filename="mnist.ckpt", load_model=False,
                   testing=False):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    ckpt = tf.train.get_checkpoint_state(train_dir)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    rng = np.random.RandomState([2017, 8, 30])
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
              args=train_params, save=True)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_train,
                         Y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess, x, y, preds_2, X_train, Y_train,
          predictions_adv=preds_2_adv, evaluate=evaluate_2,
          args=train_params, save=False)

    # Get a random slice of the data for linear extrapolation plots
    random_idx = np.random.randint(0, X_train.shape[0])
    X_slice = X_train[random_idx]
    Y_slice = Y_train[random_idx]

    # Plot the linear extrapolation plot for clean model
    log_prob_adv_array = get_logits_over_interval(
        sess, wrap, X_slice, fgsm_params)
    linear_extrapolation_plot(log_prob_adv_array, Y_slice,
                              'lep_clean.png')

    # Plot the linear extrapolation plot for adv model
    log_prob_adv_array = get_logits_over_interval(
        sess, wrap_2, X_slice, fgsm_params)
    linear_extrapolation_plot(log_prob_adv_array, Y_slice,
                              'lep_adv.png')

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #8
0
    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, save=False, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    print(report)
예제 #9
0
    def helper_run_multi_gpu_madryetal(self, extra_flags=None):
        """
        Compare the single GPU performance to multiGPU performance.
        """
        # Run the trainers on a dataset of reduced size
        flags = {
            "train_start": 0,
            "train_end": 5000,
            "test_start": 0,
            "test_end": 333,
            "nb_epochs": 5,
            "testing": True,
        }

        # Run the multi-gpu trainer for adversarial training
        flags.update(
            {
                "batch_size": 128,
                "adam_lrn": 0.001,
                "dataset": "mnist",
                "only_adv_train": False,
                "eval_iters": 1,
                "fast_tests": True,
                "save_dir": None,
                "save_steps": 10000,
                "attack_nb_iter_train": 10,
                "sync_step": None,
                "adv_train": True,
                "save": False,
                "model_type": "basic",
                "attack_type_test": "MadryEtAl_y",
            }
        )
        if extra_flags is not None:
            flags.update(extra_flags)

        # Run the multi-gpu trainer for adversarial training using 2 gpus
        # trainer_multigpu by default sets `allow_soft_placement=True`
        flags.update(
            {"ngpu": 2, "attack_type_train": "MadryEtAl_y_multigpu", "sync_step": 1}
        )
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_m = AccuracyReport()
        report_m.train_adv_train_clean_eval = report_dict["train"]
        report_m.adv_train_clean_eval = report_dict["test"]
        report_m.adv_train_adv_eval = report_dict["MadryEtAl_y"]

        flags.update({"ngpu": 1, "attack_type_train": "MadryEtAl_y"})
        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_s = AccuracyReport()
        report_s.train_adv_train_clean_eval = report_dict["train"]
        report_s.adv_train_clean_eval = report_dict["test"]
        report_s.adv_train_adv_eval = report_dict["MadryEtAl_y"]

        self.assertClose(
            report_s.train_adv_train_clean_eval,
            report_m.train_adv_train_clean_eval,
            atol=5e-2,
        )
        self.assertClose(
            report_s.adv_train_clean_eval, report_m.adv_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report_s.adv_train_adv_eval, report_m.adv_train_adv_eval, atol=5e-2
        )
예제 #10
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=100, batch_size=128,
                   learning_rate=0.001, train_dir="/tmp",
                   filename="mnist.ckpt", load_model=False,
                   testing=True, w=2, rel=0):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :param w : number of perceptive neurons
    :return: an AccuracyReport object
    """
    
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()
    
    # Set TF random seed to improve reproducibility
    #tf.set_random_seed(1234)
    
    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")
    
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")
    
    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    keras.layers.core.K.set_learning_phase(1)
    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    
    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)
    
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    
    # Define TF model graph
    models = spartan_network(sess=sess,w=w,train=True,create_surrogate=True)
    model_to_attack = models[0]
    spartan_model = models[1]
    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(ckpt)
    trainvalue=True if ckpt is None else False
    #model_to_attack = unprotected_network(sess=sess, train=trainvalue, save=True)
    preds = model_to_attack(x)
    print("Defined TensorFlow model graph.")
    
    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)
    
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    rng = None
    ckpt = tf.train.get_checkpoint_state(train_dir)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        keras.layers.core.K.set_learning_phase(1)
        # model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
          #          args=train_params, save=True, rng=rng)
    keras.layers.core.K.set_learning_phase(0)
    
    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        print("With no Dropout : %s" % acc)
        report.train_clean_train_clean_eval = acc
    
    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model_to_attack)
    global mdl
    mdl = model_to_attack
    fgsm = FastGradientMethod(wrap, sess=sess)
    for epstep in range(40):
        fgsm_params = {'eps': 0.01+0.02*epstep,
                   'clip_min': 0.,
                   'clip_max': 1.}
    # cw_params = {'confidence': 0.5,
    #              'batch_size': 4,
    #              'learning_rate': 2e-2,
    #              'max_iterations': 400,
    #              'clip_min': 0.,
    #              'clip_max': 1.}
        adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
    # cwattack = atk.CarliniWagnerL2(model_to_attack,sess=sess)
    # adv_x = cwattack.generate(x, **cw_params)
    # adv_x = tf.stop_gradient(adv_x)
    # adv_x_np = cwattack.generate_np(X_test[500:704], **cw_params)
    # from matplotlib import pyplot as plt
    # plt.rc('figure', figsize=(12.0, 12.0))
    # for j in range(40):
    #
    #     plt.imshow(adv_x_np[j].reshape((28, 28)),
    #                cmap="gray")
    #     plt.pause(0.15)
    # return
    
        preds_adv = spartan_model(adv_x)
    
    
    # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on epsilon-%0.4f-adversarial examples: %0.4f\n' % (fgsm_params["eps"],acc))
    return
    report.clean_train_adv_eval = acc
    
    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_train,
                         Y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc
    
    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(w=w, rel=rel)
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params))
    
    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy
        
        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy
    
    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, save=False, rng=rng)
    
    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy
예제 #11
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        print(adv_x)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        # Define accuracy symbolically
        if LooseVersion(tf.__version__) >= LooseVersion('1.0.0'):
            correct_preds = tf.not_equal(tf.argmax(y, axis=-1),
                                     tf.argmax(preds_adv, axis=-1))
        else:
            correct_preds = tf.not_equal(tf.argmax(y, axis=tf.rank(y) - 1),
                                     tf.argmax(preds_adv,
                                               axis=tf.rank(preds_adv) - 1))
        # print("the shape of correct_preds is ", correct_preds.get_shape())
        # correct_preds is a boolean Tensor with shape (size,)
        success_adv_x = tf.boolean_mask(adv_x, correct_preds)
        success_clean_x = tf.boolean_mask(x, correct_preds)
        success_clean_y = tf.boolean_mask(y, correct_preds)
        fgsm_adv_x, fgsm_clean_x, fgsm_clean_y = sess.run([success_adv_x, success_clean_x, success_clean_y], feed_dict={x:X_test,y:Y_test})
        np.savez('adversarial_fgsm',adv_examples=fgsm_adv_x, adv_clean_labels=fgsm_clean_y, adv_clean_examples=fgsm_clean_x)
        print("the shape of adversarial examples we save is ", np.shape(fgsm_adv_x))
        print("the shape of clean targets we save is ", np.shape(fgsm_clean_y))

        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples fgsm: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc
        adv_x_test_for_save = sess.run(adv_x, {x: X_test})
        np.savez("adv_test_fgsm_data.npz", adv_examples=adv_x_test_for_save, adv_clean_labels=Y_test, adv_clean_examples=X_test)
        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds_adv, X_train,
                             Y_train, args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(model_2, sess=sess)
    adv_x_2 = fgsm2.generate(x, **fgsm_params)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)
    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    print("pred_adv", preds_2_adv.get_shape())
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph


    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    source_samples = 10000
    nb_classes = 10
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    
    # create an array for storing adv examples
    adv_examples = np.empty([1,28,28,1])
    # for target labels
    adv_targets = np.empty([1,10])
    # corresponding clean/correct label
    adv_clean_labels = np.empty([1,10])
    # correspongding clean data
    adv_clean_examples = np.empty([1,28,28,1])
        
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = X_test[sample_ind:(sample_ind+1)] # generate from testing data

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind])) # generate from testing data
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        # grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        #     sample, (img_rows, img_cols, channels))
        
        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            #create fake target
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)
            # print('adv_x\'shape is ', np.shape(adv_x)) # (1,28,28,1)
            
            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)
            # if succeeds
            if res == 1:
                # append new adv_x to adv_examples array
                # append sample here, so that the number of times sample is appended mmatches number of adv_ex.
                adv_examples = np.append(adv_examples, adv_x, axis=0)
                adv_targets = np.append(adv_targets, one_hot_target, axis=0)
                adv_clean_labels = np.append(adv_clean_labels, np.expand_dims(Y_test[sample_ind],axis=0), axis=0) # generate from testing data
                adv_clean_examples = np.append(adv_clean_examples, sample, axis=0)

            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb
    print('--------------------------------------')
    adv_examples = adv_examples[1:,:,:,:]
    adv_targets = adv_targets[1:,:]
    adv_clean_labels = adv_clean_labels[1:,:]
    adv_clean_examples = adv_clean_examples[1:,:,:,:]
    np.savez('adversarial_jsma_actual_full',adv_examples=adv_examples, adv_targets=adv_targets, adv_clean_labels=adv_clean_labels,adv_clean_examples=adv_clean_examples)
    print(np.shape(adv_targets)[0], "adversarial examples have been saved.")
    
    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_test_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))


    return report
예제 #12
0
def minist_fgsm_saliency(
    train_start=0,
    train_end=10,
    test_start=0,
    test_end=5,
    nb_epochs=2,
    batch_size=128,
    learning_rate=0.001,
    clean_train=True,
    testing=False,
    backprop_through_attack=False,
    nb_filters=64,
    nb_classes=10,
    source_samples=10,
):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    # this way, all the 9 zeroes -> 0.1/9 because
    # the one-bit becomes 0.9
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # placeholder for y_target --> for saliency tensor
    y_target = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    ###########################################################################
    # Training the CNN model using TensorFlow: model --> base model
    ###########################################################################
    model = make_basic_cnn(nb_filters=nb_filters)
    preds = model.get_probs(x)

    if clean_train:
        # omg -> creates a cnn model
        # model = make_basic_cnn(nb_filters=nb_filters)
        # preds = model.get_probs(x)
        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        ###########################################################################
        # MODEL Train!!!!!!!!!!!!
        ###########################################################################
        # training the basic model, using train_params
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        ###########################################################################
        # Generate FGSM Adversarial based on model, and
        # Compute Base Model Accuracy
        ###########################################################################

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)

        # todo: follow the paper and run Cleverhans Output?
        fgsm_params_y = {'eps': 0.3, 'y': y, 'clip_min': 0., 'clip_max': 1.}

        #adv_x = fgsm.generate(x, **fgsm_params)
        adv_x = fgsm.generate(x, **fgsm_params_y)
        preds_adv = model.get_probs(adv_x)
        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

        ###########################################################################
        # Generate Saliency Map Adversarial Example and
        # Compute base model accuracy (only 10)
        ###########################################################################
        print("Saliency Map Attack On The Base Model")
        print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
              ' adversarial examples')

        # Instantiate a SaliencyMapMethod attack object --> modify y_target for each test_data again
        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        # Keep track of success (adversarial example classified in target)
        # Need this info to compute the success rate
        results = np.zeros((nb_classes, source_samples), dtype='i')

        # each sample will get 9 adversarial samples

        # adv_x_set: place_holder for all the x variations
        # correct_y_set: correct_y_output used for training

        adv_x_set = None
        adv_y_target = None

        # we need multi x_train_saliency / y_train_saliency
        #
        x_train_saliency = None
        y_train_saliency = None

        for sample_ind in xrange(0, source_samples):
            print('--------------------------------------')
            print('Saliency Attacking input %i/%i' %
                  (sample_ind + 1, source_samples))
            sample = X_train[sample_ind:(sample_ind + 1)]
            y_sample = Y_train[sample_ind:(sample_ind + 1)]

            current_class = int(np.argmax(Y_train[sample_ind]))
            target_classes = other_classes(nb_classes, current_class)

            # Loop over all target classes
            for target in target_classes:
                print('Generating adv. example for target class %i' % target)

                # Create x_train_saliency, corresponding to y_train_saliency
                if x_train_saliency is not None:
                    x_train_saliency = np.concatenate(
                        (x_train_saliency, sample), axis=0)
                    y_train_saliency = np.concatenate(
                        (y_train_saliency, y_sample), axis=0)
                else:
                    x_train_saliency = sample
                    y_train_saliency = y_sample
                    print("sample shape: ", x_train_saliency.shape)
                    print("y_sample shape: ", y_train_saliency.shape)

                # This call runs the Jacobian-based saliency map approach
                one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
                one_hot_target[0, target] = 1
                jsma_params['y_target'] = one_hot_target

                adv_x_np = jsma.generate_np(sample, **jsma_params)

                # Add to adv_x_set, correct_y_set
                if adv_x_set is not None:
                    adv_y_target = np.concatenate(
                        (adv_y_target, one_hot_target), axis=0)
                    adv_x_set = np.concatenate((adv_x_np, adv_x_set), axis=0)
                else:
                    adv_y_target = one_hot_target
                    adv_x_set = adv_x_np
                    print("adv_y_target shape(one-hot-encoding): ",
                          adv_y_target.shape)
                    print("adv_x_set(np) shape: ", adv_x_np.shape)

                # Check if success was achieved
                res = int(model_argmax(sess, x, preds, adv_x_np) == target)

                # Update the arrays for later analysis
                results[target, sample_ind] = res

        print('--------------------------------------')
        # Compute the number of adversarial examples that were successfully found
        nb_targets_tried = ((nb_classes - 1) * source_samples)
        succ_rate = float(np.sum(results)) / nb_targets_tried
        print('Avg. rate of successful Saliency adv. examples {0:.4f}'.format(
            succ_rate))
        report.clean_train_adv_eval = 1. - succ_rate

        # here we have successfully stacked up x_adversarial_set, y_correct_set
        # these can be used to provide training to our model now
        print("\n\n\n*****************************")
        print("Checking x_adv_set shape: ", adv_x_set.shape)
        print("Checking correct_y_set shape: ", adv_y_target.shape)

        print("x_training_saliency shape:", x_train_saliency.shape)
        print("y_training_saliency shape:", y_train_saliency.shape)

        # now construct model 3, define output -> input relationship tensor
        model_3 = make_basic_cnn(nb_filters=nb_filters)
        # define the x, the placeholder input - > preds_3 output
        preds_3 = model_3(x)

        # jsma3 = SaliencyMapMethod(model_3, sess=sess)
        #
        # jsma_params = {'theta': 1., 'gamma': 0.1,
        #                'clip_min': 0., 'clip_max': 1.,
        #                'y_target': y_target}
        #
        # # create adv_saliency set tensor, using x_train data and jsma_params containing adv_y_target
        # adv_jsma = jsma3.generate(x, jsma_params)
        # # create adv preds tensor
        # preds_jsma_adv = model_3(adv_jsma)

        # define saliency training model accuracy
        def evaluate_saliency():
            # Accuracy of adversarially trained model on legitimate test inputs
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds_3,
                                  x_train_saliency,
                                  y_train_saliency,
                                  args=eval_params)
            print('Test accuracy on legitimate examples: %0.4f' % accuracy)
            report.adv_train_clean_eval = accuracy

        ###########################################################################
        # MODEL Train for Saliency Map
        ###########################################################################
        # Perform and evaluate adversarial training with FSGM MODEL!!!
        # Train the model with samples of normal and adversarial examples!
        model_train(sess,
                    x,
                    y,
                    model_3,
                    x_train_saliency,
                    y_train_saliency,
                    evaluate=evaluate_saliency(),
                    args=train_params,
                    rng=rng)

        #todo: use jsma to create adversarial testing??? or training???

    # Redefine TF model FGSM!!!
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(model_2, sess=sess)

    # parameter for FGSM
    fgsm_params_y = {'eps': 0.3, 'y': y, 'clip_min': 0., 'clip_max': 1.}
    adv_x_2 = fgsm2.generate(x, **fgsm_params_y)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    ###########################################################################
    # MODEL Train for FGSM
    ###########################################################################
    # Perform and evaluate adversarial training with FSGM MODEL!!!
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, train_dir="train_dir",
                   filename="mnist.ckpt", load_model=False,
                   testing=False, label_smoothing=True):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    if label_smoothing:
        label_smooth = .1
        y_train = y_train.clip(label_smooth / (nb_classes-1),
                               1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
#        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = LossCrossEntropy(wrap, smoothing=0.1)
        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, save=True, rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, x_train,
                         y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols,
                        channels=nchannels, nb_filters=64,
                        nb_classes=nb_classes)
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = LossCrossEntropy(wrap_2, smoothing=0.1, attack=attack)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_test, y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, x_test,
                              y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess, loss_2, x, y, x_train, y_train, evaluate=evaluate_2,
          args=train_params, save=False, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_train, y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, x_train,
                              y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
def train(train_start=0,
          train_end=60000,
          test_start=0,
          test_end=10000,
          nb_epochs=6,
          batch_size=128,
          learning_rate=0.001,
          clean_train=True,
          testing=False,
          use_rec_err=True,
          backprop_through_attack=False,
          nb_filters=64,
          num_threads=None,
          model_arch=None,
          use_cross_error=True,
          attack_name=None,
          dataset_name='mnist',
          blocking_option=None,
          opt_type='adam',
          rec_error_weight=None):
    """
    Train model
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    attacks = {
        'fgsm': FastGradientMethod,
        'pgd': MadryEtAl,
        'pgd_restart': MadryEtAl_WithRestarts,
        'cw': CarliniWagnerL2
    }

    attack_params = {
        'mnist': {
            'fgsm': {
                'eps': 0.3,
                'ord': np.inf,
                'clip_min': 0.,
                'clip_max': 1.
            },
            'pgd': {
                'eps': 0.3,
                'eps_iter': 0.01,
                'nb_iter': 40
            },
            'pgd_restart': {
                'eps': 0.3,
                'eps_iter': 0.01,
                'nb_iter': 40,
                'nb_restarts': 1
            },
            'cw': {
                'batch_size': batch_size
            },
        },
        'cifar': {
            'fgsm': {
                'eps': 0.1 + 0.0 * 8.0 / 255,
                'ord': np.inf,
                'clip_min': 0.,
                'clip_max': 1.
            },
            'pgd': {
                'eps': 0.1 + 0.0 * 8.0 / 255,
                'eps_iter': 0.01,
                'nb_iter': 40
            },
            'pgd_restart': {
                'eps': 8.0 / 255,
                'eps_iter': 0.01,
                'nb_iter': 40,
                'nb_restarts': 1
            },
            'cw': {
                'batch_size': batch_size
            },
        },
        'fashion_mnist': {
            'fgsm': {
                'eps': 0.1,
                'ord': np.inf,
                'clip_min': 0.,
                'clip_max': 1.
            },
            'pgd': {
                'eps': 0.3,
                'eps_iter': 0.01,
                'nb_iter': 40
            },
            'pgd_restart': {
                'eps': 0.3,
                'eps_iter': 0.01,
                'nb_iter': 40,
                'nb_restarts': 1
            },
            'cw': {
                'batch_size': batch_size
            },
        },
    }[dataset_name]

    print("attack parameters:", attack_params[attack_name])

    def get_rec_err(pre_, post_):
        if not use_rec_err:
            return None
        return compute_rec_err(pre_, post_, blocking_option)

    def weight_rec_err(err, wgt):
        if err is None:
            return err
        return err * wgt

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get datasets
    datasets = {
        'mnist': data_mnist,
        'cifar': data_cifar10,
        'fashion_mnist': fashion_mnist,
    }
    X_train, Y_train, X_test, Y_test = datasets[dataset_name](
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    input_shape = {
        'mnist': (None, 28, 28, 1),
        'cifar': (None, 32, 32, 3),
        'fashion_mnist': (None, 28, 28, 1)
    }[dataset_name]
    x = tf.placeholder(tf.float32, shape=input_shape)
    #tf.summary.image('input', x)
    y = tf.placeholder(tf.float32, shape=(None, 10))
    if dataset_name == 'cifar':
        x = tf.map_fn(lambda frame: preprocess_image(frame, True), x)

    # Train a model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    model_params = {
        'nb_filters': nb_filters,
        'model_arch': model_arch,
        'blocking_option': blocking_option,
        'input_shape': input_shape
    }
    rng = np.random.RandomState([2017, 8, 30])

    merged = tf.summary.merge_all()

    if clean_train:
        print('.. using clean training')
        model = make_basic_model(**model_params)
        preds = model.get_probs(x)
        pre_ae_states, post_ae_states = model.get_ae_states()
        rec_err = get_rec_err(pre_ae_states, post_ae_states)

        class Evaluate(object):
            def __init__(self):
                self.best_accuracy = 0.

            def __call__(self):
                # Evaluate the accuracy of the model on legitimate test
                # examples
                eval_params = {'batch_size': batch_size}
                acc, rec_loss = model_eval(sess,
                                           x,
                                           y,
                                           preds,
                                           X_test,
                                           Y_test,
                                           args=eval_params,
                                           aux_loss_lst=[rec_err])
                self.best_accuracy = max(self.best_accuracy, acc)
                report.clean_train_clean_eval = acc
                assert X_test.shape[0] == test_end - test_start, X_test.shape
                print('Test accuracy on legitimate examples:   %0.4f' % acc)
                print('Best accuracy so far:                   {:0.4f}'.format(
                    self.best_accuracy))
                print('reconstruction error on legit examples: %0.4f' %
                      rec_loss)

        evaluate = Evaluate()

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng,
                    aux_loss=weight_rec_err(rec_err, rec_error_weight),
                    opt_type=opt_type,
                    summary=merged)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc, rec_loss = model_eval(sess,
                                       x,
                                       y,
                                       preds,
                                       X_train,
                                       Y_train,
                                       args=eval_params,
                                       aux_loss_lst=[rec_err])
            report.train_clean_train_clean_eval = acc

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        attack = attacks[attack_name](model, sess=sess)
        adv_x = attack.generate(x, **attack_params[attack_name])
        preds_adv = model.get_probs(adv_x)
        pre_ae_states_adv, post_ae_states_adv = model.get_ae_states()
        rec_err_adv = get_rec_err(pre_ae_states_adv, post_ae_states_adv)

        # Evaluate the accuracy of the model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc, rec_loss = model_eval(sess,
                                   x,
                                   y,
                                   preds_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_par,
                                   aux_loss_lst=[rec_err_adv])
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        print('reconstruction error on adv examples: %0.4f' % rec_loss)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc, rec_loss = model_eval(sess,
                                       x,
                                       y,
                                       preds_adv,
                                       X_train,
                                       Y_train,
                                       args=eval_par,
                                       aux_loss_lst=[rec_err_adv])
            report.train_clean_train_adv_eval = acc
    print('.. using adversarial training')
    # Redefine TF model graph
    model_2 = make_basic_model(**model_params)
    tf.summary.image('input', x)
    preds_2 = model_2(x)
    pre_ae_states_2, post_ae_states_2 = model_2.get_ae_states()
    rec_err_2 = get_rec_err(pre_ae_states_2, post_ae_states_2)
    attack2 = attacks[attack_name](model_2, sess=sess)
    adv_x_2 = attack2.generate(x, **attack_params[attack_name])
    tf.summary.image('adversarial', adv_x_2)
    tf.summary.image('diff', x - adv_x_2)
    if not backprop_through_attack:
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)
    pre_ae_states_adv_2, post_ae_states_adv_2 = model_2.get_ae_states()
    rec_err_adv_2 = get_rec_err(pre_ae_states_adv_2, post_ae_states_adv_2)

    # adv -> clean reconstruction
    rec_err_cross_2 = get_rec_err(pre_ae_states_2, post_ae_states_adv_2)

    class Evaluate2(object):
        def __init__(self):
            self.best_accuracy = 0.

        def __call__(self):
            # Accuracy of adversarially trained model on legitimate test inputs
            eval_params = {'batch_size': batch_size}
            accuracy, rec_loss = model_eval(sess,
                                            x,
                                            y,
                                            preds_2,
                                            X_test,
                                            Y_test,
                                            args=eval_params,
                                            aux_loss_lst=[rec_err_2],
                                            summary=merged)
            self.best_accuracy = max(self.best_accuracy, accuracy)
            print('Test accuracy on legitimate examples:   %0.4f' % accuracy)
            print('Best test accuracy so far:              {:0.4f}'.format(
                self.best_accuracy))
            print('reconstruction error on legit examples: %0.4f' % rec_loss)
            report.adv_train_clean_eval = accuracy

            # Accuracy of the adversarially trained model on adversarial examples
            accuracy_adv, rec_loss_adv, rec_loss_cross = model_eval(
                sess,
                x,
                y,
                preds_2_adv,
                X_test,
                Y_test,
                args=eval_params,
                aux_loss_lst=[rec_err_adv_2, rec_err_cross_2],
                summary=merged)
            print('Test accuracy on adversarial examples:  %0.4f' %
                  accuracy_adv)
            print('reconstruction error on adv->adv:       %0.4f' %
                  rec_loss_adv)
            print('reconstruction error on adv->clean:     %0.4f' %
                  rec_loss_cross)
            report.adv_train_adv_eval = accuracy_adv

            with open('report.dat', 'a') as f:
                f.write(' '.join([
                    '%0.4f' % v for v in [
                        accuracy,
                        accuracy_adv,
                        rec_loss,
                        rec_loss_adv,
                        rec_loss_cross,
                    ]
                ]) + '\n')

    evaluate_2 = Evaluate2()

    # combine both errors
    if rec_err_cross_2 is None or rec_err_2 is None:
        use_cross_error = False  #XXX hack
    rec_use_train = rec_err_cross_2 + rec_err_2 if use_cross_error else rec_err_2

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng,
                aux_loss=weight_rec_err(rec_use_train, rec_error_weight),
                opt_type=opt_type,
                summary=merged)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy, rec_loss = model_eval(sess,
                                        x,
                                        y,
                                        preds_2,
                                        X_train,
                                        Y_train,
                                        args=eval_params,
                                        aux_loss_lst=[rec_err_2])
        report.train_adv_train_clean_eval = accuracy
        accuracy, rec_loss = model_eval(sess,
                                        x,
                                        y,
                                        preds_2_adv,
                                        X_train,
                                        Y_train,
                                        args=eval_params,
                                        aux_loss_lst=[rec_err_adv_2])
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #15
0
def run_mnist_adv(num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE,
                  testing=False, learning_rate=LEARNING_RATE):

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # set random seed
    tf.set_random_seed(42)

    # can use gpu
    config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 1} )

    # Create TF session and set Keras backend session as TF
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST()
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain image params
    n_rows, n_cols, n_channels = x_train.shape[1:4]
    n_classes = y_train.shape[1]

    # define TF model graph
    model = ConvNet((n_rows, n_cols, n_channels), n_classes)
    model(model.input)

    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy', adv_acc_metric]
    )

    # Train an MNIST model
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=num_epochs,
              validation_data=(x_test, y_test),
              verbose=1)

    # Evaluate the accuracy on legitimate and adversarial test examples
    _, acc, adv_acc = model.evaluate(x_test, y_test,
                                     batch_size=batch_size,
                                     verbose=0)
    report.clean_train_clean_eval = acc
    report.clean_train_adv_eval = adv_acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

    # Calculate training error
    if testing:
        _, train_acc, train_adv_acc = model.evaluate(x_train, y_train,
                                                     batch_size=batch_size,
                                                     verbose=0)
        report.train_clean_train_clean_eval = train_acc
        report.train_clean_train_adv_eval = train_adv_acc

    print("Repeating the process, using adversarial training")
    # Redefine Keras model
    model_2 = ConvNet((n_rows, n_cols, n_channels), n_classes)
    model_2(model_2.input)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm_2 = FastGradientMethod(wrap_2, sess=sess)

    # Use a loss function based on legitimate and adversarial examples
    adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params)
    adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params)
    model_2.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss=adv_loss_2,
        metrics=['accuracy', adv_acc_metric_2]
    )

    # Train an MNIST model
    model_2.fit(x_train, y_train,
                batch_size=batch_size,
                epochs=num_epochs,
                validation_data=(x_test, y_test),
                verbose=1)

    # Evaluate the accuracy on legitimate and adversarial test examples
    _, acc, adv_acc = model_2.evaluate(x_test, y_test,
                                       batch_size=batch_size,
                                       verbose=0)
    report.adv_train_clean_eval = acc
    report.adv_train_adv_eval = adv_acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

    # Calculate training error
    if testing:
        _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train,
                                                       batch_size=batch_size,
                                                       verbose=0)
        report.train_adv_train_clean_eval = train_acc
        report.train_adv_train_adv_eval = train_adv_acc

    return report
예제 #16
0
    def test_run_single_gpu_fgsm(self):
        """
        Test the basic single GPU performance by comparing to the FGSM
        tutorial.
        """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {
            "train_start": 0,
            "train_end": 5000,
            "test_start": 0,
            "test_end": 333,
            "nb_epochs": 5,
            "testing": True,
        }
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update(
            {
                "batch_size": 128,
                "adam_lrn": 0.001,
                "dataset": "mnist",
                "only_adv_train": False,
                "eval_iters": 1,
                "ngpu": 1,
                "fast_tests": False,
                "attack_type_train": "",
                "save_dir": None,
                "save_steps": 10000,
                "attack_nb_iter_train": None,
                "save": False,
                "model_type": "basic",
                "attack_type_test": "FGSM",
            }
        )

        flags.update({"adv_train": False})
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict["train"]
        report_2.clean_train_clean_eval = report_dict["test"]
        report_2.clean_train_adv_eval = report_dict["FGSM"]

        # Run the multi-gpu trainer for adversarial training
        flags.update({"adv_train": True, "attack_type_train": "FGSM"})
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict["train"]
        report_2.adv_train_clean_eval = report_dict["test"]
        report_2.adv_train_adv_eval = report_dict["FGSM"]

        self.assertClose(
            report.train_clean_train_clean_eval,
            report_2.train_clean_train_clean_eval,
            atol=5e-2,
        )
        self.assertClose(
            report.clean_train_clean_eval, report_2.clean_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report.clean_train_adv_eval, report_2.clean_train_adv_eval, atol=5e-2
        )
        self.assertClose(
            report.train_adv_train_clean_eval,
            report_2.train_adv_train_clean_eval,
            atol=1e-1,
        )
        self.assertClose(
            report.adv_train_clean_eval, report_2.adv_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report.adv_train_adv_eval, report_2.adv_train_adv_eval, atol=1e-1
        )
예제 #17
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param testing: if true, complete an AccuracyReport for unit tests
      to verify that performance is adequate
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    fgsm_params = {'eps': 0.3}

    if clean_train:
        model = make_basic_cnn()
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)

        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = make_basic_cnn()
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(model_2, sess=sess)
    preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #18
0
    def helper_run_multi_gpu_madryetal(self, extra_flags=None):
        """
    Compare the single GPU performance to multiGPU performance.
    """
        # Run the trainers on a dataset of reduced size
        flags = {
            'train_start': 0,
            'train_end': 5000,
            'test_start': 0,
            'test_end': 333,
            'nb_epochs': 5,
            'testing': True
        }

        # Run the multi-gpu trainer for adversarial training
        flags.update({
            'batch_size': 128,
            'adam_lrn': 0.001,
            'dataset': 'mnist',
            'only_adv_train': False,
            'eval_iters': 1,
            'fast_tests': True,
            'save_dir': None,
            'save_steps': 10000,
            'attack_nb_iter_train': 10,
            'sync_step': None,
            'adv_train': True,
            'save': False,
            'model_type': 'basic',
            'attack_type_test': 'MadryEtAl_y'
        })
        if extra_flags is not None:
            flags.update(extra_flags)

        # Run the multi-gpu trainer for adversarial training using 2 gpus
        # trainer_multigpu by default sets `allow_soft_placement=True`
        flags.update({
            'ngpu': 2,
            'attack_type_train': 'MadryEtAl_y_multigpu',
            'sync_step': 1
        })
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_m = AccuracyReport()
        report_m.train_adv_train_clean_eval = report_dict['train']
        report_m.adv_train_clean_eval = report_dict['test']
        report_m.adv_train_adv_eval = report_dict['MadryEtAl_y']

        flags.update({'ngpu': 1, 'attack_type_train': 'MadryEtAl_y'})
        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_s = AccuracyReport()
        report_s.train_adv_train_clean_eval = report_dict['train']
        report_s.adv_train_clean_eval = report_dict['test']
        report_s.adv_train_adv_eval = report_dict['MadryEtAl_y']

        self.assertClose(report_s.train_adv_train_clean_eval,
                         report_m.train_adv_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report_s.adv_train_clean_eval,
                         report_m.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report_s.adv_train_adv_eval,
                         report_m.adv_train_adv_eval,
                         atol=5e-2)
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=NB_FILTERS, num_threads=None,
                   attack_string=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example.
    :param train_end: index of last training set example.
    :param test_start: index of first test set example.
    :param test_end: index of last test set example.
    :param nb_epochs: number of epochs to train model.
    :param batch_size: size of training batches.
    :param learning_rate: learning rate for training.
    :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate.
    :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
    :param nb_filters: number of filters in the CNN used for training.
    :param num_threads: number of threads used for running the process.
    :param attack_string: attack name for crafting adversarial attacks and
                          adversarial training, in string format.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    X_train, Y_train = mnist.get_set('train')
    X_test, Y_test = mnist.get_set('test')

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }

    # Initialize the attack object
    attack_class = attack_selection(attack_string)
    attack_params = {'eps': 0.3, 'clip_min': 0.,
                     'clip_max': 1.}

    rng = np.random.RandomState([2018, 6, 18])
    if clean_train:
        model = ModelBasicCNNTFE(nb_filters=nb_filters)

        def evaluate_clean():
            """
            Evaluate the accuracy of the MNIST model on legitimate test
            examples
            """
            eval_params = {'batch_size': batch_size}
            acc = model_eval(model, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        train(model, X_train, Y_train, evaluate=evaluate_clean,
              args=train_params, rng=rng, var_list=model.get_params())

        if testing:
            # Calculate training error
            eval_params = {'batch_size': batch_size}
            acc = model_eval(model, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        attack = attack_class(model)
        acc = model_eval(
            model, X_test, Y_test, args=eval_par,
            attack=attack, attack_args=attack_params)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(
                model, X_train, Y_train, args=eval_par,
                attack=attack, attack_args=attack_params)
            print('Train accuracy on adversarial examples: %0.4f\n' % acc)
            report.train_clean_train_adv_eval = acc

        attack = None
        print("Repeating the process, using adversarial training")

    model_adv_train = ModelBasicCNNTFE(nb_filters=nb_filters)
    attack = attack_class(model_adv_train)

    def evaluate_adv():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(
            model_adv_train, X_test, Y_test,
            args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy
        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(
            model_adv_train, X_test, Y_test,
            args=eval_params, attack=attack,
            attack_args=attack_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(model_adv_train, X_train, Y_train, evaluate=evaluate_adv,
          args=train_params, rng=rng,
          var_list=model_adv_train.get_params(),
          attack=attack, attack_args=attack_params)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(
            model_adv_train, X_train, Y_train, args=eval_params,
            attack=None, attack_args=None)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(
            model_adv_train, X_train, Y_train, args=eval_params,
            attack=attack, attack_args=attack_params)
        report.train_adv_train_adv_eval = accuracy
    return report
예제 #20
0
    def test_run_single_gpu_fgsm(self):
        """
    Test the basic single GPU performance by comparing to the FGSM
    tutorial.
    """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {
            'train_start': 0,
            'train_end': 5000,
            'test_start': 0,
            'test_end': 333,
            'nb_epochs': 5,
            'testing': True
        }
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update({
            'batch_size': 128,
            'adam_lrn': 0.001,
            'dataset': 'mnist',
            'only_adv_train': False,
            'eval_iters': 1,
            'ngpu': 1,
            'fast_tests': False,
            'attack_type_train': '',
            'save_dir': None,
            'save_steps': 10000,
            'attack_nb_iter_train': None,
            'save': False,
            'model_type': 'basic',
            'attack_type_test': 'FGSM'
        })

        flags.update({'adv_train': False})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict['train']
        report_2.clean_train_clean_eval = report_dict['test']
        report_2.clean_train_adv_eval = report_dict['FGSM']

        # Run the multi-gpu trainer for adversarial training
        flags.update({'adv_train': True, 'attack_type_train': 'FGSM'})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict['train']
        report_2.adv_train_clean_eval = report_dict['test']
        report_2.adv_train_adv_eval = report_dict['FGSM']

        self.assertClose(report.train_clean_train_clean_eval,
                         report_2.train_clean_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report.clean_train_clean_eval,
                         report_2.clean_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.clean_train_adv_eval,
                         report_2.clean_train_adv_eval,
                         atol=5e-2)
        self.assertClose(report.train_adv_train_clean_eval,
                         report_2.train_adv_train_clean_eval,
                         atol=1e-1)
        self.assertClose(report.adv_train_clean_eval,
                         report_2.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.adv_train_adv_eval,
                         report_2.adv_train_adv_eval,
                         atol=1e-1)
예제 #21
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   epsilon=0.3,
                   learning_rate=0.001,
                   train_dir="/tmp",
                   filename="mnist.ckpt",
                   load_model=False,
                   testing=False):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model_BIM()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    ckpt = tf.train.get_checkpoint_state(train_dir)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    rng = np.random.RandomState([2017, 8, 30])
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    save=False,
                    rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)

    print("FastGradientMethod")
    fgsm1 = FastGradientMethod(wrap, sess=sess)
    for epsilon in [0.005, 0.01, 0.05, 0.1, 0.5, 1.0]:
        print("Epsilon =", epsilon),
        fgsm_params = {'eps': epsilon, 'clip_min': None, 'clip_max': None}
        adv_x = fgsm1.generate(x, **fgsm_params)
        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

    print("BasicIterativeMethod")
    bim = BasicIterativeMethod(wrap, sess=sess)
    for epsilon, order in zip(
        [0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 0.5, 1.0],
        [np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, 2, 2]):
        print("Epsilon =", epsilon),
        fgsm_params = {
            'eps': epsilon,
            'clip_min': 0.,
            'clip_max': 1.,
            'ord': order
        }
        adv_x = bim.generate(x, **fgsm_params)
        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv,
                         X_train,
                         Y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc
    return

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    #fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    bim2 = BasicIterativeMethod(wrap_2, sess=sess)
    preds_2_adv = model_2(bim2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                save=False,
                rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #22
0
    def helper_run_multi_gpu_madryetal(self, extra_flags=None):
        """
        Compare the single GPU performance to multiGPU performance.
        """
        # Run the trainers on a dataset of reduced size
        flags = {'train_start': 0,
                 'train_end': 5000,
                 'test_start': 0,
                 'test_end': 333,
                 'nb_epochs': 5,
                 'testing': True}

        # Run the multi-gpu trainer for adversarial training
        flags.update({'batch_size': 128, 'adam_lrn': 0.001,
                      'dataset': 'mnist', 'only_adv_train': False,
                      'eval_iters': 1, 'fast_tests': True,
                      'save_dir': None, 'save_steps': 10000,
                      'attack_nb_iter_train': 10, 'sync_step': None,
                      'adv_train': True,
                      'save': False,
                      'model_type': 'basic',
                      'attack_type_test': 'MadryEtAl_y',
                      })
        if extra_flags is not None:
            flags.update(extra_flags)

        # Run the multi-gpu trainer for adversarial training using 2 gpus
        # trainer_multigpu by default sets `allow_soft_placement=True`
        flags.update({'ngpu': 2,
                      'attack_type_train': 'MadryEtAl_y_multigpu',
                      'sync_step': 1})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_m = AccuracyReport()
        report_m.train_adv_train_clean_eval = report_dict['train']
        report_m.adv_train_clean_eval = report_dict['test']
        report_m.adv_train_adv_eval = report_dict['MadryEtAl_y']

        flags.update({'ngpu': 1,
                      'attack_type_train': 'MadryEtAl_y',
                      })
        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_s = AccuracyReport()
        report_s.train_adv_train_clean_eval = report_dict['train']
        report_s.adv_train_clean_eval = report_dict['test']
        report_s.adv_train_adv_eval = report_dict['MadryEtAl_y']

        self.assertClose(report_s.train_adv_train_clean_eval,
                         report_m.train_adv_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report_s.adv_train_clean_eval,
                         report_m.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report_s.adv_train_adv_eval,
                         report_m.adv_train_adv_eval,
                         atol=5e-2)