Code Example #1
File: test_attacks.py | Project: ysharma1126/DeepBayes
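# Imports assumed by this excerpt (inferred from usage; the original module
# header is not shown):
import logging
import os
import sys

import numpy as np
import tensorflow as tf

from cleverhans.utils import AccuracyReport, set_log_level
from cleverhans.utils_tf import model_eval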
def test_attacks(batch_size=128,
                 source_samples=10,
                 model_path=os.path.join("models", "mnist"),
                 targeted=True):
    """
    Test many attacks on MNIST with deep Bayes classifier.
    :param batch_size: size of the batches used for evaluation and the attack
    :param source_samples: number of test inputs to attack
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    from cleverhans.utils_mnist import data_mnist
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                  train_end=60000,
                                                  test_start=0,
                                                  test_end=10000)
    img_rows, img_cols, channels = X_train[0].shape
    nb_classes = Y_train.shape[1]

    # Define input TF placeholder
    batch_size = min(batch_size, source_samples)
    x = tf.placeholder(tf.float32,
                       shape=(batch_size, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))

    # Define TF model graph
    model_name = str(sys.argv[1])
    if model_name == 'bayes':
        from load_bayes_classifier import BayesModel
        conv = True
        checkpoint = 0  #int(sys.argv[1])
        K = int(sys.argv[3])
        use_mean = True
        model = BayesModel(sess,
                           'mnist',
                           conv,
                           K,
                           checkpoint=checkpoint,
                           attack_snapshot=False,
                           use_mean=use_mean)
        if use_mean:
            model_name = 'bayes_mean_mlp'
        else:
            model_name = 'bayes_K%d' % K
    if model_name == 'cnn':
        from load_cnn_classifier import CNNModel
        model = CNNModel(sess, 'mnist')
    if model_name == 'wgan':
        from load_wgan_classifier import WGANModel
        conv = True
        checkpoint = 0  #int(sys.argv[1])
        K = int(sys.argv[3])
        T = int(sys.argv[4])
        model = WGANModel(sess, 'mnist', conv, K, T, checkpoint=checkpoint)
        model_name = 'wgan_K%d_T%d' % (K, T)

    preds = model.predict(x, softmax=True)  # output probabilities
    print("Defined TensorFlow model graph.")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    # Craft adversarial examples
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # make adv inputs and labels for the attack if targeted
    if targeted:
        adv_inputs = np.array([[instance] * nb_classes
                               for instance in X_test[:source_samples]],
                              dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, channels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
    else:
        adv_inputs = X_test[:source_samples]
        adv_ys = Y_test[:source_samples]

    # Instantiate an attack object
    attack_method = str(sys.argv[2])
    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = FastGradientMethod(model_prob, sess=sess)
        from attack_config import config_fgsm
        attack_params = config_fgsm(targeted, adv_ys)
    if attack_method == 'bim':
        from cleverhans.attacks import BasicIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = BasicIterativeMethod(model_prob, sess=sess)
        from attack_config import config_bim
        attack_params = config_bim(targeted, adv_ys)
    if attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MomentumIterativeMethod(model_prob, sess=sess)
        from attack_config import config_mim
        attack_params = config_mim(targeted, adv_ys)
    if attack_method == 'jsma':
        from cleverhans.attacks import SaliencyMapMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = SaliencyMapMethod(model_prob, sess=sess)
        from attack_config import config_jsma
        attack_params = config_jsma(targeted, adv_ys)
    if attack_method == 'vat':
        from cleverhans.attacks import VirtualAdversarialMethod
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = VirtualAdversarialMethod(model_logit, sess=sess)
        from attack_config import config_vat
        attack_params = config_vat(targeted, adv_ys)
    if attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = CarliniWagnerL2(model_logit, sess=sess)
        from attack_config import config_cw
        attack_params = config_cw(targeted, adv_ys)
    if attack_method == 'elastic':
        from cleverhans.attacks import ElasticNetMethod
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = ElasticNetMethod(model_logit, sess=sess)
        from attack_config import config_elastic
        attack_params = config_elastic(targeted, adv_ys)
    if attack_method == 'deepfool':
        from cleverhans.attacks import DeepFool
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = DeepFool(model_logit, sess=sess)
        from attack_config import config_deepfool
        attack_params = config_deepfool(targeted, adv_ys)
    if attack_method == 'madry':
        from cleverhans.attacks import MadryEtAl
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MadryEtAl(model_prob, sess=sess)
        from attack_config import config_madry
        attack_params = config_madry(targeted, adv_ys)
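    # Note: as wired above, the sign/saliency-style attacks (fgsm, bim, mim,
    # jsma, madry) wrap the model's softmax probabilities, while the
    # optimization-based attacks (vat, cw, elastic, deepfool) wrap its
    # pre-softmax logits.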

    attack_params['batch_size'] = batch_size
    print('batch size:', batch_size)

    # perform the attack!
    adv = []
    n_batch = adv_inputs.shape[0] // batch_size
    for i in range(n_batch):
        adv_batch = adv_inputs[i * batch_size:(i + 1) * batch_size]
        adv.append(attack.generate_np(adv_batch, **attack_params))
    adv = np.concatenate(adv, axis=0)

    for _ in range(5):
        y_adv = []
        for i in range(n_batch):
            adv_batch = adv[i * batch_size:(i + 1) * batch_size]
            y_adv.append(sess.run(preds, {x: adv_batch}))
        y_adv = np.concatenate(y_adv, axis=0)

        print('--------------------------------------')
        for i in range(10):
            print(np.argmax(y_adv[i * 10:(i + 1) * 10], 1))

    correct_pred = np.asarray(np.argmax(y_adv, 1) == np.argmax(adv_ys, 1),
                              dtype='f')
    adv_accuracy = np.mean(correct_pred)

    if not targeted:
        # for an untargeted attack, success means the prediction no longer
        # matches the true label
        adv_accuracy = 1. - adv_accuracy

    print('--------------------------------------')

    print(np.argmax(adv_ys[:10], 1))
    print(np.argmax(y_adv[:10], 1))
    for _ in range(5):
        tmp = sess.run(preds, {x: adv[:100]})
        print(np.argmax(tmp[:10], 1))

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average L2 distortion introduced by the attack
    mean_l2_perturbation = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_perturbation))

    # Close TF session
    sess.close()

    # visualisation
    vis_adv = True
    if vis_adv:
        N_vis = 100
        sys.path.append('../../utils')
        from visualisation import plot_images
        if channels == 1:
            shape = (img_rows, img_cols)
        else:
            shape = (img_rows, img_cols, channels)
        path = 'figs/'
        filename = model_name + '_' + attack_method
        if targeted:
            filename = filename + '_targeted'
        else:
            filename = filename + '_untargeted'
        plot_images(adv_inputs[:N_vis], shape, path, filename + '_data')
        plot_images(adv[:N_vis], shape, path, filename + '_adv')

    save_result = True
    if save_result:
        path = 'results/'
        filename = model_name + '_' + attack_method
        if targeted:
            filename = filename + '_targeted'
            y_input = adv_ys
        else:
            filename = filename + '_untargeted'
            y_input = Y_test[:source_samples]
        results = [adv_inputs, y_input, adv, y_adv]
        import pickle
        with open(path + filename + '.pkl', 'wb') as f:  # pickle needs binary mode
            pickle.dump(results, f)
        print("results saved at %s%s.pkl" % (path, filename))

    return report
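The function above pulls its configuration from the command line; a hypothetical invocation sketch (argument positions inferred from the sys.argv accesses in the body):

# python test_attacks.py <model> <attack> <K> [<T>]
#   sys.argv[1]: model name  -- 'bayes', 'cnn', or 'wgan'
#   sys.argv[2]: attack name -- 'fgsm', 'bim', 'mim', 'jsma', 'vat', 'cw',
#                               'elastic', 'deepfool', or 'madry'
#   sys.argv[3]: K, the number of posterior samples ('bayes' and 'wgan')
#   sys.argv[4]: T ('wgan' only)
if __name__ == '__main__':
    test_attacks(batch_size=128, source_samples=10, targeted=True)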
Code Example #2
def run_attack(train_start=0, train_end=60000, test_start=0,
               test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
               learning_rate=LEARNING_RATE, testing=False,
               label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param testing: if true, training error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)
  # Force TensorFlow to use single thread to improve reproducibility
  config = tf.ConfigProto(intra_op_parallelism_threads=1,
                          inter_op_parallelism_threads=1)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session(config=config)
  keras.backend.set_session(sess)


  # Define Keras model
  model = cnn_model(img_rows=32, img_cols=32,
                    channels=1, nb_filters=64,
                    nb_classes=3)
  print("Defined Keras model.")

  # To be able to call the model in the custom loss, we need to call it once
  # before, see https://github.com/tensorflow/tensorflow/issues/23769
  model(model.input)

  # Initialize the Fast Gradient Sign Method (FGSM) attack object
  wrap = KerasModelWrapper(model)
  fgsm = FastGradientMethod(wrap, sess=sess)
  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}

  adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params)
  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate),
      loss='categorical_crossentropy',
      # keep the metric list in sync with the 3-way unpacking of
      # model.evaluate() below (loss, acc, adv_acc)
      metrics=['accuracy', adv_acc_metric]
  )

  # Train the model (X_train / y_train / X_test / y_test are assumed to be
  # loaded earlier in the original script; the loading code is not part of
  # this excerpt)
  model.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=nb_epochs,
            validation_data=(X_test, y_test),
            verbose=1)

  # Evaluate the accuracy on legitimate and adversarial test examples
  _, acc, adv_acc = model.evaluate(X_test, y_test,
                                   batch_size=batch_size,
                                   verbose=1)
  report.clean_train_clean_eval = acc
  report.clean_train_adv_eval = adv_acc
  print('Test accuracy on legitimate examples: %0.4f' % acc)
  print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

  # Calculate training error
  if testing:
    _, train_acc, train_adv_acc = model.evaluate(X_train, y_train,
                                                 batch_size=batch_size,
                                                 verbose=1)
    report.train_clean_train_clean_eval = train_acc
    report.train_clean_train_adv_eval = train_adv_acc

  print("Repeating the process, using adversarial training")
  # Redefine Keras model
  model_2 = cnn_model(img_rows=32, img_cols=32,
                      channels=1, nb_filters=64,
                      nb_classes=3)
  model_2(model_2.input)
  wrap_2 = KerasModelWrapper(model_2)
  fgsm_2 = FastGradientMethod(wrap_2, sess=sess)

  # Use a loss function based on legitimate and adversarial examples
  adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params)
  adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params)
  model_2.compile(
      optimizer=keras.optimizers.Adam(learning_rate),
      loss=adv_loss_2,
      metrics=['accuracy', adv_acc_metric_2]
  )
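Neither get_adversarial_acc_metric nor get_adversarial_loss appears in this excerpt. A sketch of how they are defined in the stock CleverHans Keras tutorial, which this code closely follows (an illustration, not necessarily this project's exact code):

def get_adversarial_acc_metric(model, fgsm, fgsm_params):
  def adv_acc(y, _):
    # Generate adversarial examples for the current batch
    x_adv = fgsm.generate(model.input, **fgsm_params)
    # Treat the attack as a constant during training
    x_adv = tf.stop_gradient(x_adv)
    preds_adv = model(x_adv)
    return keras.metrics.categorical_accuracy(y, preds_adv)
  return adv_acc


def get_adversarial_loss(model, fgsm, fgsm_params):
  def adv_loss(y, preds):
    # Cross-entropy on the legitimate examples
    cross_ent = keras.losses.categorical_crossentropy(y, preds)
    # Cross-entropy on the adversarial examples
    x_adv = fgsm.generate(model.input, **fgsm_params)
    x_adv = tf.stop_gradient(x_adv)
    preds_adv = model(x_adv)
    cross_ent_adv = keras.losses.categorical_crossentropy(y, preds_adv)
    return 0.5 * cross_ent + 0.5 * cross_ent_adv
  return adv_loss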
Code Example #3
File: ZOO.py | Project: liuweilin17/cleverhans
def zoo(viz_enabled=VIZ_ENABLED,
        nb_epochs=NB_EPOCHS,
        batch_size=BATCH_SIZE,
        source_samples=SOURCE_SAMPLES,
        learning_rate=LEARNING_RATE,
        attack_iterations=ATTACK_ITERATIONS,
        model_path=MODEL_PATH,
        targeted=TARGETED):
    """
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    if DATASET == 'MNIST':
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000
        ds = dataset.MNIST(train_start=train_start,
                           train_end=train_end,
                           test_start=test_start,
                           test_end=test_end,
                           center=False)
    elif DATASET == 'SVHN':
        train_start = 0
        train_end = 73257
        test_start = 0
        test_end = 26032
        ds = dataset.SVHN(train_start=train_start,
                          train_end=train_end,
                          test_start=test_start,
                          test_end=test_end)
    elif DATASET == 'CIFAR10':
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000
        ds = dataset.CIFAR10(train_start=train_start,
                             train_end=train_end,
                             test_start=test_start,
                             test_end=test_end,
                             center=False)

    x_train, y_train, x_test, y_test = ds.get_set('train') + ds.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN(DATASET, nb_classes, nb_filters,
                          (None, img_rows, img_cols, nchannels))
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train a model on the selected dataset
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2018, 10, 22])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the ZOO (zeroth-order) approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a Zoo attack object (renamed so it doesn't shadow this function)
    zoo_attack = Zoo(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    zoo_params = {
        'binary_search_steps': BINARY_SEARCH_STEPS,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': ZOO_LEARNING_RATE,
        'batch_size': (source_samples * nb_classes
                       if targeted else source_samples),
        'initial_const': INIT_CONST,
        'solver': SOLVER,
        'image_shape': [img_rows, img_cols, nchannels],
        'nb_classes': nb_classes
    }

    adv = zoo_attack.generate_np(adv_inputs, **zoo_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - model_eval(
                sess, x, y, preds, adv, y_test[idxs], args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          adv,
                                          y_test[:source_samples],
                                          args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average L2 distortion introduced by the attack
    mean_l2_perturbation = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_perturbation))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
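zoo() references several module-level constants that sit outside this excerpt; the values below are illustrative placeholders (the real ZOO.py defines its own):

VIZ_ENABLED = True
NB_EPOCHS = 6
BATCH_SIZE = 128
SOURCE_SAMPLES = 10
LEARNING_RATE = 0.001
ATTACK_ITERATIONS = 1000
MODEL_PATH = os.path.join('models', 'mnist')
TARGETED = True
DATASET = 'MNIST'        # or 'SVHN' / 'CIFAR10'
BINARY_SEARCH_STEPS = 9
ZOO_LEARNING_RATE = 1e-2
INIT_CONST = 10
SOLVER = 'adam'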
Code Example #4
def run_mnist_adv(num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE,
                  testing=False, learning_rate=LEARNING_RATE):

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # set random seed
    tf.set_random_seed(42)

    # allow GPU use if available
    config = tf.ConfigProto(device_count={'GPU': 1, 'CPU': 1})

    # Create TF session and set Keras backend session as TF
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST()
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain image params
    n_rows, n_cols, n_channels = x_train.shape[1:4]
    n_classes = y_train.shape[1]

    # define TF model graph
    model = ConvNet((n_rows, n_cols, n_channels), n_classes)
    model(model.input)

    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy', adv_acc_metric]
    )

    # Train an MNIST model
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=num_epochs,
              validation_data=(x_test, y_test),
              verbose=1)

    # Evaluate the accuracy on legitimate and adversarial test examples
    _, acc, adv_acc = model.evaluate(x_test, y_test,
                                     batch_size=batch_size,
                                     verbose=0)
    report.clean_train_clean_eval = acc
    report.clean_train_adv_eval = adv_acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

    # Calculate training error
    if testing:
        _, train_acc, train_adv_acc = model.evaluate(x_train, y_train,
                                                     batch_size=batch_size,
                                                     verbose=0)
        report.train_clean_train_clean_eval = train_acc
        report.train_clean_train_adv_eval = train_adv_acc

    print("Repeating the process, using adversarial training")
    # Redefine Keras model
    model_2 = ConvNet((n_rows, n_cols, n_channels), n_classes)
    model_2(model_2.input)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm_2 = FastGradientMethod(wrap_2, sess=sess)

    # Use a loss function based on legitimate and adversarial examples
    adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params)
    adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params)
    model_2.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss=adv_loss_2,
        metrics=['accuracy', adv_acc_metric_2]
    )

    # Train an MNIST model
    model_2.fit(x_train, y_train,
                batch_size=batch_size,
                epochs=num_epochs,
                validation_data=(x_test, y_test),
                verbose=1)

    # Evaluate the accuracy on legitimate and adversarial test examples
    _, acc, adv_acc = model_2.evaluate(x_test, y_test,
                                       batch_size=batch_size,
                                       verbose=0)
    report.adv_train_clean_eval = acc
    report.adv_train_adv_eval = adv_acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

    # Calculate training error
    if testing:
        _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train,
                                                       batch_size=batch_size,
                                                       verbose=0)
        report.train_adv_train_clean_eval = train_acc
        report.train_adv_train_adv_eval = train_adv_acc

    return report
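run_mnist_adv assumes a ConvNet factory that is not shown in the excerpt. A hypothetical minimal definition with matching input/output shapes (any small Keras CNN would do):

def ConvNet(input_shape, n_classes):
    model = keras.models.Sequential([
        keras.layers.Conv2D(64, (8, 8), strides=(2, 2), padding='same',
                            activation='relu', input_shape=input_shape),
        keras.layers.Conv2D(128, (6, 6), strides=(2, 2), padding='valid',
                            activation='relu'),
        keras.layers.Flatten(),
        keras.layers.Dense(n_classes, activation='softmax'),
    ])
    return model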
Code Example #5
def mnist_tutorial_fgsm(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE,
                        attack_iterations=ATTACK_ITERATIONS,
                        model_path=MODEL_PATH,
                        targeted=TARGETED,
                        noise_output=NOISE_OUTPUT):
  """
  MNIST tutorial for the Fast Gradient Method attack
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :param noise_output: (boolean) plot the perturbation (adv - input) instead
    of the adversarial image itself
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  nb_filters = 64

  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }

  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  if os.path.exists(model_path + ".meta"):
    tf_model_load(sess, model_path)
  else:
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, model_path)

  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using the Fast Gradient Method
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # Instantiate a FGSM attack object
  fgsm = FastGradientMethod(model, sess=sess)

  if viz_enabled:
    assert source_samples == nb_classes
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]
  if targeted:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, 1, img_rows, img_cols,
                    nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = np.array(
          [[instance] * nb_classes for instance in x_test[idxs]],
          dtype=np.float32)
    else:
      adv_inputs = np.array(
          [[instance] * nb_classes for
           instance in x_test[:source_samples]], dtype=np.float32)

    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_inputs = adv_inputs.reshape(
        (source_samples * nb_classes, img_rows, img_cols, nchannels))
    adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape((source_samples *
                                                 nb_classes, nb_classes))
    yname = "y_target"
  else:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = x_test[idxs]
    else:
      adv_inputs = x_test[:source_samples]

    adv_ys = None
    yname = "y"

  fgsm_params = {'eps': 0.3,
                 'clip_min': 0.,
                 'clip_max': 1.}

  adv = fgsm.generate_np(adv_inputs, **fgsm_params)

  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    adv_accuracy = model_eval(
        sess, x, y, preds, adv, adv_ys, args=eval_params)
  else:
    if viz_enabled:
      err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params)
      adv_accuracy = 1 - err
    else:
      err = model_eval(sess, x, y, preds, adv, y_test[:source_samples],
                       args=eval_params)
      adv_accuracy = 1 - err

  if viz_enabled:
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * nb_classes] - adv_inputs[i * nb_classes]
      else:
        image = adv[i * nb_classes]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.clean_train_adv_eval = 1. - adv_accuracy

  # Compute the average L2 distortion introduced by the attack
  mean_l2_perturbation = np.mean(np.sum((adv - adv_inputs)**2,
                                        axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_perturbation))


  ###########################################################################
  # Adversarial Training
  ###########################################################################

  model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

  fgsm2 = FastGradientMethod(model2, sess=sess)

  def attack_fgsm(x):
    # the training attack must perturb the batch it is given, not adv_inputs
    return fgsm2.generate(x, **fgsm_params)

  preds2 = model2.get_logits(x)
  loss2 = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)

  train(sess, loss2, x_train, y_train, args=train_params, rng=rng)
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy of the adversarially trained model on clean '
        'examples: {0}'.format(accuracy))
  report.adv_train_clean_eval = accuracy

  # Evaluate the adversarially trained model (preds2) on the same
  # adversarial examples
  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    adv_accuracy = model_eval(
        sess, x, y, preds2, adv, adv_ys, args=eval_params)
  else:
    if viz_enabled:
      err = model_eval(sess, x, y, preds2, adv, y_test[idxs],
                       args=eval_params)
      adv_accuracy = 1 - err
    else:
      err = model_eval(sess, x, y, preds2, adv, y_test[:source_samples],
                       args=eval_params)
      adv_accuracy = 1 - err

  if viz_enabled:
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * nb_classes] - adv_inputs[i * nb_classes]
      else:
        image = adv[i * nb_classes]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the rate at which the adversarial examples still succeed
  # against the adversarially trained model
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.adv_train_adv_eval = 1. - adv_accuracy

  # Compute the average L2 distortion introduced by the attack
  mean_l2_perturbation = np.mean(np.sum((adv - adv_inputs)**2,
                                        axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_perturbation))

  # Close TF session
  sess.close()
  def save_visual(data, path):
    """
    Modified version of cleverhans.plot.pyplot
    """
    figure = plt.figure()
    # figure.canvas.set_window_title('Cleverhans: Grid Visualization')

    # Add the images to the plot
    num_cols = data.shape[0]
    num_rows = data.shape[1]
    num_channels = data.shape[4]
    for y in range(num_rows):
      for x in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (x + 1) + (y * num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(data[x, y, :, :, 0], cmap='gray')
        else:
          plt.imshow(data[x, y, :, :, :])

    # Draw the plot and return
    plt.savefig(path)
    return figure

  # Finally, block & display a grid of all the adversarial examples
  if viz_enabled:
    # _ = grid_visual(grid_viz_data)
    # cleverhans_image.save("output", grid_viz_data)
    if noise_output:
      image_name = "output/fgsm_mnist_noise.png"
    else:
      image_name = "output/fgsm_mnist.png"
    _ = save_visual(grid_viz_data, image_name)

  return report
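A note on the attack= hook used in the adversarial-training block above: CleverHans's CrossEntropy loss calls the hook on each clean training batch and, by default, splits the loss evenly between clean and adversarial examples (exposed as its adv_coeff argument, 0.5 by default). A minimal sketch of the hook contract:

# the callback receives the clean batch tensor and must return a
# same-shaped adversarial tensor
def attack_hook(x_batch):
    return fgsm2.generate(x_batch, **fgsm_params)

loss = CrossEntropy(model2, smoothing=0.1, attack=attack_hook)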
Code Example #6
    def test_run_single_gpu_fgsm(self):
        """
        Test the basic single GPU performance by comparing to the FGSM
        tutorial.
        """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {
            'train_start': 0,
            'train_end': 5000,
            'test_start': 0,
            'test_end': 333,
            'nb_epochs': 5,
            'testing': True
        }
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update({
            'batch_size': 128,
            'adam_lrn': 0.001,
            'dataset': 'mnist',
            'only_adv_train': False,
            'eval_iters': 1,
            'ngpu': 1,
            'fast_tests': False,
            'attack_type_train': '',
            'save_dir': None,
            'save_steps': 10000,
            'attack_nb_iter_train': None,
            'save': False,
            'model_type': 'basic',
            'attack_type_test': 'FGSM'
        })

        flags.update({'adv_train': False})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict['train']
        report_2.clean_train_clean_eval = report_dict['test']
        report_2.clean_train_adv_eval = report_dict['FGSM']

        # Run the multi-gpu trainer for adversarial training
        flags.update({'adv_train': True, 'attack_type_train': 'FGSM'})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict['train']
        report_2.adv_train_clean_eval = report_dict['test']
        report_2.adv_train_adv_eval = report_dict['FGSM']

        self.assertClose(report.train_clean_train_clean_eval,
                         report_2.train_clean_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report.clean_train_clean_eval,
                         report_2.clean_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.clean_train_adv_eval,
                         report_2.clean_train_adv_eval,
                         atol=5e-2)
        self.assertClose(report.train_adv_train_clean_eval,
                         report_2.train_adv_train_clean_eval,
                         atol=1e-1)
        self.assertClose(report.adv_train_clean_eval,
                         report_2.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.adv_train_adv_eval,
                         report_2.adv_train_adv_eval,
                         atol=1e-1)
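The flags-to-HParams conversion above is a small pattern worth noting: it turns a mutable dict of flags into an immutable, attribute-accessible object. A standalone illustration:

from collections import namedtuple

flags = {'batch_size': 128, 'ngpu': 1, 'adv_train': False}
HParams = namedtuple('HParams', flags.keys())
hparams = HParams(**flags)
assert hparams.batch_size == 128  # attribute access instead of key lookup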
Code Example #7
File: detector.py | Project: newwhitecheng/cleverhans
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=False,
                      nb_epochs=6,
                      batch_size=128,
                      nb_classes=10,
                      source_samples=1,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()
    # `filename` is assumed to be defined elsewhere in the original script
    mat = read_mat_file(filename)  # renamed from `file` to avoid shadowing the built-in
    label = mat["label"]
    data = mat["data"]
    #data[data>1]= 1
    #data[data<0]= 0
    adv_data = data[10000:80000, :, :, :]
    # the seven perturbation types are interleaved along axis 0, so a
    # stride-7 slice recovers each 10000-image subset
    cw = adv_data[0::7, :, :, :]
    fgsm01 = adv_data[1::7, :, :, :]
    fgsm03 = adv_data[2::7, :, :, :]
    fgsm05 = adv_data[3::7, :, :, :]
    gaussian01 = adv_data[4::7, :, :, :]
    gaussian03 = adv_data[5::7, :, :, :]
    gaussian05 = adv_data[6::7, :, :, :]

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    #tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.log_device_placement=True
    sess = tf.Session(config=config)
    print("Created TensorFlow session.")
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")
    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    args=train_params,
                    save=os.path.exists("models"),
                    rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy
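The excerpt above depends on a read_mat_file helper (and a global filename) that are not shown. A plausible implementation using SciPy, assuming the .mat file stores 'data' and 'label' arrays:

import scipy.io

def read_mat_file(filename):
    # returns a dict-like object keyed by the variable names in the .mat file
    return scipy.io.loadmat(filename)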
Code Example #8
def mnist_tutorial_cw(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    viz_enabled=VIZ_ENABLED,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    source_samples=SOURCE_SAMPLES,
    learning_rate=LEARNING_RATE,
    attack_iterations=ATTACK_ITERATIONS,
    model_path=MODEL_PATH,
    targeted=TARGETED,
):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end,
    )
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN("model1", nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "filename": os.path.split(model_path)[-1],
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {"batch_size": batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print("Test accuracy on legitimate test examples: {0}".format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else "1"
    print(
        "Crafting "
        + str(source_samples)
        + " * "
        + nb_adv_per_sample
        + " adversarial examples"
    )
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0] for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype="f")

            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[idxs]], dtype=np.float32
            )
        else:
            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[:source_samples]],
                dtype=np.float32,
            )

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels)
        )
        adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
            (source_samples * nb_classes, nb_classes)
        )
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype="f")

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    if targeted:
        cw_params_batch_size = source_samples * nb_classes
    else:
        cw_params_batch_size = source_samples
    cw_params = {
        "binary_search_steps": 1,
        yname: adv_ys,
        "max_iterations": attack_iterations,
        "learning_rate": CW_LEARNING_RATE,
        "batch_size": cw_params_batch_size,
        "initial_const": 10,
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {"batch_size": np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys, args=eval_params)
    else:
        if viz_enabled:
            err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params)
            adv_accuracy = 1 - err
        else:
            err = model_eval(
                sess, x, y, preds, adv, y_test[:source_samples], args=eval_params
            )
            adv_accuracy = 1 - err

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print("--------------------------------------")

    # Compute the number of adversarial examples that were successfully found
    print("Avg. rate of successful adv. examples {0:.4f}".format(adv_accuracy))
    report.clean_train_adv_eval = 1.0 - adv_accuracy

    # Compute the average L2 distortion introduced by the attack
    mean_l2_perturbation = np.mean(
        np.sum((adv - adv_inputs) ** 2, axis=(1, 2, 3)) ** 0.5)
    print("Avg. L_2 norm of perturbations {0:.4f}".format(mean_l2_perturbation))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
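A hypothetical driver for the tutorial above; the real file wires these arguments through command-line flags, and the values shown mirror the defaults visible in Code Example #7's signature:

if __name__ == "__main__":
    mnist_tutorial_cw(viz_enabled=True, nb_epochs=6, batch_size=128,
                      source_samples=10, learning_rate=0.001,
                      attack_iterations=100, targeted=True)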
Code Example #9
def mnist_attack(train_start=0,
                 train_end=60000,
                 test_start=0,
                 test_end=10000,
                 viz_enabled=True,
                 nb_epochs=6,
                 batch_size=128,
                 nb_filters=64,
                 nb_samples=10,
                 learning_rate=0.001,
                 eps=0.3,
                 attack=0,
                 attack_iterations=100,
                 model_path=None,
                 targeted=False,
                 binary=False,
                 scale=False,
                 rand=False,
                 debug=None,
                 test=False,
                 data_dir=None,
                 delay=0,
                 adv=0,
                 nb_iter=40):
    """
    MNIST tutorial for generic attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1237)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    if debug:
        set_log_level(logging.DEBUG)
    else:
        set_log_level(logging.WARNING)  # for running on sharcnet

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(datadir=data_dir,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name='phase')

    # for attempting to break unscaled network.
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    save = False
    train_from_scratch = False
    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                (binary, scale, nb_filters, batch_size, learning_rate,
                 nb_epochs, adv) = parse_model_settings(model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    # Define TF model graph
    if binary:
        print('binary=True')
        if scale:
            print('scale=True')
            if rand:
                print('rand=True')
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
                model = make_scaled_binary_rand_cnn(
                    phase,
                    logits_scalar,
                    'binsc_',
                    input_shape=(None, img_rows, img_cols, channels),
                    nb_filters=nb_filters)
            else:
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
                model = make_scaled_binary_cnn(phase,
                                               logits_scalar,
                                               'binsc_',
                                               input_shape=(None, img_rows,
                                                            img_cols,
                                                            channels),
                                               nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bin_',
                                          nb_filters=nb_filters)
    else:
        if rand:
            print('rand=True')
            from cleverhans_tutorials.tutorial_models import make_scaled_rand_cnn
            model = make_scaled_rand_cnn(phase,
                                         logits_scalar,
                                         'fp_rand',
                                         nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_cnn
            model = make_basic_cnn(phase,
                                   logits_scalar,
                                   'fp_',
                                   nb_filters=nb_filters)

    preds = model(x, reuse=False)  # * logits_scalar
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    rng = np.random.RandomState([2017, 8, 30])

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int((MAX_EPS * 255) // 2)  # np.ceil of an int is a no-op
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar
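        # Both graphs regenerate adversarial examples on the fly every time
        # they run: training uses the (possibly randomized) train-time eps,
        # while evaluation always uses the fixed MAX_EPS budget.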

    def evaluate():
        # Evaluate the accuracy of the MNIST model on clean test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

        if adv != 0:
            # Accuracy of the adversarially trained model on adversarial
            # examples
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
            print('Test accuracy on adversarial examples: %0.4f' % acc)

            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params,
                             feed={logits_scalar: ATTACK_T})
            print('Test accuracy on adversarial examples (scaled): %0.4f' %
                  acc)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if test:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
        else:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            if test:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            evaluate=evaluate,
                            args=train_params,
                            save=save,
                            rng=rng)
            else:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            args=train_params,
                            save=save,
                            rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          preds,
                          X_test,
                          Y_test,
                          phase=phase,
                          feed={phase: False},
                          args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Build dataset
    ###########################################################################
    if viz_enabled:
        assert nb_samples == nb_classes
        idxs = [
            np.where(np.argmax(Y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        viz_rows = nb_classes if targeted else 2
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, viz_rows, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        if viz_enabled:
            from cleverhans.utils import grid_visual
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, idxs, nb_classes, img_rows, img_cols, channels)
        else:
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
                img_cols, channels)
    else:
        if viz_enabled:
            from cleverhans.utils import pair_visual
            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        print('Attack: CarliniWagnerL2')
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        print('Attack: SaliencyMapMethod')
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        print('Attack: FastGradientMethod')
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        print('Attack: MadryEtAl')
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
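    # For the iterative attacks above, 'eps' bounds the total L_inf
    # perturbation, 'eps_iter' is the per-step size, and 'nb_iter' the number
    # of steps; with eps_iter=0.01, nb_iter must be at least eps/0.01 for the
    # full budget to be reachable.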
    adv_np = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    name = 'm_fgsm_eps%s_n%s.npy' % (eps, nb_samples)
    fpath = os.path.join(
        '/scratch/gallowaa/mnist/adversarial_examples/cleverhans/', name)
    np.savez(fpath, x=adv_np, y=Y_test[:nb_samples])
    '''
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    adv_np, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                         phase: False}, args=eval_params)
    '''
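    # Note: generate_np builds the attack graph internally and runs it on the
    # numpy inputs directly, whereas the commented-out alternative constructs
    # a symbolic adv_x with generate() and evaluates it through batch_eval.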
    eval_params = {'batch_size': att_batch_size}
    if targeted:
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_np,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)

    else:
        print("Evaluating untargeted results")
        if viz_enabled:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[idxs],
                                      phase=phase,
                                      args=eval_params)
        else:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[:nb_samples],
                                      phase=phase,
                                      args=eval_params)

    if viz_enabled:
        n = nb_classes - 1
        for i in range(nb_classes):
            if targeted:
                for j in range(nb_classes):
                    if i != j:
                        if j != 0 and i != n:
                            grid_viz_data[i, j] = adv_np[j * n + i]
                        if j == 0 and i > 0 or i == n and j > 0:
                            grid_viz_data[i, j] = adv_np[j * n + i - 1]
                    else:
                        grid_viz_data[i, j] = adv_inputs[j * n]
            else:
                grid_viz_data[i, 0] = adv_inputs[i]
                grid_viz_data[i, 1] = adv_np[i]
        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv_np - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Compute number of modified features (L_0 norm)
    nb_changed = np.where(adv_np != adv_inputs)[0].shape[0]
    percent_perturb = np.mean(float(nb_changed) / adv_np.reshape(-1).shape[0])

    # Compute the average distortion introduced by the algorithm
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturb))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f}'.format(accuracy))
    print('{0:.4f}'.format(adv_accuracy))
    print('{0:.4f}'.format(percent_perturbed))
    print('{0:.4f}'.format(percent_perturb))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
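A minimal driver sketch for the example above. The enclosing function's signature is not shown in this excerpt, so the name mnist_attack and the flag values below are assumptions inferred from how the parameters are referenced in the body:

# Hypothetical invocation; every keyword mirrors a parameter the body uses,
# and ATTACK_FGSM is one of the attack constants the dispatch above expects.
report = mnist_attack(viz_enabled=False,
                      nb_epochs=6,
                      batch_size=128,
                      nb_samples=10,
                      learning_rate=0.001,
                      attack=ATTACK_FGSM,
                      eps=0.3,
                      model_path=None,  # train from scratch, don't save
                      targeted=False)
print('clean acc: %.4f, adv success: %.4f'
      % (report.clean_train_clean_eval, report.clean_train_adv_eval))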
Code example #10
def mnist_tutorial(nb_epochs=6, batch_size=128, train_end=-1, test_end=-1,
                   learning_rate=0.001):
    """
    MNIST cleverhans tutorial
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Train a pytorch MNIST model
    torch_model = PytorchMnistModel()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
        batch_size=batch_size)

    # Truncate the datasets so that our test runs more quickly
    train_loader.dataset.train_data = train_loader.dataset.train_data[
                                      :train_end]
    test_loader.dataset.test_data = test_loader.dataset.test_data[:test_end]

    # Train our model
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []

    total = 0
    correct = 0
    step = 0
    for epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # compute gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # update parameters

            preds_np = preds.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
            total += len(xs)
            step += 1
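            # The counters reset after each report, so each printed accuracy
            # covers only the window since the previous print, not a running
            # average over the epoch.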
            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # Evaluate on clean data
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()

        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()

        correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
        total += len(xs)

    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28,))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        correct += (np.argmax(adv_preds, axis=1) == ys).sum()
        total += len(xs)

    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc
    return report
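Since the converted model is wrapped as a standard cleverhans Model, other attacks drop in with the same graph; a sketch using MadryEtAl (PGD), which earlier examples in this collection also import (the parameter values here are illustrative assumptions):

# Sketch: swap FGSM for PGD on the same converted PyTorch model.
from cleverhans.attacks import MadryEtAl

pgd_op = MadryEtAl(cleverhans_model, sess=sess)
adv_x_pgd = pgd_op.generate(x_op, eps=0.3, eps_iter=0.01, nb_iter=40,
                            clip_min=0., clip_max=1.)
adv_preds_pgd = tf_model_fn(adv_x_pgd)  # evaluate exactly like the FGSM op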
Code example #11
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      nb_classes=10,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model.get_probs(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.dirname(model_path),
        'filename': os.path.basename(model_path)
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    args=train_params,
                    save=os.path.exists("models"),
                    rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    s = []
    for i in range(len(X_test)):
        pred = sess.run(preds, {x: X_test[i:i + 1]})
        print(pred)
        print(Y_test[i:i + 1])
        s.append(np.sort(pred)[0, -1] - np.sort(pred)[0, -2])

    # Draw a histogram of the top-two probability gaps
    def draw_hist(myList, Title, Xlabel, Ylabel):
        plt.hist(myList,
                 np.arange(0, 1, 0.01),
                 density=True,  # 'normed' was removed from matplotlib
                 stacked=True,
                 facecolor='blue')
        plt.xlabel(Xlabel)
        plt.ylabel(Ylabel)
        plt.title(Title)
        plt.show()

    draw_hist(myList=s,
              Title='legitimate',
              Xlabel='difference between max and second largest',
              Ylabel='Probability')
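    # The max-minus-second-max gap is a crude confidence margin; on clean
    # test inputs a well-trained model should concentrate it near 1.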

    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(Y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in X_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in X_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, channels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
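        # Each source image was replicated nb_classes times above, and adv_ys
        # pairs every copy with a distinct one-hot label, so the true class is
        # attacked as a "target" as well.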
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        'batch_size':
        source_samples * nb_classes if targeted else source_samples,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)
    preds_adv = model.get_probs(adv)
    pred = sess.run(preds_adv, {x: adv_inputs})
    '''
    s = []
    for i in range(len(adv_inputs)):
        print(pred[i])
        s.append((np.sort(pred[i])[-1]) - (np.sort(pred[i])[-2]))

    # Draw a histogram
    def draw_hist(myList, Title, Xlabel, Ylabel):
        plt.hist(myList, np.arange(0, 1, 0.01), density=True, stacked=True,
                 facecolor='red')
        plt.xlabel(Xlabel)
        plt.ylabel(Ylabel)
        plt.title(Title)
        plt.show()

    draw_hist(myList=s, Title='adversarial',
              Xlabel='difference between max and second largest',
              Ylabel='Probability')
    '''

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          Y_test[idxs], args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          Y_test[:source_samples],
                                          args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    return report
Code example #12
def gen_adv(sess,
            dataset,
            dataset_name,
            attack_method,
            attack_params,
            attack_name,
            testing=False,
            adv_range=range(0, 20),
            output_dir='./adv_output',
            show_prediction=False):
    # Object used to keep track of (and return) key accuracies
    print("========= Start attack with method {} on {} =========".format(
        attack_name, dataset_name))
    report = AccuracyReport()
    model = CNNModel(dataset)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object
    wrap = KerasModelWrapper(model.model)
    attack = attack_method(wrap, sess=sess)
    # if fgsm_params is None:
    #     fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1., 'y_target': None}

    adv_acc_metric = get_adversarial_acc_metric(model.model, attack,
                                                attack_params)
    model.compile(loss='categorical_crossentropy',
                  metrics=['accuracy', adv_acc_metric])

    # Train an MNIST model
    model.fit()

    # Evaluate the accuracy on legitimate and adversarial test examples
    _, acc, adv_acc = model.evaluate()
    report.clean_train_clean_eval = acc
    report.clean_train_adv_eval = adv_acc

    print('Test accuracy on legitimate examples: %0.4f' % acc)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)

    for sample_ind in adv_range:
        sample = model.x_test[sample_ind:(sample_ind + 1)]
        current_class = int(np.argmax(model.y_test[sample_ind]))
        target_classes = other_classes(model.nb_classes, current_class)
        if not osp.isdir(osp.join(output_dir, dataset_name, attack_name)):
            os.makedirs(osp.join(output_dir, dataset_name, attack_name))
        fn = osp.join(output_dir, dataset_name, attack_name,
                      str(sample_ind) + "_input.tiff")
        imageio.imwrite(fn, np.reshape(sample,
                                       (model.img_rows, model.img_cols)))
        if show_prediction:
            print("Prediction for the input is: \n", model.predict_one(sample))
        for target in target_classes:
            one_hot_target = np.zeros((1, model.nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            attack_params['y_target'] = one_hot_target
            adv_x = attack.generate_np(sample, **attack_params)
            fn = osp.join(output_dir, dataset_name, attack_name,
                          str(sample_ind) + "_adv{}.tiff".format(target))
            imageio.imwrite(
                fn, np.reshape(adv_x, (model.img_rows, model.img_cols)))
            if show_prediction:
                print("Prediction for the target {} is: \n".format(target),
                      model.predict_one(adv_x))

    # Calculate training error
    if testing:
        _, train_acc, train_adv_acc = model.evaluate()
        report.train_clean_train_clean_eval = train_acc
        report.train_clean_train_adv_eval = train_adv_acc

    print("========= Finish attack with method {} on {} =========".format(
        attack_name, dataset_name))
    return report
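A hedged usage sketch for gen_adv above. The type expected for the dataset argument is not visible in this excerpt (CNNModel(dataset) consumes it), so passing the string 'mnist' twice is an assumption:

# Hypothetical call; FastGradientMethod is a real cleverhans attack class,
# while the dataset handle and names are illustrative only.
from cleverhans.attacks import FastGradientMethod

fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1., 'y_target': None}
report = gen_adv(sess, 'mnist', 'mnist', FastGradientMethod, fgsm_params,
                 'fgsm', adv_range=range(0, 5), show_prediction=True)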
Code example #13
def mnist_tutorial_jsma(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    viz_enabled=VIZ_ENABLED,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    source_samples=SOURCE_SAMPLES,
    learning_rate=LEARNING_RATE,
):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end,
    )
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN("model1", nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {"batch_size": batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print("Test accuracy on legitimate test examples: {0}".format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print("Crafting " + str(source_samples) + " * " + str(nb_classes - 1) +
          " adversarial examples")

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype="i")

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype="f")

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype="f")

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        "theta": 1.0,
        "gamma": 0.1,
        "clip_min": 0.0,
        "clip_max": 1.0,
        "y_target": None,
    }
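    # In SaliencyMapMethod, theta is the amount added to each selected feature
    # (+1 drives pixels toward clip_max) and gamma caps the fraction of
    # features that may be modified per example.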

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print("--------------------------------------")
        print("Attacking input %i/%i" % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print("Generating adv. example for target class %i" % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params["y_target"] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)),
                    figure,
                )

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print("--------------------------------------")

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = (nb_classes - 1) * source_samples
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print("Avg. rate of successful adv. examples {0:.4f}".format(succ_rate))
    report.clean_train_adv_eval = 1.0 - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations[np.where(perturbations != 0)])
    print("Avg. rate of perturbed features {0:.4f}".format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(
        perturbations[np.where(perturbations != 0)] *
        (results[np.where(perturbations != 0)] == 1))
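    # Note: failed attacks contribute zeros to this mean, so it understates
    # the perturbation rate among successful examples.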
    print("Avg. rate of perturbed features for successful "
          "adversarial examples {0:.4f}".format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt

        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
Code example #14
    def test_run_single_gpu_fgsm(self):
        """
        Test the basic single GPU performance by comparing to the FGSM
        tutorial.
        """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {'train_start': 0,
                 'train_end': 5000,
                 'test_start': 0,
                 'test_end': 333,
                 'nb_epochs': 5,
                 'testing': True}
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update({'batch_size': 128, 'adam_lrn': 0.001,
                      'dataset': 'mnist', 'only_adv_train': False,
                      'eval_iters': 1, 'ngpu': 1, 'fast_tests': False,
                      'attack_type_train': '',
                      'save_dir': None, 'save_steps': 10000,
                      'attack_nb_iter_train': None, 'save': False,
                      'model_type': 'basic', 'attack_type_test': 'FGSM'})

        flags.update({'adv_train': False})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict['train']
        report_2.clean_train_clean_eval = report_dict['test']
        report_2.clean_train_adv_eval = report_dict['FGSM']

        # Run the multi-gpu trainer for adversarial training
        flags.update({'adv_train': True,
                      'attack_type_train': 'FGSM',
                      })
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict['train']
        report_2.adv_train_clean_eval = report_dict['test']
        report_2.adv_train_adv_eval = report_dict['FGSM']

        self.assertClose(report.train_clean_train_clean_eval,
                         report_2.train_clean_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report.clean_train_clean_eval,
                         report_2.clean_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.clean_train_adv_eval,
                         report_2.clean_train_adv_eval,
                         atol=5e-2)
        self.assertClose(report.train_adv_train_clean_eval,
                         report_2.train_adv_train_clean_eval,
                         atol=1e-1)
        self.assertClose(report.adv_train_clean_eval,
                         report_2.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.adv_train_adv_eval,
                         report_2.adv_train_adv_eval,
                         atol=1e-1)
Code example #15
    def test_run_single_gpu_fgsm(self):
        """
        Test the basic single GPU performance by comparing to the FGSM
        tutorial.
        """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {
            "train_start": 0,
            "train_end": 5000,
            "test_start": 0,
            "test_end": 333,
            "nb_epochs": 5,
            "testing": True,
        }
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update(
            {
                "batch_size": 128,
                "adam_lrn": 0.001,
                "dataset": "mnist",
                "only_adv_train": False,
                "eval_iters": 1,
                "ngpu": 1,
                "fast_tests": False,
                "attack_type_train": "",
                "save_dir": None,
                "save_steps": 10000,
                "attack_nb_iter_train": None,
                "save": False,
                "model_type": "basic",
                "attack_type_test": "FGSM",
            }
        )

        flags.update({"adv_train": False})
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict["train"]
        report_2.clean_train_clean_eval = report_dict["test"]
        report_2.clean_train_adv_eval = report_dict["FGSM"]

        # Run the multi-gpu trainer for adversarial training
        flags.update({"adv_train": True, "attack_type_train": "FGSM"})
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict["train"]
        report_2.adv_train_clean_eval = report_dict["test"]
        report_2.adv_train_adv_eval = report_dict["FGSM"]

        self.assertClose(
            report.train_clean_train_clean_eval,
            report_2.train_clean_train_clean_eval,
            atol=5e-2,
        )
        self.assertClose(
            report.clean_train_clean_eval, report_2.clean_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report.clean_train_adv_eval, report_2.clean_train_adv_eval, atol=5e-2
        )
        self.assertClose(
            report.train_adv_train_clean_eval,
            report_2.train_adv_train_clean_eval,
            atol=1e-1,
        )
        self.assertClose(
            report.adv_train_clean_eval, report_2.adv_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report.adv_train_adv_eval, report_2.adv_train_adv_eval, atol=1e-1
        )
Code example #16
def mnist_tutorial_jsma(train_start=0,
                        train_end=60000,
                        test_start=0,
                        test_end=10000,
                        viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS,
                        batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
    """
  MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    num_threads = None
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64

    # Define TF model graph
    model = make_basic_picklable_cnn()

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_train, [60000, 28, 28]), y_train))
    dataset = dataset.batch(32)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_test, [10000, 28, 28]), y_test))
    val_dataset = val_dataset.batch(32)
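    # Note: these tf.data pipelines appear unused in the rest of this example;
    # train() and model_eval() below consume the numpy arrays directly.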

    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    if TRAIN_NEW == 1:
        with sess.as_default():
            train(sess, loss, x_train, y_train, args=train_params, rng=rng)
            save("test.joblib", model)
    else:
        with sess.as_default():
            model = load("test.joblib")
        assert len(model.get_params()) > 0
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    figure = None
    # Running totals for the neighbour statistic printed after the loop
    tn = 0
    totc = 0
    # Loop over the samples we want to perturb into adversarial examples
    seed(SEED)
    for sample_ind in xrange(0, source_samples):
        img = randint(0, 9999)  # random.randint is inclusive on both ends
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[img:(img + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[img]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))
        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[img].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]
            diff = np.array(adv_x - sample)
            diff = np.reshape(diff, (28, 28))
            diff = diff * 255
            cv2.imwrite("test.png", diff)
            diff = cv2.imread("test.png")
            diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
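            # The PNG round-trip clips negative perturbations to zero and
            # quantizes to uint8, so the statistics below only see pixels
            # whose intensity was pushed upward.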
            # Count, for every modified pixel, how many of its 8-connected
            # neighbours were also modified; border pixels simply have fewer
            # in-bounds neighbours to check.
            neighbors = 0
            tc = 0
            for i in range(28):
                for j in range(28):
                    if diff[i, j] > 0:
                        tc = tc + 1
                        totc = totc + 1
                        for di in (-1, 0, 1):
                            for dj in (-1, 0, 1):
                                if di == 0 and dj == 0:
                                    continue
                                ni, nj = i + di, j + dj
                                if 0 <= ni < 28 and 0 <= nj < 28 \
                                        and diff[ni, nj] > 0:
                                    neighbors = neighbors + 1

            tn = tn + neighbors
            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)
            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb
            #print(perturbations[target, sample_ind])

    print('--------------------------------------')

    print("average neighbors per modified pixel ", tn / totc)
    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.8f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)

    s = perturbations.shape
    myPert = np.empty(0)
    myResults = np.empty(0)
    for i in range(s[0]):
        for j in range(s[1]):
            if perturbations[i][j] > 0:
                myPert = np.append(myPert, perturbations[i][j])
                myResults = np.append(myResults, results[i][j])
    min_perturbed = np.min(myPert)
    max_perturbed = np.max(myPert)

    s2 = myResults.shape
    final = np.empty(0)
    for i in range(s2[0]):
        if myResults[i] > 0:
            final = np.append(final, myPert[i])

    print('Avg. rate of perturbed features {0:.8f}'.format(percent_perturbed))
    print('MIN of perturbed features {0:.8f}'.format(min_perturbed))
    print('MAX of perturbed features {0:.8f}'.format(max_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    min_perturb_succ = np.min(final)
    max_perturb_succ = np.max(final)
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(percent_perturb_succ))
    print('Min of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(min_perturb_succ))
    print('Max of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(max_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
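The corner and edge bookkeeping in the neighbor count above can be written much more compactly by zero-padding the modification mask. A minimal sketch, not part of the original script (note it also counts diagonal neighbors at the four corners, which the hand-written loop above skips):

import numpy as np

def count_neighbor_pairs(diff):
    # Total, over all modified pixels (diff > 0), of how many of each
    # pixel's 8-connected neighbors were also modified.
    mask = (diff > 0).astype(int)
    padded = np.pad(mask, 1, mode='constant')  # zero border absorbs corners/edges
    neighbor_sum = sum(
        padded[1 + di:1 + di + mask.shape[0], 1 + dj:1 + dj + mask.shape[1]]
        for di in (-1, 0, 1) for dj in (-1, 0, 1) if (di, dj) != (0, 0))
    return int((neighbor_sum * mask).sum())

The myPert/final collection loops in the same function are likewise equivalent to boolean-mask selections such as perturbations[perturbations > 0] and myPert[myResults > 0].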
Code example #17
0
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, source_samples=10,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x, y, x_train, y_train, args=train_params,
              save=os.path.exists("models"), rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[idxs]],
                dtype=np.float32)
        else:
            adv_inputs = np.array(
                [[instance] * nb_classes for
                 instance in x_test[:source_samples]], dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape((source_samples *
                                                     nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {'binary_search_steps': 1,
                 yname: adv_ys,
                 'max_iterations': attack_iterations,
                 'learning_rate': 0.1,
                 'batch_size': source_samples * nb_classes if
                 targeted else source_samples,
                 'initial_const': 10}

    adv = cw.generate_np(adv_inputs,
                         **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(
            sess, x, y, preds, adv, adv_ys, args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          y_test[idxs], args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          y_test[:source_samples],
                                          args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
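A minimal driver for this tutorial might look like the following; this is a sketch, and the argument values are illustrative rather than part of the original:

if __name__ == '__main__':
    # viz disabled so the run does not block on matplotlib windows
    mnist_tutorial_cw(viz_enabled=False, nb_epochs=6, batch_size=128,
                      source_samples=10, attack_iterations=100, targeted=True)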
Code example #18
0
File: DeepFool.py Project: flame0409/DNN_Classcify
def mnist_tutorial_deepfool(train_start=0, train_end=60000,  # read 60,000 training examples
                            test_start=0, test_end=10000,  # read 10,000 test examples
                            viz_enabled=True, nb_epochs=6,
                            batch_size=128, nb_classes=2, source_samples=10,
                            learning_rate=0.001, attack_iterations=100,
                            model_path=os.path.join("models", "mnist")):
    """
    MNIST tutorial for Deepfool's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model (an epoch is one full
        forward and backward pass over the whole training set)
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param attack_iterations: number of attack iterations
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions: 28*28*1 images
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_picklable_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2018, 8, 9])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path+".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"), rng=rng)
        print("save success")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the DeepFool approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a DeepFool attack object
    deepfool = DeepFool(model, back='tf', sess=sess)


    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][1] for i in range(10)]
    print("idxs:",idxs)

    # construct adv_inputs
    grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    print("grid_viz_data",grid_viz_data.shape)
    adv_inputs = X_test[idxs].reshape([-1,28,28,1])

    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': attack_iterations,
                       'nb_classes': 10,
                       'clip_min': 0.,
                       'clip_max': 1.}

    adv = deepfool.generate_np(adv_inputs, **deepfool_params)

    print("adv success")

    adv_accuracy = 1-model_eval(sess, x, y, preds, adv, Y_test[idxs],
                                args={'batch_size': 10})

    for j in range(10):
        grid_viz_data[j, 0] = adv_inputs[j]
        grid_viz_data[j, 1] = adv[j]

    print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1.-adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
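The distortion statistic printed above is the mean per-image L_2 norm; an equivalent formulation (a sketch) using np.linalg.norm, which can be easier to read:

l2_per_image = np.linalg.norm((adv - adv_inputs).reshape(len(adv), -1), axis=1)
assert np.allclose(l2_per_image.mean(), percent_perturbed)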
Code example #19
0
y = tf.placeholder(tf.float32, shape=(None, nb_classes))

# Load and set up all models
from stn.conv_model import build_cnn_no_stn
clf = build_cnn_no_stn()
clf.load_weights("./keras_weights/cnn_v4.hdf5")
# clf = conv_model_no_color_adjust()
# clf.load_weights("./keras_weights/stn_v5.hdf5")

wrap_clf = KerasModelWrapper(clf)
preds = clf(x)

eval_par = {'batch_size': 128}
acc = model_eval(sess, x, y, preds, X_test, y_test, args=eval_par)
print('Test accuracy on legitimate test examples: {0}'.format(acc))
report.clean_train_clean_eval = acc

# FGSM
# fgsm = FastGradientMethod(wrap_clf, sess=sess)
# fgsm_params = {'eps': 0.1,
#                'clip_min': 0.,
#                'clip_max': 1.}
# adv_x = fgsm.generate_np(X_atk, **fgsm_params)

# # Evaluate the accuracy of the MNIST model on adversarial examples
# acc = model_eval(sess, x, y, preds_adv, X_test, y_test, args=eval_par)
# print('Test accuracy on adversarial examples: %0.4f\n' % acc)

# CarliniWagner attack
# attack_iterations = 200
# cw_params = {'binary_search_steps': 3,
Code example #20
0
def cifar10_tutorial_jsma(train_start=0,
                          train_end=60000,
                          test_start=0,
                          test_end=10000,
                          viz_enabled=VIZ_ENABLED,
                          nb_epochs=NB_EPOCHS,
                          batch_size=BATCH_SIZE,
                          source_samples=SOURCE_SAMPLES,
                          learning_rate=LEARNING_RATE,
                          model_path=MODEL_PATH,
                          noise_output=NOISE_OUTPUT):
    """
  CIFAR10 tutorial for the Jacobian-based saliency map approach (JSMA)
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param model_path: path to the model file
  :param noise_output: (boolean) plot the noise (adversarial minus clean)
      instead of the adversarial examples themselves
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get CIFAR10 test data
    cifar10 = CIFAR10(train_start=train_start,
                      train_end=train_end,
                      test_start=test_start,
                      test_end=test_end)
    x_train, y_train = cifar10.get_set('train')
    x_test, y_test = cifar10.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }
    # Loop over the samples we want to perturb into adversarial examples
    adv_all = np.zeros((nb_classes, img_rows, img_cols, nchannels), dtype='f')
    sample_all = np.zeros((nb_classes, img_rows, img_cols, nchannels),
                          dtype='f')
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        # grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        #     sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)
            adv_all[current_class] = adv_x
            sample_all[current_class] = sample

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute the number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]
            # Display the original and adversarial images side-by-side
            # if viz_enabled:
            #   figure = pair_visual(
            #       np.reshape(sample, (img_rows, img_cols, nchannels)),
            #       np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # # Add our adversarial example to our grid data
            # grid_viz_data[target, current_class, :, :, :] = np.reshape(
            #     adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Compute the average distortion introduced by the algorithm
    l2_norm = np.mean(np.sum((adv_all - sample_all)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(l2_norm))

    for i in range(nb_classes):
        if noise_output:
            image = adv_all[i] - sample_all[i]
        else:
            image = adv_all[i]
        grid_viz_data[i, 0] = image

    # Close TF session
    sess.close()

    def save_visual(data, path):
        """
    Modified version of cleverhans.plot.pyplot
    """
        import matplotlib.pyplot as plt

        figure = plt.figure()
        # figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = data.shape[0]
        num_rows = data.shape[1]
        num_channels = data.shape[4]
        for y in range(num_rows):
            for x in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (x + 1) + (y * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(data[x, y, :, :, 0], cmap='gray')
                else:
                    plt.imshow(data[x, y, :, :, :])

        # Draw the plot and return
        plt.savefig(path)

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        if noise_output:
            image_name = "output/jsma_cifar10_noise.png"
        else:
            image_name = "output/jsma_cifar10.png"
        _ = save_visual(grid_viz_data, image_name)

    return report
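When noise_output is set, the grid stores the raw difference adv_all[i] - sample_all[i], which can be negative and renders poorly under imshow's clipping for 3-channel images; one would typically min-max rescale it for display first. A sketch, not in the original:

def rescale_for_display(noise, eps=1e-12):
    # Map a signed noise image into [0, 1] so plt.imshow renders it sensibly.
    return (noise - noise.min()) / (noise.ptp() + eps)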
Code example #21
0
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x, y, x_train, y_train, args=train_params,
          rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute the number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
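One subtlety in the success-only statistic above: np.mean(perturbations * (results == 1)) still divides by the total number of (target, sample) pairs, so failed attacks drag the average toward zero. Restricting the mean to successful attacks would look like this (a sketch):

succ_mask = results == 1
if succ_mask.any():
    percent_perturb_succ = perturbations[succ_mask].mean()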
Code example #22
0
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, nb_classes=10, source_samples=10,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Disable Keras learning phase since we will be serving through tensorflow
    keras.layers.core.K.set_learning_phase(0)

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' "
              "to 'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path+".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"))

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    wrap = KerasModelWrapper(model)
    cw = CarliniWagnerL2(wrap, back='tf', sess=sess)

    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][0] for i in range(10)]
    if targeted:
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        one_hot = np.zeros((10, 10))
        one_hot[np.arange(10), np.arange(10)] = 1

        adv_inputs = np.array([[instance] * 10 for instance in X_test[idxs]],
                              dtype=np.float32)
        adv_inputs = adv_inputs.reshape((100, 28, 28, 1))
        adv_ys = np.array([one_hot] * 10, dtype=np.float32).reshape((100, 10))
        yname = "y_target"
    else:
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        adv_inputs = X_test[idxs]
        adv_ys = None
        yname = "y"

    cw_params = {'binary_search_steps': 1,
                 yname: adv_ys,
                 'max_iterations': attack_iterations,
                 'learning_rate': 0.1,
                 'batch_size': 100 if targeted else 10,
                 'initial_const': 10}

    adv = cw.generate_np(adv_inputs,
                         **cw_params)

    if targeted:
        adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys,
                                  args={'batch_size': 10})
    else:
        adv_accuracy = 1-model_eval(sess, x, y, preds, adv, Y_test[idxs],
                                    args={'batch_size': 10})

    for j in range(10):
        if targeted:
            for i in range(10):
                grid_viz_data[i, j] = adv[i * 10 + j]
        else:
            grid_viz_data[j, 0] = adv_inputs[j]
            grid_viz_data[j, 1] = adv[j]

    print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1.-adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
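The attack above is deliberately weakened for speed (binary_search_steps=1, 100 iterations). For a stronger attack one would typically raise both settings; a sketch with illustrative values close to the classic Carlini-Wagner defaults:

strong_cw_params = dict(cw_params)
strong_cw_params.update({'binary_search_steps': 9, 'max_iterations': 1000})
adv_strong = cw.generate_np(adv_inputs, **strong_cw_params)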
Code example #23
0
def test_attacks(data_name,
                 batch_size=128,
                 source_samples=10,
                 model_path=os.path.join("models", "mnist"),
                 targeted=True):
    """
    Evaluate a classifier on clean data, then save the indices of the
    correctly classified test inputs for use by later attacks.
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    print("Created TensorFlow session.")
    set_log_level(logging.DEBUG)

    if data_name == 'mnist':
        from cleverhans.utils_mnist import data_mnist
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                      train_end=60000,
                                                      test_start=0,
                                                      test_end=10000)
    if data_name in ['cifar10', 'plane_frog']:
        from import_data_cifar10 import load_data_cifar10
        labels = None
        if data_name == 'plane_frog':
            labels = [0, 6]
        datapath = '../cifar_data/'
        X_train, X_test, Y_train, Y_test = load_data_cifar10(datapath,
                                                             labels=labels)

    img_rows, img_cols, channels = X_test[0].shape
    nb_classes = Y_test.shape[1]

    # Define input TF placeholder
    batch_size = min(batch_size, source_samples)
    x = tf.placeholder(tf.float32,
                       shape=(batch_size, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))

    # Define TF model graph
    model_name = str(sys.argv[1])
    model = load_classifier(sess, model_name, data_name)
    if 'bayes' in model_name:
        model_name = model_name + '_cnn'

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    if 'bnn' not in model_name:
        keras.backend.set_learning_phase(0)
    else:
        keras.backend.set_learning_phase(1)

    preds = model.predict(x, softmax=False)  # output logits
    eval_params = {'batch_size': batch_size}
    accuracy, y_pred_clean = model_eval(sess,
                                        x,
                                        y,
                                        preds,
                                        X_test,
                                        Y_test,
                                        args=eval_params,
                                        return_pred=True)
    print('Test accuracy on legitimate test examples: %.2f' % (accuracy * 100))
    report.clean_train_clean_eval = accuracy
    y_pred_clean = y_pred_clean[:Y_test.shape[0]]
    correct_prediction = (np.argmax(Y_test, 1) == np.argmax(y_pred_clean, 1))
    ind = np.where(correct_prediction == 1)[0]
    print('crafting adversarial examples only on correctly predicted images...')
    print('%d / %d in total' % (len(ind), X_test.shape[0]))

    path = 'data_ind/'
    if not os.path.isdir(path):
        os.mkdir(path)
        print('create path ' + path)
    filename = data_name + '_' + model_name
    import pickle
    pickle.dump(ind, open(path + filename + '.pkl', 'wb'))
    print("results saved at %s.pkl" % (path + filename))

    return report
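The pickled index array can be reloaded later to restrict an attack to inputs the model classifies correctly; a minimal sketch using the same path and filename as above:

import pickle
with open(path + filename + '.pkl', 'rb') as f:
    ind = pickle.load(f)
X_attack, Y_attack = X_test[ind], Y_test[ind]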
Code example #24
0
def mnist(nb_epochs=NB_EPOCHS,
          batch_size=BATCH_SIZE,
          train_end=-1,
          test_end=-1,
          learning_rate=LEARNING_RATE):
    """
  MNIST cleverhans tutorial
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
    # Train a pytorch MNIST model
    torch_model = MNIST_arch_0()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()
    data_dir = '/scratch/etv21/conv_gp_data/MNIST_data/expA'
    training_dataset, test_dataset = mnist_sevens_vs_twos(data_dir, noisy=True)
    train_loader = torch.utils.data.DataLoader(training_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size)
    #adversarial_loader = torch.utils.data.DataLoader(Adversarial_MNIST_Dataset(), batch_size=batch_size)

    # Train our model
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []

    total = 0
    correct = 0
    step = 0
    for _epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # calc gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # update gradients

            preds_np = preds.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
            total += len(xs)
            step += 1

            if total % 200 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    #examine_weights_biases(torch_model)
    # Evaluate on clean data
    total = 0
    correct = 0

    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()
        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()

        correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
        total += len(xs)

    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))
    '''
  For transfer from GP examples to CNN:
  total = 0
  correct = 0
  
  #import pdb; pdb.set_trace()
  c = 0
  for xs, ys in adversarial_loader:
    xs, ys = Variable(xs), Variable(ys)
    if torch.cuda.is_available():
      xs, ys = xs.cuda(), ys.cuda()
    
    preds = torch_model(xs)
    preds_np = preds.data.cpu().numpy()

    correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
    total += len(xs)

  acc = float(correct) / total
  print('[%s] Adversarial accuracy: %.2f%%' % (step, acc * 100))

 '''

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    epsilon = 10
    norm = 2
    fgsm_params = {'eps': epsilon, 'clip_min': 0., 'clip_max': 1., 'ord': norm}

    attack_name = 'CNN_FGSM_eps={}_norm={}'.format(epsilon, norm)
    attack_dir = os.path.join(data_dir, attack_name)
    if not os.path.exists(attack_dir):
        os.makedirs(attack_dir)
        print("Directory ", attack_dir, " Created ")

    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0

    all_adv_preds = np.array(0)
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        all_adv_preds = np.append(all_adv_preds, adv_preds)
        correct += (np.argmax(adv_preds, axis=1) == ys.cpu().numpy()).sum()
        total += len(xs)

    np.save('adv_predictions', all_adv_preds)
    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc

    single_adv_x_op = tf.placeholder(tf.float32, shape=(1, 28, 28))
    encode_op = tf.image.encode_png(
        tf.reshape(tf.cast(single_adv_x_op * 255, tf.uint8), (28, 28, 1)))

    adv_images, clean_images, adv_labels = None, None, None

    # Save batches 0 and 10 as PNGs, i.e. a batch of 2s and a batch of 7s
    b = 0
    for xs, ys in test_loader:
        adv_xs = sess.run(adv_x_op, feed_dict={x_op: xs})

        if b == 0 or b == 10:
            c = b * batch_size
            for i in range(0, adv_xs.shape[0]):
                enc_img = sess.run(encode_op,
                                   feed_dict={single_adv_x_op: adv_xs[i]})

                f = open(
                    '/scratch/etv21/conv_gp_data/MNIST_data/expA/{}/{}.png'.
                    format(attack_name, c), "wb+")
                f.write(enc_img)
                f.close()
                c += 1

        if adv_images is None:
            adv_images = np.array(adv_xs.reshape(adv_xs.shape[0], 28, 28))
            clean_images = np.array(xs.reshape(xs.shape[0], 28, 28))
            adv_labels = np.array(ys)
        else:
            adv_images = np.append(adv_images,
                                   adv_xs.reshape(adv_xs.shape[0], 28, 28), 0)
            clean_images = np.append(clean_images,
                                     xs.reshape(xs.shape[0], 28, 28), 0)
            adv_labels = np.append(adv_labels, ys, 0)
        b += 1

    np.save('/scratch/etv21/conv_gp_data/MNIST_data/expA/two_vs_seven_adv_{}'.
            format(attack_name),
            adv_images,
            allow_pickle=False)
    np.save('/scratch/etv21/conv_gp_data/MNIST_data/expA/two_vs_seven_labels',
            adv_labels,
            allow_pickle=False)

    return report
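The arrays saved above can be reloaded for later evaluation. A sketch, assuming the same scratch paths as above (np.save appends the .npy extension, and the file name mirrors attack_name as formatted earlier):

adv_images = np.load('/scratch/etv21/conv_gp_data/MNIST_data/expA/'
                     'two_vs_seven_adv_CNN_FGSM_eps=10_norm=2.npy')
adv_labels = np.load('/scratch/etv21/conv_gp_data/MNIST_data/expA/two_vs_seven_labels.npy')

Code example #25
0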
def mnist_tutorial(nb_epochs=6,
                   batch_size=128,
                   train_end=-1,
                   test_end=-1,
                   learning_rate=0.001):
    """
    MNIST cleverhans tutorial
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Train a pytorch MNIST model
    torch_model = PytorchMnistModel()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        'data', train=True, download=True, transform=transforms.ToTensor()),
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        'data', train=False, transform=transforms.ToTensor()),
                                              batch_size=batch_size)

    # Truncate the datasets so that our test runs more quickly
    train_loader.dataset.train_data = train_loader.dataset.train_data[:train_end]
    test_loader.dataset.test_data = test_loader.dataset.test_data[:test_end]

    # Train our model
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []

    total = 0
    correct = 0
    step = 0
    for epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # calc gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # update gradients

            preds_np = preds.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys).sum()
            total += len(xs)
            step += 1
            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # Evaluate on clean data
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()

        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()

        correct += (np.argmax(preds_np, axis=1) == ys).sum()
        total += len(xs)

    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        correct += (np.argmax(adv_preds, axis=1) == ys).sum()
        total += len(xs)

    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc
    return report
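A minimal smoke-test driver for this tutorial (a sketch; the truncation values are illustrative, not part of the original):

if __name__ == '__main__':
    mnist_tutorial(nb_epochs=1, batch_size=128, train_end=1000, test_end=1000)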
Code example #26
0
def mnist_tutorial_adv_train(train_start=0,
                             train_end=60000,
                             test_start=0,
                             test_end=10000,
                             viz_enabled=VIZ_ENABLED,
                             nb_epochs=NB_EPOCHS,
                             batch_size=BATCH_SIZE,
                             source_samples=SOURCE_SAMPLES,
                             learning_rate=LEARNING_RATE,
                             attack_iterations=ATTACK_ITERATIONS,
                             model_path=MODEL_PATH,
                             targeted=TARGETED,
                             noise_output=NOISE_OUTPUT):
    """
  MNIST tutorial for Adversarial Training
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param attack_iterations: number of attack iterations
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :param noise_output: (boolean) plot the noise rather than the adversarial
      examples themselves
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the FGSM, BIM and MIM approaches
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate FGSM, BIM and MIM attack objects
    fgsm = FastGradientMethod(model, sess=sess)
    bim = BasicIterativeMethod(model, sess=sess)
    mim = MomentumIterativeMethod(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    bim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }
    mim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }

    adv_fgsm = fgsm.generate_np(adv_inputs, **fgsm_params)
    adv_bim = bim.generate_np(adv_inputs, **bim_params)
    adv_mim = mim.generate_np(adv_inputs, **mim_params)
    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_fgsm_accuracy = model_eval(sess,
                                       x,
                                       y,
                                       preds,
                                       adv_fgsm,
                                       adv_ys,
                                       args=eval_params)
        adv_bim_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_bim,
                                      adv_ys,
                                      args=eval_params)
        adv_mim_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_mim,
                                      adv_ys,
                                      args=eval_params)

    else:
        if viz_enabled:
            err_fgsm = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_fgsm,
                                  y_test[idxs],
                                  args=eval_params)
            err_bim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_bim,
                                 y_test[idxs],
                                 args=eval_params)
            err_mim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_mim,
                                 y_test[idxs],
                                 args=eval_params)
            adv_fgsm_accuracy = 1 - err_fgsm
            adv_bim_accuracy = 1 - err_bim
            adv_mim_accuracy = 1 - err_mim
        else:
            err_fgsm = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_fgsm,
                                  y_test[:source_samples],
                                  args=eval_params)
            err_bim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_bim,
                                 y_test[:source_samples],
                                 args=eval_params)
            err_mim = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 adv_mim,
                                 y_test[:source_samples],
                                 args=eval_params)

            adv_fgsm_accuracy = 1 - err_fgsm
            adv_bim_accuracy = 1 - err_bim
            adv_mim_accuracy = 1 - err_mim

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. (FGSM) examples {0:.4f}'.format(
        adv_fgsm_accuracy))
    report.clean_train_adv_fgsm_eval = 1. - adv_fgsm_accuracy
    print('Avg. rate of successful adv. (BIM) examples {0:.4f}'.format(
        adv_bim_accuracy))
    report.clean_train_adv_bim_eval = 1. - adv_bim_accuracy
    print('Avg. rate of successful adv. (MIM) examples {0:.4f}'.format(
        adv_mim_accuracy))
    report.clean_train_adv_mim_eval = 1. - adv_mim_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed_fgsm = np.mean(
        np.sum((adv_fgsm - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (FGSM) perturbations {0:.4f}'.format(
        percent_perturbed_fgsm))
    percent_perturbed_bim = np.mean(
        np.sum((adv_bim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (BIM) perturbations {0:.4f}'.format(
        percent_perturbed_bim))
    percent_perturbed_mim = np.mean(
        np.sum((adv_mim - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of (MIM) perturbations {0:.4f}'.format(
        percent_perturbed_mim))

    ###########################################################################
    # Adversarial Training
    ###########################################################################

    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)

    # Attack the model being adversarially trained, so the crafted examples
    # track its parameters as training progresses
    fgsm2 = FastGradientMethod(model2, sess=sess)

    # bim2 = BasicIterativeMethod(model2, sess=sess)
    # mim2 = MomentumIterativeMethod(model2, sess=sess)

    def attack_fgsm(x):
        return fgsm2.generate(x, **fgsm_params)

    # def attack_bim(x):
    #   return bim2.generate(x, **bim_params)
    # def attack_mim(x):
    #   return mim2.generate(x, **mim_params)

    preds2 = model2.get_logits(x)
    loss2_fgsm = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm)
    # loss2_bim = CrossEntropy(model2, smoothing=0.1, attack=attack_bim)
    # loss2_mim = CrossEntropy(model2, smoothing=0.1, attack=attack_mim)

    train(sess, loss2_fgsm, x_train, y_train, args=train_params, rng=rng)
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy of the adversarially trained model on legitimate '
          'test examples: {0}'.format(accuracy))
    report.adv_train_clean_eval = accuracy

    # Evaluate the adversarially trained model on the adversarial examples
    # crafted earlier against the original model
    adv_labels = adv_ys if targeted else (
        y_test[idxs] if viz_enabled else y_test[:source_samples])
    adv_fgsm_accuracy = model_eval(sess, x, y, preds2, adv_fgsm, adv_labels,
                                   args=eval_params)
    adv_bim_accuracy = model_eval(sess, x, y, preds2, adv_bim, adv_labels,
                                  args=eval_params)
    adv_mim_accuracy = model_eval(sess, x, y, preds2, adv_mim, adv_labels,
                                  args=eval_params)
    label = 'target-hit rate' if targeted else 'accuracy'
    print('Adversarially trained model, %s on FGSM / BIM / MIM examples: '
          '%.4f / %.4f / %.4f' % (label, adv_fgsm_accuracy, adv_bim_accuracy,
                                  adv_mim_accuracy))

    # Close TF session
    sess.close()

    return report
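Note: in the example above, passing `attack=attack_fgsm` to `CrossEntropy` makes cleverhans mix the loss on clean and attacked batches during training. A rough Python sketch of that idea, assuming `loss_fn` and `attack_fn` are callables (the blend coefficient and exact behavior depend on the cleverhans version, so treat this as an approximation, not the library's code):

def adversarial_training_loss(loss_fn, attack_fn, x, y, adv_coeff=0.5):
    """Blend the loss on clean inputs with the loss on attacked inputs."""
    x_adv = attack_fn(x)  # e.g. FGSM run against the model being trained
    return ((1.0 - adv_coeff) * loss_fn(x, y)
            + adv_coeff * loss_fn(x_adv, y))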
Code example #27
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess,
              loss,
              x,
              y,
              x_train,
              y_train,
              args=train_params,
              save=os.path.exists("models"),
              rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        'batch_size': source_samples * nb_classes if targeted else source_samples,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          y_test[idxs], args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          y_test[:source_samples],
                                          args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
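Note: unlike the single-step FGSM, CarliniWagnerL2 solves an optimization problem per input: it minimizes the L2 distortion plus `initial_const` times a hinge on the logits. A NumPy sketch of the targeted objective for one example (names are illustrative; `logits` is the model's pre-softmax output):

import numpy as np

def cw_l2_objective(x, x_adv, logits, target, const=10.0, kappa=0.0):
    """L2 distortion plus a hinge that pushes the target logit on top."""
    dist = np.sum((x_adv - x) ** 2)
    z_other = np.max(np.delete(logits, target))  # best non-target logit
    return dist + const * max(z_other - logits[target], -kappa)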
Code example #28
File: save_adv.py Project: newwhitecheng/cleverhans
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=False, nb_epochs=6,
                      batch_size=128, nb_classes=10, source_samples=1,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    #tf.set_random_seed(1234)

    # Create TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.log_device_placement=True
    sess = tf.Session(config=config)
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")
    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"), rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy
    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    # Instantiate CW and FGSM attack objects
    cw = CarliniWagnerL2(model, back='tf', sess=sess)
    fgsm = FastGradientMethod(model, sess=sess)

    # result rows: clean prediction, FGSM, CW-after-FGSM,
    # median-filtered FGSM, CW-after-filter
    result = np.zeros((5, len(X_test)))
    strength = np.zeros((3, len(X_test)))

    adv_ys = None
    yname = "y"

    cw_params = {'binary_search_steps': 1,
                 'max_iterations': attack_iterations,
                 'learning_rate': 0.1,
                 'batch_size': source_samples,
                 'initial_const': 10}
    fgsm_eps = [0.1, 0.3, 0.5]
    for j in fgsm_eps:
        fgsm_params = {'eps': j,
                       'clip_min': 0.,
                       'clip_max': 1.}

        for i in range(len(X_test)):
            feed_dict = {x: X_test[i].reshape((1,28,28,1))}
            Classes0 = preds.eval(feed_dict=feed_dict,session=sess)
            Class0 = np.argmax(Classes0)
            result[0,i] = Class0
            adv_inputs = X_test[i]
            adv_inputs = adv_inputs.reshape((1,28,28,1))
            #adv = cw.generate_np(adv_inputs, **cw_params)

            adv = fgsm.generate_np(adv_inputs, **fgsm_params)
            feed_dict = {x: adv}
            Classes1 = preds.eval(feed_dict=feed_dict, session=sess)
            Class1 = np.argmax(Classes1)
            result[1, i] = Class1
            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                           axis=(1, 2, 3))**.5)
            strength[0,i] = percent_perturbed

            adv2 = cw.generate_np(adv,**cw_params)
            feed_dict = {x: adv2}
            Classes2 = preds.eval(feed_dict=feed_dict,session=sess)
            Class2 = np.argmax(Classes2)
            result[2,i] = Class2
            # Compute the average distortion introduced by the algorithm
            percent_perturbed2 = np.mean(np.sum((adv2 - adv)**2,
                                           axis=(1, 2, 3))**.5)
            strength[1,i] = percent_perturbed2
            
            adv_f = sig.medfilt(adv,(1,3,3,1))
            feed_dict = {x: adv_f}
            Classes1 = preds.eval(feed_dict=feed_dict,session=sess)
            Class1 = np.argmax(Classes1)
            result[3,i] = Class1
            # Compute the average distortion introduced by the algorithm
            #percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
            #                               axis=(1, 2, 3))**.5)
            #strength[0,i] = percent_perturbed

            adv2_f = cw.generate_np(adv_f,**cw_params)
            feed_dict = {x: adv2_f}
            Classes2 = preds.eval(feed_dict=feed_dict,session=sess)
            Class2 = np.argmax(Classes2)
            result[4,i] = Class2
            # Compute the average distortion introduced by the algorithm
            percent_perturbed2 = np.mean(np.sum((adv2_f - adv_f)**2,
                                           axis=(1, 2, 3))**.5)
            strength[2,i] = percent_perturbed2
            if i % 100 == 0:
                print(i)
    # Close TF session
    sess.close()
    # NOTE: adv_01 / adv_03 / adv_05 are meant to hold the adversarial examples
    # for eps = 0.1 / 0.3 / 0.5, but this snippet never assigns them in the
    # loop above; they must be collected there before saving.
    sio.savemat('fgsm_mnist.mat', {'adv_01': adv_01, 'adv_03': adv_03,
                                   'adv_05': adv_05, 'strength': strength})
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, nb_classes=10, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = X_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute the number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, channels)),
                    np.reshape(adv_x, (img_rows, img_cols, channels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
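Note: the SaliencyMapMethod above greedily perturbs the features whose saliency most helps the target class, changing each by `theta` and at most a `gamma` fraction of them. A simplified NumPy sketch of the increase-only saliency score, assuming `jac` holds the Jacobian of the logits with respect to one flattened input (shape: nb_classes x n_features; the function is illustrative, not cleverhans API):

import numpy as np

def saliency_scores(jac, target):
    """Score each input feature for a targeted, increase-only JSMA step."""
    alpha = jac[target]              # effect of each feature on the target logit
    beta = jac.sum(axis=0) - alpha   # combined effect on all other logits
    mask = (alpha > 0) & (beta < 0)  # keep features that help the target only
    return np.where(mask, alpha * np.abs(beta), 0.0)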
Code example #30
def mnist_tutorial(train_start=0,
                   train_end=600,
                   test_start=0,
                   test_end=200,
                   batch_size=128,
                   model_name="",
                   model_dir="",
                   print_examples=False):
    """
    MNIST CleverHans tutorial
    :param model_name: filename for desired classifier model
    :param model_dir: location of classifier models
    :return: an AccuracyReport object
    """
    #--------------------------------------------------------------------------
    # I) Setup (load data / model / etc.)
    #--------------------------------------------------------------------------
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Load Model
    filepath = model_dir + model_name
    model = keras.models.load_model(filepath)
    print("Model loaded from:")
    print(filepath)
    preds = model(x)  #predictions
    print("Defined TensorFlow model graph.")

    # Evaluate the accuracy of the MNIST model on legitimate examples (for comparison)
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    report.clean_train_clean_eval = accuracy
    print('Test accuracy on legitimate examples: %0.4f' % accuracy)

    # Print Normal Examples
    if print_examples:
        fig2 = plt.figure()
        plt.title("Normal")
        for i in range(0, 10):
            example = X_test[i:i + 1, :, :, :]
            img = example[0, :, :, 0]
            plt.subplot(2, 5, i + 1)
            plt.imshow(img, cmap="gray")

    #-----------------------------------------------------------------------------
    # II) Adversarial operations / evaluation
    #------------------------------------------------------------------------------

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)

    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Print Adversarial Examples
    if print_examples:
        fig1 = plt.figure()
        for i in range(0, 10):
            adversarial_example = sess.run(
                adv_x, feed_dict={x: X_test[i:i + 1, :, :, :]})
            img = adversarial_example[0, :, :, 0]
            plt.subplot(2, 5, i + 1)
            plt.imshow(img, cmap="gray")
        plt.show()

    return report
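Note: the `tf.stop_gradient(adv_x)` call in the example above treats the crafted examples as constants, so no gradients flow back through the attack graph when `preds_adv` is differentiated; without it, any training on `preds_adv` would differentiate through the FGSM computation itself. A minimal TF1-style sketch of that pattern (illustrative, not cleverhans code):

import tensorflow as tf

def constant_adv_examples(x, eps, loss_grad):
    """Perturb x along the gradient sign, then cut the gradient path."""
    x_adv = x + eps * tf.sign(loss_grad)
    return tf.stop_gradient(x_adv)  # gradients no longer flow into the attack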