def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS,
                      batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED):
    """
  MNIST tutorial for Carlini and Wagner's attack
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    if targeted:
        cw_params_batch_size = source_samples * nb_classes
    else:
        cw_params_batch_size = source_samples
    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            err = model_eval(sess,
                             x,
                             y,
                             preds,
                             adv,
                             y_test[idxs],
                             args=eval_params)
            adv_accuracy = 1 - err
        else:
            err = model_eval(sess,
                             x,
                             y,
                             preds,
                             adv,
                             y_test[:source_samples],
                             args=eval_params)
            adv_accuracy = 1 - err

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        _ = grid_visual(grid_viz_data)

    return report
def train_zero_knowledge_gandef_model(train_start=0,
                                      train_end=60000,
                                      test_start=0,
                                      test_end=10000,
                                      smoke_test=True,
                                      save=False,
                                      testing=False,
                                      backprop_through_attack=False,
                                      num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param train_batch_size: size of training batches
    :param test_batch_size: size of testing batches
    :param learning_rate: learning rate for training
    :param save: if true, the final model will be saved
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_cifar10(train_start=train_start,
                                                    train_end=train_end,
                                                    test_start=test_start,
                                                    test_end=test_end)
    if smoke_test:
        X_train, Y_train, X_test, Y_test = X_train[:
                                                   256], Y_train[:
                                                                 256], X_test[:
                                                                              256], Y_test[:
                                                                                           256]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y_soft = tf.placeholder(tf.float32, shape=(None, 10))

    # Adversarial training
    print("Start adversarial training")
    zero_knowledge_gandef_model = make_zero_knowledge_gandef_model(
        name="model_zero_knowledge_gandef")
    aug_x_1 = cifar10_augment(x)
    aug_x_2 = gaussian_augment(aug_x_1)
    preds_clean = zero_knowledge_gandef_model(aug_x_1)
    preds_aug = zero_knowledge_gandef_model(aug_x_2)

    def cross_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    def sigmoid_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    # Prepare training settings
    rng = np.random.RandomState([2017, 8, 30])

    # Prepare optimizer
    learning_rate = 1e-4
    clf_opt = tf.train.AdamOptimizer(learning_rate)
    dic_opt = tf.train.AdamOptimizer(learning_rate * 10)

    # Prepare train params
    train_params = {
        'nb_epochs': 350,
        'batch_size': 128,
        'weight_decay': 0.001,
        'trade_off': 2,
        'pretrain_epochs': 0,
        'inner_epochs': 1
    }

    # Perform and evaluate adversarial training
    gan_train_v2(sess,
                 x,
                 y_soft,
                 preds_clean,
                 X_train,
                 Y_train,
                 loss_func=[cross_entropy, sigmoid_entropy],
                 optimizer=[clf_opt, dic_opt],
                 predictions_adv=preds_aug,
                 evaluate=None,
                 args=train_params,
                 rng=rng,
                 var_list=zero_knowledge_gandef_model.get_gan_params())

    # Evaluate the accuracy of the MNIST model on Clean examples
    preds_clean = zero_knowledge_gandef_model(x)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': True,
        'reject_threshold': 0.5
    }
    clean_acc = confident_model_eval(sess,
                                     x,
                                     y_soft,
                                     preds_clean,
                                     X_test,
                                     Y_test,
                                     args=eval_params)
    print('Test accuracy on Clean test examples: %0.4f\n' % clean_acc)
    report.adv_train_clean_eval = clean_acc

    # Evaluate the accuracy of the MNIST model on FGSM examples
    fgsm_params = {'eps': float(8 / 255) * 2., 'clip_min': -1., 'clip_max': 1.}
    fgsm_att = FastGradientMethod(zero_knowledge_gandef_model, sess=sess)
    fgsm_adv = fgsm_att.generate(x, **fgsm_params)
    preds_fgsm_adv = zero_knowledge_gandef_model(fgsm_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    fgsm_acc = confident_model_eval(sess,
                                    x,
                                    y_soft,
                                    preds_fgsm_adv,
                                    X_test,
                                    Y_test,
                                    args=eval_params)
    print('Test accuracy on FGSM test examples: %0.4f\n' % fgsm_acc)
    report.adv_train_adv_eval = fgsm_acc

    # Evaluate the accuracy of the MNIST model on BIM examples
    bim_params = {
        'eps': float(8 / 255) * 2.,
        'eps_iter': float(2 / 255) * 2.,
        'clip_min': -1.,
        'clip_max': 1.
    }
    bim_att = BasicIterativeMethod(zero_knowledge_gandef_model, sess=sess)
    bim_adv = bim_att.generate(x, **bim_params)
    preds_bim_adv = zero_knowledge_gandef_model(bim_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    bim_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_bim_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on BIM test examples: %0.4f\n' % bim_acc)
    report.adv_train_adv_eval = bim_acc

    # Evaluate the accuracy of the MNIST model on PGD examples
    pgd_params = {
        'eps': float(8 / 255) * 2.,
        'eps_iter': float(2 / 255) * 2.,
        'nb_iter': 7,
        'clip_min': -1.,
        'clip_max': 1.,
        'rand_init': True
    }
    pgd_att = MadryEtAl(zero_knowledge_gandef_model, sess=sess)
    pgd_adv = pgd_att.generate(x, **bim_params)
    preds_pgd_adv = zero_knowledge_gandef_model(pgd_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    pgd_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_pgd_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on PGD test examples: %0.4f\n' % pgd_acc)
    report.adv_train_adv_eval = pgd_acc

    # Save model
    if save:
        model_path = "models/zero_knowledge_gandef"
        vars_to_save = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='model_zero_knowledge_gandef*')
        assert len(vars_to_save) > 0
        saver = tf.train.Saver(var_list=vars_to_save)
        saver.save(sess, model_path)
        print('Model saved\n')
    else:
        print('Model not saved\n')
예제 #3
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64,
                   num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder

    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    nb_classes = 10
    source_samples = 10
    img_rows = 28
    img_cols = 28
    channels = 1

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }

    fgsm_params = {'eps': FLAGS.fgsm_eps, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])
    prune_factor = FLAGS.prune_factor
    eval_par = {'batch_size': batch_size}
    if clean_train:
        prune_percent = {
            'conv1_w': 5,
            'conv2_w': 5,
            'conv3_w': 5,
            'conv4_w': 5,
            'fc1_w': prune_factor,
            'fc2_w': prune_factor,
            'fc3_w': prune_factor
        }
        model = make_basic_cnn(nb_filters=nb_filters,
                               prune_percent=prune_percent)
        initialize_uninitialized_global_variables(sess)
        preds = model.get_probs(x)
        saver = tf.train.Saver()

        def fgsm_combo():
            acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_par)
            print('Test accuracy on legitimate examples: %0.4f\n' % acc)

            fgsm = FastGradientMethod(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            preds_adv = model.get_probs(adv_x)
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_test,
                             Y_test,
                             args=eval_par)
            print(
                'Test accuracy on adversarial examples generated by fgsm: %0.4f\n'
                % acc)

            bim = BasicIterativeMethod(model, sess=sess)
            adv_x = bim.generate(x)
            preds_adv = model.get_probs(adv_x)

            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_test,
                             Y_test,
                             args=eval_par)
            print(
                'Test accuracy on adversarial examples generated by IterativeMethod: %0.4f\n'
                % acc)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples

            acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_par)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        ckpt_name = './mnist_model.ckpt'

        if not FLAGS.resume:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        evaluate=evaluate,
                        args=train_params,
                        rng=rng)
            saver.save(sess, ckpt_name)
        if FLAGS.resume:
            saver = tf.train.import_meta_graph(ckpt_name + '.meta')
            print("loading pretrain model")
            saver.restore(sess, ckpt_name)
            acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_par)
            print('Test accuracy on pretrained model: %0.4f\n' % acc)
        if not FLAGS.resume:
            import sys
            sys.exit()

        def do_jsma():
            print('Crafting ' + str(source_samples) + ' * ' +
                  str(nb_classes - 1) + ' adversarial examples')

            # Keep track of success (adversarial example classified in target)
            results = np.zeros((nb_classes, source_samples), dtype='i')

            # Rate of perturbed features for each test set example and target class
            perturbations = np.zeros((nb_classes, source_samples), dtype='f')

            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            # Instantiate a SaliencyMapMethod attack object
            jsma = SaliencyMapMethod(model, back='tf', sess=sess)
            jsma_params = {
                'theta': 1.,
                'gamma': 0.1,
                'clip_min': 0.,
                'clip_max': 1.,
                'y_target': None
            }

            figure = None
            # Loop over the samples we want to perturb into adversarial examples
            for sample_ind in xrange(0, source_samples):
                print('--------------------------------------')
                print('Attacking input %i/%i' %
                      (sample_ind + 1, source_samples))
                sample = X_test[sample_ind:(sample_ind + 1)]

                # We want to find an adversarial example for each possible target class
                # (i.e. all classes that differ from the label given in the dataset)
                current_class = int(np.argmax(Y_test[sample_ind]))
                target_classes = other_classes(nb_classes, current_class)

                # For the grid visualization, keep original images along the diagonal
                grid_viz_data[current_class,
                              current_class, :, :, :] = np.reshape(
                                  sample, (img_rows, img_cols, channels))

                # Loop over all target classes
                for target in target_classes:
                    print('Generating adv. example for target class %i' %
                          target)

                    # This call runs the Jacobian-based saliency map approach
                    one_hot_target = np.zeros((1, nb_classes),
                                              dtype=np.float32)
                    one_hot_target[0, target] = 1
                    jsma_params['y_target'] = one_hot_target
                    adv_x = jsma.generate_np(sample, **jsma_params)

                    # Check if success was achieved
                    res = int(model_argmax(sess, x, preds, adv_x) == target)

                    # Computer number of modified features
                    adv_x_reshape = adv_x.reshape(-1)
                    test_in_reshape = X_test[sample_ind].reshape(-1)
                    nb_changed = np.where(
                        adv_x_reshape != test_in_reshape)[0].shape[0]
                    percent_perturb = float(nb_changed) / adv_x.reshape(
                        -1).shape[0]

                    # Display the original and adversarial images side-by-side
                    if FLAGS.viz_enabled:
                        figure = pair_visual(
                            np.reshape(sample, (img_rows, img_cols)),
                            np.reshape(adv_x, (img_rows, img_cols)), figure)

                    # Add our adversarial example to our grid data
                    grid_viz_data[target, current_class, :, :, :] = np.reshape(
                        adv_x, (img_rows, img_cols, channels))

                    # Update the arrays for later analysis
                    results[target, sample_ind] = res
                    perturbations[target, sample_ind] = percent_perturb

            print('--------------------------------------')

            # Compute the number of adversarial examples that were successfully found
            nb_targets_tried = ((nb_classes - 1) * source_samples)
            succ_rate = float(np.sum(results)) / nb_targets_tried
            print('Avg. rate of successful adv. examples {0:.4f}'.format(
                succ_rate))
            report.clean_train_adv_eval = 1. - succ_rate

            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(perturbations)
            print('Avg. rate of perturbed features {0:.4f}'.format(
                percent_perturbed))

            # Compute the average distortion introduced for successful samples only
            percent_perturb_succ = np.mean(perturbations * (results == 1))
            print('Avg. rate of perturbed features for successful '
                  'adversarial examples {0:.4f}'.format(percent_perturb_succ))
            if FLAGS.viz_enabled:
                import matplotlib.pyplot as plt
                plt.close(figure)
                _ = grid_visual(grid_viz_data)

            return report

        def do_cw():
            nb_adv_per_sample = str(nb_classes - 1) if FLAGS.targeted else '1'
            print('Crafting ' + str(source_samples) + ' * ' +
                  nb_adv_per_sample + ' adversarial examples')
            print("This could take some time ...")

            # Instantiate a CW attack object
            cw = CarliniWagnerL2(model, back='tf', sess=sess)

            if FLAGS.viz_enabled:
                assert source_samples == nb_classes
                idxs = [
                    np.where(np.argmax(Y_test, axis=1) == i)[0][0]
                    for i in range(nb_classes)
                ]
            if FLAGS.targeted:
                if FLAGS.viz_enabled:
                    # Initialize our array for grid visualization
                    grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                                  channels)
                    grid_viz_data = np.zeros(grid_shape, dtype='f')

                    adv_inputs = np.array([[instance] * nb_classes
                                           for instance in X_test[idxs]],
                                          dtype=np.float32)
                else:
                    adv_inputs = np.array(
                        [[instance] * nb_classes
                         for instance in X_test[:source_samples]],
                        dtype=np.float32)

                one_hot = np.zeros((nb_classes, nb_classes))
                one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

                adv_inputs = adv_inputs.reshape(
                    (source_samples * nb_classes, img_rows, img_cols, 1))
                adv_ys = np.array([one_hot] * source_samples,
                                  dtype=np.float32).reshape(
                                      (source_samples * nb_classes,
                                       nb_classes))
                yname = "y_target"
            else:
                if FLAGS.viz_enabled:
                    # Initialize our array for grid visualization
                    grid_shape = (nb_classes, 2, img_rows, img_cols, channels)
                    grid_viz_data = np.zeros(grid_shape, dtype='f')

                    adv_inputs = X_test[idxs]
                else:
                    adv_inputs = X_test[:source_samples]

                adv_ys = None
                yname = "y"

            cw_params = {
                'binary_search_steps': 1,
                yname: adv_ys,
                'max_iterations': FLAGS.attack_iterations,
                'learning_rate': 0.1,
                'batch_size': source_samples *
                nb_classes if FLAGS.targeted else source_samples,
                'initial_const': 10
            }

            adv = cw.generate_np(adv_inputs, **cw_params)

            if FLAGS.targeted:
                adv_accuracy = model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          adv,
                                          adv_ys,
                                          args=eval_par)
            else:
                if FLAGS.viz_enabled:
                    adv_accuracy = 1 - \
                                   model_eval(sess, x, y, preds, adv, Y_test[
                                       idxs], args=eval_par)
                else:
                    adv_accuracy = 1 - \
                                   model_eval(sess, x, y, preds, adv, Y_test[
                                       :source_samples], args=eval_par)

            if FLAGS.viz_enabled:
                for j in range(nb_classes):
                    if FLAGS.targeted:
                        for i in range(nb_classes):
                            grid_viz_data[i, j] = adv[i * nb_classes + j]
                    else:
                        grid_viz_data[j, 0] = adv_inputs[j]
                        grid_viz_data[j, 1] = adv[j]

                print(grid_viz_data.shape)

            print('--------------------------------------')

            # Compute the number of adversarial examples that were successfully found
            print('Avg. rate of successful adv. examples {0:.4f}'.format(
                adv_accuracy))
            report.clean_train_adv_eval = 1. - adv_accuracy

            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(
                np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
            print('Avg. L_2 norm of perturbations {0:.4f}'.format(
                percent_perturbed))
            # Close TF session
            #            sess.close()

            # Finally, block & display a grid of all the adversarial examples
            if FLAGS.viz_enabled:
                import matplotlib.pyplot as plt
                _ = grid_visual(grid_viz_data)

            return report

        print("before pruning and gradient inhibition\n")
        fgsm_combo()
        do_cw()
        do_jsma()
        preds = model.get_probs(x)
        loss = model_loss(y, preds)
        if not FLAGS.load_pruned_model:
            print("start iterative pruning")
            for i in range(FLAGS.prune_iterations):
                print("iterative %d" % (i))
                start = time.time()
                dict_nzidx = model.apply_prune(sess)
                trainer = tf.train.AdamOptimizer(learning_rate)
                grads = trainer.compute_gradients(loss)
                grads = model.apply_prune_on_grads(grads, dict_nzidx)
                end = time.time()
                print('until grad compute elpased %f' % (end - start))
                prune_args = {'trainer': trainer, 'grads': grads}
                train_params = {
                    'nb_epochs': FLAGS.retrain_epoch,
                    'batch_size': batch_size,
                    'learning_rate': FLAGS.retrain_lr
                }
                start = time.time()
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            evaluate=evaluate,
                            args=train_params,
                            rng=rng,
                            prune_args=prune_args,
                            retrainindex=i)
                end = time.time()
                print('model_train function takes %f' % (end - start))
                eval_par = {'batch_size': batch_size}
                acc = model_eval(sess,
                                 x,
                                 y,
                                 preds_adv,
                                 X_test,
                                 Y_test,
                                 args=eval_par)
                print('Test accuracy on adversarial examples: %0.4f\n' % acc)
            saver.save(sess, './pruned_mnist_model.ckpt')
        else:
            print("loading pruned model")
            saver = tf.train.import_meta_graph(
                './pruned_mnist_model.ckpt.meta')
            saver.restore(sess, './pruned_mnist_model.ckpt')
            print("before  applying gradient inhibition")
            fgsm_combo()
            print("before gradient inhibition, doing c&w")
            do_cw()
            do_jsma()
        if FLAGS.do_inhibition:
            model.inhibition(sess,
                             original_method=FLAGS.use_inhibition_original,
                             inhibition_eps=FLAGS.inhibition_eps)

        fgsm_combo()
        do_cw()
        do_jsma()
예제 #4
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   train_dir="/tmp",
                   filename="mnist.ckpt",
                   load_model=False,
                   testing=False):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    # X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
    #                                               train_end=train_end,
    #                                               test_start=test_start,
    #                                               test_end=test_end)

    # # Use label smoothing
    # assert Y_train.shape[1] == 10
    # label_smooth = .1
    # Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # # Define input TF placeholder
    # x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    # y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    print(type(model))
    print(model)
    # preds = model(x)
    # print("Defined TensorFlow model graph.")

    # def evaluate():
    #     # Evaluate the accuracy of the MNIST model on legitimate test examples
    #     eval_params = {'batch_size': batch_size}
    #     acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    #     report.clean_train_clean_eval = acc
    #     assert X_test.shape[0] == test_end - test_start, X_test.shape
    #     print('Test accuracy on legitimate examples: %0.4f' % acc)

    # # Train an MNIST model
    # train_params = {
    #     'nb_epochs': nb_epochs,
    #     'batch_size': batch_size,
    #     'learning_rate': learning_rate,
    #     'train_dir': train_dir,
    #     'filename': filename
    # }
    # ckpt = tf.train.get_checkpoint_state(train_dir)
    # ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    # rng = np.random.RandomState([2017, 8, 30])
    # if load_model and ckpt_path:
    #     saver = tf.train.Saver()
    #     saver.restore(sess, ckpt_path)
    #     print("Model loaded from: {}".format(ckpt_path))
    #     evaluate()
    # else:
    #     print("Model was not loaded, training from scratch.")
    #     model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
    #                 args=train_params, save=True, rng=rng)

    # # Calculate training error
    # if testing:
    #     eval_params = {'batch_size': batch_size}
    #     acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
    #     report.train_clean_train_clean_eval = acc

    # # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)
    print(wrap)
    print(type(wrap))
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=False, label_smoothing=0.1,
                   adversarial_training = ADVERSARIAL_TRAINING,
                   save_model=SAVE_MODEL):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
  keras.layers.core.K.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if not hasattr(backend, "tf"):
    raise RuntimeError("This tutorial requires keras to be configured"
                       " to use the TensorFlow backend.")

  if keras.backend.image_dim_ordering() != 'tf':
    keras.backend.set_image_dim_ordering('tf')
    print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
          "'th', temporarily setting to 'tf'")

  # Create TF session and set as Keras backend session
  os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only use No.0 GPU
  config = tf.ConfigProto()
  config.allow_soft_placement=True
  config.gpu_options.allow_growth = True
  sess = tf.Session(config=config)
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Define TF model graph
  model = fc_modelC(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds = model(x)
  print("Defined TensorFlow model graph.")

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
#        assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
  wrap = KerasModelWrapper(model)

  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate()
  else:
    print("Model was not loaded, training from scratch.")
    loss = CrossEntropy(wrap, smoothing=label_smoothing)
    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng)
    if save_model:
        saver = tf.train.Saver(max_to_keep=1)
        saver.save(sess, '{}/mnist.ckpt'.format(train_dir), global_step=NB_EPOCHS)
        print("model has been saved")


  # Calculate training error
  if testing:
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
    report.train_clean_train_clean_eval = acc

  # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
  fgsm = FastGradientMethod(wrap, sess=sess)
  fgsm_params = {'eps': 0.2,
                 'clip_min': 0.,
                 'clip_max': 1.}

  print(fgsm_params)
  adv_x = fgsm.generate(x, **fgsm_params)
  # Consider the attack to be constant
  adv_x = tf.stop_gradient(adv_x)
  preds_adv = model(adv_x)

  # Evaluate the accuracy of the MNIST model on adversarial examples
  eval_par = {'batch_size': batch_size}
  start_time = time.time()
  acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
  print('Test accuracy on adversarial examples: %0.4f' % acc)
  end_time = time.time()
  print("FGSM attack time is {}\n".format(end_time-start_time))
  report.clean_train_adv_eval = acc


  gc.collect()
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=True, label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
  tf.keras.backend.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session()
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Define TF model graph
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds = model(x)
  print("Defined TensorFlow model graph.")

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
#        assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
  wrap = KerasModelWrapper(model)

  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate()

  eval_params = {'batch_size': batch_size}

  def eval(xx):
    acc = model_eval(sess, x, y, preds, xx, y_test, args=eval_params)
    print(acc)

  eval(x_test)
  model = load_modell(str("./autoencoder_fg.h5"))
  x_test_decoded = model.predict(x_test)
  eval(x_test_decoded)

  _, fg_test = pickle.load(open("/data/mnist/fg.pkl"))
  fg_test = fg_test.astype('float32') / 255.
  fg_test = np.mean(fg_test, axis=3)
  fg_test = np.expand_dims(fg_test, axis=3)
  fg_test_decoded = model.predict(fg_test)
  eval(fg_test)
  eval(fg_test_decoded)
예제 #7
0
    ),
    'svhn': (
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
    )
}

# this is the name of the scope of the author(s) Resnet34 graph. If the user wants to just load our network parameters
# and maybe later even use our scores.npy outputs (it takes a long time to compute yourself...), he/she must use
# these strings. Otherwise, any string is OK. We provide here as default the scope names we used.
ARCH_NAME = {'cifar10': 'model1', 'cifar100': 'model_cifar_100', 'svhn': 'model_svhn'}

weight_decay = 0.0004
LABEL_SMOOTHING = {'cifar10': 0.1, 'cifar100': 0.01, 'svhn': 0.1}

# Object used to keep track of (and return) key accuracies
report = AccuracyReport()

# Set TF random seed to improve reproducibility
superseed = 123456789
rand_gen = np.random.RandomState(superseed)
tf.set_random_seed(superseed)

# Set logging level to see debug information
set_log_level(logging.DEBUG)

# Create TF session
config_args = dict(allow_soft_placement=True)
sess = tf.Session(config=tf.ConfigProto(**config_args))

# get records from training
if FLAGS.checkpoint_dir != '':
예제 #8
0
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x, y, x_train, y_train, args=train_params,
          rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
예제 #9
0
    # tf.app.run()
    train_start=0
    train_end=60000
    test_start=0
    test_end=10000
    nb_epochs=6
    batch_size=128
    learning_rate=0.001
    train_dir="/tmp"
    filename="mnist.ckpt"
    testing=False
    # keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
예제 #10
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   attack_string=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example.
    :param train_end: index of last training set example.
    :param test_start: index of first test set example.
    :param test_end: index of last test set example.
    :param nb_epochs: number of epochs to train model.
    :param batch_size: size of training batches.
    :param learning_rate: learning rate for training.
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate.
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: number of filters in the CNN used for training.
    :param num_threads: number of threads used for running the process.
    :param attack_string: attack name for crafting adversarial attacks and
                            adversarial training, in string format.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Train an MNIST model
    model_path = "models/mnist"
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }

    # Initialize the attack object
    attack_class = attack_selection(attack_string)
    attack_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    rng = np.random.RandomState([2018, 6, 18])
    if clean_train:
        model = ModelBasicCNNTFE(nb_filters=nb_filters)

        def evaluate_clean():
            """Evaluate the accuracy of the MNIST model on legitimate test
            examples
            """
            eval_params = {'batch_size': batch_size}
            acc = model_eval(model, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        train(model,
              X_train,
              Y_train,
              evaluate=evaluate_clean,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        if testing:
            # Calculate training error
            eval_params = {'batch_size': batch_size}
            acc = model_eval(model, X_train, Y_train, args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        attack = attack_class(model)
        acc = model_eval(model,
                         X_test,
                         Y_test,
                         args=eval_par,
                         attack=attack,
                         attack_args=attack_params)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(model,
                             X_train,
                             Y_train,
                             args=eval_par,
                             attack=attack,
                             attack_args=attack_params)
            print('Train accuracy on adversarial examples: %0.4f\n' % acc)
            report.train_clean_train_adv_eval = acc

        # Clear the previous Variables
        for var in model.get_params():
            var = None
        attack = None
        print("Repeating the process, using adversarial training")

    model_adv_train = ModelBasicCNNTFE(nb_filters=nb_filters)
    attack = attack_class(model_adv_train)

    def evaluate_adv():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(model_adv_train,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy
        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(model_adv_train,
                              X_test,
                              Y_test,
                              args=eval_params,
                              attack=attack,
                              attack_args=attack_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(model_adv_train,
          X_train,
          Y_train,
          evaluate=evaluate_adv,
          args=train_params,
          rng=rng,
          var_list=model_adv_train.get_params(),
          attack=attack,
          attack_args=attack_params)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(model_adv_train,
                              X_train,
                              Y_train,
                              args=eval_params,
                              attack=None,
                              attack_args=None)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(model_adv_train,
                              X_train,
                              Y_train,
                              args=eval_params,
                              attack=attack,
                              attack_args=attack_params)
        report.train_adv_train_adv_eval = accuracy
    return report
예제 #11
0
def mnist_tutorial(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    clean_train=CLEAN_TRAIN,
    testing=False,
    backprop_through_attack=BACKPROP_THROUGH_ATTACK,
    nb_filters=NB_FILTERS,
    num_threads=None,
    label_smoothing=0.1,
):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    mnist = MNIST(
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end,
    )
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
    }
    eval_params = {"batch_size": batch_size}
    fgsm_params = {"eps": 0.3, "clip_min": 0.0, "clip_max": 1.0}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """
        Run the evaluation and print the results.
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = "adversarial"
        else:
            report_text = "legitimate"
        if report_text:
            print("Test accuracy on %s examples: %0.4f" % (report_text, acc))

    if clean_train:
        model = make_basic_picklable_cnn()
        # Tag the model so that when it is saved to disk, future scripts will
        # be able to tell what data it was trained on
        model.dataset_factory = mnist.get_factory()
        preds = model.get_logits(x)
        assert len(model.get_params()) > 0
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            """
            Run evaluation for the naively trained model on clean examples.
            """
            do_eval(preds, x_test, y_test, "clean_train_clean_eval", False)

        train(
            sess,
            loss,
            x_train,
            y_train,
            evaluate=evaluate,
            args=train_params,
            rng=rng,
            var_list=model.get_params(),
        )

        with sess.as_default():
            save("clean_model.joblib", model)

            print("Now that the model has been saved, you can evaluate it in a"
                  " separate process using `evaluate_pickled_model.py`. "
                  "You should get exactly the same result for both clean and "
                  "adversarial accuracy as you get within this program.")

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, "train_clean_train_clean_eval")

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, "clean_train_adv_eval", True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, "train_clean_train_adv_eval")

        print("Repeating the process, using adversarial training")

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = make_basic_picklable_cnn()
    # Tag the model so that when it is saved to disk, future scripts will
    # be able to tell what data it was trained on
    model2.dataset_factory = mnist.get_factory()
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        """Return an adversarial example near clean example `x`"""
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate_adv():
        """
        Evaluate the adversarially trained model.
        """
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, "adv_train_clean_eval", False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, "adv_train_adv_eval", True)

    # Perform and evaluate adversarial training
    train(
        sess,
        loss2,
        x_train,
        y_train,
        evaluate=evaluate_adv,
        args=train_params,
        rng=rng,
        var_list=model2.get_params(),
    )

    with sess.as_default():
        save("adv_model.joblib", model2)
        print(
            "Now that the model has been saved, you can evaluate it in a "
            "separate process using "
            "`python evaluate_pickled_model.py adv_model.joblib`. "
            "You should get exactly the same result for both clean and "
            "adversarial accuracy as you get within this program."
            " You can also move beyond the tutorials directory and run the "
            " real `compute_accuracy.py` script (make sure cleverhans/scripts "
            "is in your PATH) to see that this FGSM-trained "
            "model is actually not very robust---it's just a model that trains "
            " quickly so the tutorial does not take a long time")

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, "train_adv_train_clean_eval")
        do_eval(preds2_adv, x_train, y_train, "train_adv_train_adv_eval")

    return report
def mnist_tutorial(nb_epochs=6, batch_size=128, train_end=-1, test_end=-1,
                   learning_rate=0.001):
    """
    MNIST cleverhans tutorial
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Train a pytorch MNIST model
    torch_model = PytorchMnistModel()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
        batch_size=batch_size)

    # Truncate the datasets so that our test run more quickly
    train_loader.dataset.train_data = train_loader.dataset.train_data[
                                      :train_end]
    test_loader.dataset.test_data = test_loader.dataset.test_data[:test_end]

    # Train our model
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []

    total = 0
    correct = 0
    step = 0
    for epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # calc gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # update gradients

            preds_np = preds.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys).sum()
            total += len(xs)
            step += 1
            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # Evaluate on clean data
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()

        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()

        correct += (np.argmax(preds_np, axis=1) == ys).sum()
        total += len(xs)

    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28,))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        correct += (np.argmax(adv_preds, axis=1) == ys).sum()
        total += len(xs)

    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc
    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, train_dir="train_dir",
                   filename="mnist.ckpt", load_model=False,
                   testing=False, label_smoothing=True):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    if label_smoothing:
        label_smooth = .1
        y_train = y_train.clip(label_smooth / (nb_classes-1),
                               1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
#        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = LossCrossEntropy(wrap, smoothing=0.1)
        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, save=True, rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, x_train,
                         y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols,
                        channels=nchannels, nb_filters=64,
                        nb_classes=nb_classes)
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = LossCrossEntropy(wrap_2, smoothing=0.1, attack=attack)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_test, y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, x_test,
                              y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess, loss_2, x, y, x_train, y_train, evaluate=evaluate_2,
          args=train_params, save=False, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, x_train, y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, x_train,
                              y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #14
0
def mnist_ae(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   num_threads=None,
                   label_smoothing=0.1):
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)
  rng = np.random.RandomState()

  source_samples = 10
  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Get CIFAR10 data
  data = CIFAR10(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
  dataset_size = data.x_train.shape[0]
  dataset_train = data.to_tensorflow()[0]
  dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  nb_latent_size = 100
  # Get MNIST test data
  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]
  print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  x_t = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  y_t = tf.placeholder( tf.float32, shape=(None, nb_classes))
  #z = tf.placeholder(tf.float32, shape = (None, nb_latent_size))
  #z_t = tf.placeholder(tf.float32, shape = (None, nb_latent_size))
  '''
  save_dir= 'models'
  model_name = 'cifar10_AE.h5'
  model_path_ae = os.path.join(save_dir, model_name)
  '''
  #model_ae= ae_model(x, img_rows=img_rows, img_cols=img_cols,
   #                 channels=nchannels)
  #recon = model_ae(x)
  #print("recon: ",recon)
  wrap_ae = ModelVAE('wrap_ae')
  recon = wrap_ae.get_layer(x,'RECON')
  print("Defined TensorFlow model graph.")

  def evaluate_ae():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': 128}
    noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recon, x_train, x_train, args=eval_params)
    print("reconstruction distance: ", d1)
  
  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      #'train_dir': train_dir_ae,
      #'filename': filename
  }
  rng = np.random.RandomState([2017, 8, 30])
  #if not os.path.exists(train_dir_ae):
   # os.mkdir(train_dir_ae)

  #ckpt = tf.train.get_checkpoint_state(train_dir_ae)
  #print(train_dir_ae, ckpt)
  #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
  


  if clean_train_vae==True:
    print("Training VAE")
    loss = vae_loss(wrap_ae)
    
    train_ae(sess, loss, x_train, x_train, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae,
                args=train_params, rng=rng, var_list = wrap_ae.get_params())
    
    saver = tf.train.Saver()
    saver.save(sess, "train_dir/model_vae_fgsm.ckpt")
    print("saved model")
    

  else:
    print("Loading VAE")
    saver = tf.train.Saver()
    #print(ckpt_path)
    saver.restore(sess, "train_dir/model_vae.ckpt")
    evaluate_ae()
    if(train_further):
      train_params = {
        'nb_epochs': 10,
        'batch_size': batch_size,
        'learning_rate': 0.0002,
    }
      #training with the saved model as starting point
      loss = SquaredError(wrap_ae)
      train_ae(sess, loss, x_train, x_train, optimizer = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae,
            args=train_params, rng=rng)
      saver = tf.train.Saver()
      saver.save(sess, "train_dir/model_vae_fgsm.ckpt")

      evaluate_ae()
    
      print("Model loaded and trained for more epochs")

  num_classes = 10
  '''
  save_dir= 'models'
  model_name = 'cifar10_CNN.h5'
  model_path_cls = os.path.join(save_dir, model_name)
  '''
  cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds_cl = cl_model(x)
  def do_eval_cls(preds, x_set, y_set, x_tar_set,report_key, is_adv = None):
    acc = model_eval(sess, x, y, preds, x_t, x_set, y_set, x_tar_set, args=eval_params_cls)

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_cl,x_t, x_test, y_test, x_test,args=eval_params)
    report.clean_train_clean_eval = acc
#        assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  train_params = {
      'nb_epochs': 100,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      #'train_dir': train_dir_cl,
      #'filename': filename
  }
  rng = np.random.RandomState([2017, 8, 30])
  
  wrap_cl = KerasModelWrapper(cl_model)

  if clean_train_cl == True:  
    train_params = {
        'nb_epochs': 5,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        #'train_dir': train_dir_cl,
        #'filename': filename
      }
    print("Training CNN Classifier")
    '''
    datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    )
    datagen.fit(x_train)
    '''
    loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
    #for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size = 128):
     # train(sess, loss_cl, x_batch, y_batch, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate,
      #          args=train_params, rng=rng)
    train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6),
          args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, "train_dir/model_cnn_cl.ckpt")
    print("saved model at ", "train_dir/model_cnn_cl_fgsm.ckpt")
    
  else:
    print("Loading CNN Classifier")
    saver = tf.train.Saver()
    #print(ckpt_path)
    saver.restore(sess, "train_dir/model_cnn_cl.ckpt")
    evaluate()
    if(train_further):
      train_params = {
        'nb_epochs': 10,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_cl,
        'filename': filename
      }
      loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
      train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6),
            args=train_params, rng=rng)
      saver = tf.train.Saver()
      saver.save(sess, "train_dir/model_cl_fgsm.ckpt")
      print("Model loaded and trained further")
      evaluate()


  ###########################################################################
  # Craft adversarial examples using Carlini and Wagner's approach
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # Instantiate a CW attack object
 #cw = CarliniWagnerAE(wrap_ae,wrap_cl, sess=sess)

  if viz_enabled:
    assert source_samples == nb_classes
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]
  if targeted:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                    nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')
      grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

      adv_inputs = np.array(
          [[instance] * (nb_classes-1) for instance in x_test[idxs]],
          dtype=np.float32)

      #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])
      
      adv_input_y = []
      for curr_num in range(nb_classes):
        targ = []
        for id in range(nb_classes-1):
            targ.append(y_test[idxs[curr_num]])
        adv_input_y.append(targ)
      
      adv_input_y = np.array(adv_input_y)

      adv_target_y = []
      for curr_num in range(nb_classes):
        targ = []
        for id in range(nb_classes):
          if(id!=curr_num):
            targ.append(y_test[idxs[id]])
        adv_target_y.append(targ)
      
      adv_target_y = np.array(adv_target_y)

      #print("adv_input_y: \n", adv_input_y)
      #print("adv_target_y: \n", adv_target_y)

      adv_input_targets = []
      for curr_num in range(nb_classes):
        targ = []
        for id in range(nb_classes):
          if(id!=curr_num):
            targ.append(x_test[idxs[id]])
        adv_input_targets.append(targ)
      adv_input_targets = np.array(adv_input_targets)

      adv_inputs = adv_inputs.reshape(
        (source_samples * (nb_classes-1), img_rows, img_cols, nchannels))
      adv_input_targets = adv_input_targets.reshape(
        (source_samples * (nb_classes-1), img_rows, img_cols, nchannels))

      adv_input_y = adv_input_y.reshape(source_samples*(nb_classes-1), 10)
      adv_target_y = adv_target_y.reshape(source_samples*(nb_classes-1), 10)

    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    

  adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape((source_samples *
                                                 nb_classes, nb_classes))
  yname = "y_target"

  fgsm_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }

  fgsm = FastGradientMethodAe(wrap_ae, sess=sess)
  adv = fgsm.generate(x,x_t, **fgsm_params)

  adv = sess.run(adv, {x: adv_inputs, x_t: adv_input_targets})

  recon_orig = wrap_ae.get_layer(x, 'RECON')
  recon_orig = sess.run(recon_orig, feed_dict = {x: adv_inputs})
  recon_adv = wrap_ae.get_layer(x, 'RECON')
  recon_adv = sess.run(recon_adv, feed_dict = {x: adv})
  pred_adv_recon = wrap_cl.get_logits(x)
  pred_adv_recon = sess.run(pred_adv_recon, {x:recon_adv})

  #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
  #scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose = 1)
  #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
  #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
  shape = np.shape(adv_inputs)
  noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0])
  noise = pow(noise,0.5)
  d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0])
  d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0])
  acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)==
                             np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
  acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)==
                             np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
  print("noise: ", noise)
  print("d1: ", d1)
  print("d2: ", d2)
  print("classifier acc_target: ", acc_1)
  print("classifier acc_true: ", acc_2)

  #print("recon_adv[0]\n", recon_adv[0,:,:,0])
  curr_class = 0
  if viz_enabled:
    for j in range(nb_classes):
      if targeted:
        for i in range(nb_classes):
          #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
          if(i==j):
            grid_viz_data[i,j] = recon_orig[curr_class*9]
            grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
            curr_class = curr_class+1
          else:
            if(j>i):
              grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1]
              grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1]
            else:
              grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j]
              grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j]


    #rint(grid_viz_data.shape)

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))
  # Finally, block & display a grid of all the adversarial examples
  
  if viz_enabled:
    
    plt.ioff()
    figure = plt.figure()
    figure.canvas.set_window_title('Cleverhans: Grid Visualization')

    # Add the images to the plot
    num_cols = grid_viz_data.shape[0]
    num_rows = grid_viz_data.shape[1]
    num_channels = grid_viz_data.shape[4]
    for yy in range(num_rows):
      for xx in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
        plt.axis('off')
        plt.imshow(grid_viz_data[xx, yy, :, :, :])

    # Draw the plot and return
    plt.savefig('cifar10_fgsm_vae_fig1')
    figure = plt.figure()
    figure.canvas.set_window_title('Cleverhans: Grid Visualization')
    for yy in range(num_rows):
      for xx in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
        plt.axis('off')
        plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

    # Draw the plot and return
    plt.savefig('cifar10_fgsm_vae_fig2')

  if adversarial_training:

    print("starting adversarial training")

    index_shuf = list(range(len(x_train)))
    x_train_target = x_train[index_shuf]
    y_train_target = y_train[index_shuf]
      # Randomly repeat a few training examples each epoch to avoid
      # having a too-small batch
    '''
    while len(index_shuf) % batch_size != 0:
      index_shuf.append(rng.randint(len(x_train)))
      nb_batches = len(index_shuf) // batch_size
      rng.shuffle(index_shuf)
      # Shuffling here versus inside the loop doesn't seem to affect
      # timing very much, but shuffling here makes the code slightly
      # easier to read
    ''' 
      
    print("len of x_train_target and x_train: ", len(x_train_target), len(x_train))
    for ind in range (0, len(x_train)):
      r_ind = -1
      while(np.argmax(y_train_target[ind])==np.argmax(y_train[ind])):
        r_ind = rng.randint(0,len(x_train))
        y_train_target[ind] = y_train[r_ind]
      if r_ind>-1:  
        x_train_target[ind] = x_train[r_ind]
    wrap_ae2 = ModelVAE('wrap_ae2')
    fgsm2 = FastGradientMethodAe(wrap_ae2, sess=sess)
    adv2 = fgsm.generate(x,x_t, **fgsm_params)

    adv_set = sess.run(adv2, {x: x_train, x_t: x_train_target})
    x_train_aim = np.append(x_train, x_train, axis = 0)
    x_train_app = np.append(x_train, adv_set, axis = 0)
    loss2 =  vae_loss(wrap_ae2)
    train_params = {
        'nb_epochs': 5,
        'batch_size': batch_size,
        'learning_rate': learning_rate}

    train_ae(sess, loss2, x_train_app,  x_train_aim, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5),
                args=train_params, rng=rng, var_list = wrap_ae2.get_params())

    evaluate_ae()

    adv3 = fgsm2.generate(x, x_t, **fgsm_params)
    adv3 = sess.run(adv3, {x: adv_inputs, x_t: adv_input_targets})
    recon_orig2 = wrap_ae2.get_layer(x, 'RECON')
    recon_orig2 = sess.run(recon_orig2, feed_dict = {x: adv_inputs})
    recon_adv2 = wrap_ae2.get_layer(x, 'RECON')
    recon_adv2 = sess.run(recon_adv2, feed_dict = {x: adv3})
    pred_adv_recon2 = wrap_cl.get_logits(x)
    pred_adv_recon2 = sess.run(pred_adv_recon2, {x:recon_adv2})

    shape = np.shape(adv_inputs)
    noise = np.sum(np.square(adv3-adv_inputs))/(np.shape(adv3)[0])
    noise = pow(noise,0.5)
    d1 = np.sum(np.square(recon_adv2-adv_inputs))/(np.shape(adv_inputs)[0])
    d2 = np.sum(np.square(recon_adv2-adv_input_targets))/(np.shape(adv_inputs)[0])
    acc_1 = (sum(np.argmax(pred_adv_recon2, axis=-1)==
                               np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
    acc_2 = (sum(np.argmax(pred_adv_recon2, axis=-1)==
                               np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)

    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
      for j in range(nb_classes):
        if targeted:
          for i in range(nb_classes):
            #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
            if(i==j):
              grid_viz_data[i,j] = recon_orig2[curr_class*9]
              grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
              curr_class = curr_class+1
            else:
              if(j>i):
                grid_viz_data[i,j] = recon_adv2[i*(nb_classes-1) + j-1]
                grid_viz_data_1[i,j] = adv3[i*(nb_classes-1)+j-1]
              else:
                grid_viz_data[i,j] = recon_adv2[i*(nb_classes-1) + j]
                grid_viz_data_1[i,j] = adv3[i*(nb_classes-1)+j]


      #rint(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))
    # Finally, block & display a grid of all the adversarial examples
    
    if viz_enabled:
      
      plt.ioff()
      figure = plt.figure()
      figure.canvas.set_window_title('Cleverhans: Grid Visualization')

      # Add the images to the plot
      num_cols = grid_viz_data.shape[0]
      num_rows = grid_viz_data.shape[1]
      num_channels = grid_viz_data.shape[4]
      for yy in range(num_rows):
        for xx in range(num_cols):
          figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
          plt.axis('off')
          plt.imshow(grid_viz_data[xx, yy, :, :, :])

      # Draw the plot and return
      plt.savefig('cifar10_vae_fgsm_adv_fig1')
      figure = plt.figure()
      figure.canvas.set_window_title('Cleverhans: Grid Visualization')
      for yy in range(num_rows):
        for xx in range(num_cols):
          figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
          plt.axis('off')
          plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

      # Draw the plot and return
      plt.savefig('cifar10_vae_fgsm_adv_fig2')
    


  #return report
  if binarization:

    print("----------------")
    print("BINARIZATION")

    adv[adv>0.5] = 1.0
    adv[adv<=0.5] = 0.0
    
     
    recon_orig = wrap_ae.get_layer(x, 'RECON')
    recon_adv = wrap_ae.get_layer(x, 'RECON')
    #pred_adv = wrap_cl.get_logits(x)
    recon_orig = sess.run(recon_orig, {x: adv_inputs})
    recon_adv = sess.run(recon_adv, {x: adv})
    #pred_adv = sess.run(pred_adv, {x: recon_adv})
    pred_adv_recon = wrap_cl.get_logits(x)
    pred_adv_recon = sess.run(pred_adv_recon, {x:recon_adv})

    eval_params = {'batch_size': 90}
    if targeted:
     
      noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0])
      noise = pow(noise,0.5)
      d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0])
      d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0])
      acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)==
                               np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
      acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)==
                               np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
      print("noise: ", noise)
      print("d1: ", d1)
      print("d2: ", d2)
      print("classifier acc_target: ", acc_1)
      print("classifier acc_true: ", acc_2)


    curr_class = 0
    if viz_enabled:
      for j in range(nb_classes):
          for i in range(nb_classes):
            #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
            if(i==j):
              grid_viz_data[i,j] = recon_orig[curr_class*9]
              grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
              curr_class = curr_class+1
            else:
              if(j>i):
                grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1]
                grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1]
              else:
                grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j]
                grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j]
      

    plt.ioff()
    figure = plt.figure()
    figure.canvas.set_window_title('Cleverhans: Grid Visualization')

    # Add the images to the plot
    num_cols = grid_viz_data.shape[0]
    num_rows = grid_viz_data.shape[1]
    num_channels = grid_viz_data.shape[4]
    for yy in range(num_rows):
      for xx in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy* num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(grid_viz_data[xx, yy, :, :, 0])
        else:
          plt.imshow(grid_viz_data[xx, yy, :, :, :])

    # Draw the plot and return
    plt.savefig('cifar10_fgsm_vae_fig1_bin')
    figure = plt.figure()
    figure.canvas.set_window_title('Cleverhans: Grid Visualization')
    for yy in range(num_rows):
      for xx in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
        else:
          plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

    # Draw the plot and return
    plt.savefig('cifar10_fgsm_vae_fig2_bin')

  if(mean_filtering ==True):

      print("----------------")
      print("MEAN FILTERING")

      adv = uniform_filter(adv, 2)

      recon_orig = wrap_ae.get_layer(x, 'RECON')
      recon_adv = wrap_ae.get_layer(x, 'RECON')
      pred_adv_recon = wrap_cl.get_logits(x)
      recon_orig = sess.run(recon_orig, {x: adv_inputs})
      recon_adv = sess.run(recon_adv, {x: adv})
      pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

      eval_params = {'batch_size': 90}
      
      noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0])
      noise = pow(noise,0.5)
      d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0])
      d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0])
      acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)==
                               np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
      acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)==
                               np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
      print("noise: ", noise)
      print("d1: ", d1)
      print("d2: ", d2)
      print("classifier acc_target: ", acc_1)
      print("classifier acc_true: ", acc_2)


      curr_class = 0
      if viz_enabled:
        for j in range(nb_classes):
            for i in range(nb_classes):
              #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
              if(i==j):
                grid_viz_data[i,j] = recon_orig[curr_class*9]
                grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
                curr_class = curr_class+1
              else:
                if(j>i):
                  grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1]
                  grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1]
                else:
                  grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j]
                  grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j]
        

      plt.ioff()
      figure = plt.figure()
      figure.canvas.set_window_title('Cleverhans: Grid Visualization')

      # Add the images to the plot
      num_cols = grid_viz_data.shape[0]
      num_rows = grid_viz_data.shape[1]
      num_channels = grid_viz_data.shape[4]
      for yy in range(num_rows):
        for xx in range(num_cols):
          figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy* num_cols))
          plt.axis('off')

          if num_channels == 1:
            plt.imshow(grid_viz_data[xx, yy, :, :, 0])
          else:
            plt.imshow(grid_viz_data[xx, yy, :, :, :])

      # Draw the plot and return
      plt.savefig('cifar10_fgsm_vae_fig1_mean')
      figure = plt.figure()
      figure.canvas.set_window_title('Cleverhans: Grid Visualization')
      for yy in range(num_rows):
        for xx in range(num_cols):
          figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
          plt.axis('off')

          if num_channels == 1:
            plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
          else:
            plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

      # Draw the plot and return
      plt.savefig('cifar10_fgsm_vae_fig2_mean')
예제 #15
0
    def helper_run_multi_gpu_madryetal(self, extra_flags=None):
        """
        Compare the single GPU performance to multiGPU performance.
        """
        # Run the trainers on a dataset of reduced size
        flags = {
            "train_start": 0,
            "train_end": 5000,
            "test_start": 0,
            "test_end": 333,
            "nb_epochs": 5,
            "testing": True,
        }

        # Run the multi-gpu trainer for adversarial training
        flags.update(
            {
                "batch_size": 128,
                "adam_lrn": 0.001,
                "dataset": "mnist",
                "only_adv_train": False,
                "eval_iters": 1,
                "fast_tests": True,
                "save_dir": None,
                "save_steps": 10000,
                "attack_nb_iter_train": 10,
                "sync_step": None,
                "adv_train": True,
                "save": False,
                "model_type": "basic",
                "attack_type_test": "MadryEtAl_y",
            }
        )
        if extra_flags is not None:
            flags.update(extra_flags)

        # Run the multi-gpu trainer for adversarial training using 2 gpus
        # trainer_multigpu by default sets `allow_soft_placement=True`
        flags.update(
            {"ngpu": 2, "attack_type_train": "MadryEtAl_y_multigpu", "sync_step": 1}
        )
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_m = AccuracyReport()
        report_m.train_adv_train_clean_eval = report_dict["train"]
        report_m.adv_train_clean_eval = report_dict["test"]
        report_m.adv_train_adv_eval = report_dict["MadryEtAl_y"]

        flags.update({"ngpu": 1, "attack_type_train": "MadryEtAl_y"})
        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_s = AccuracyReport()
        report_s.train_adv_train_clean_eval = report_dict["train"]
        report_s.adv_train_clean_eval = report_dict["test"]
        report_s.adv_train_adv_eval = report_dict["MadryEtAl_y"]

        self.assertClose(
            report_s.train_adv_train_clean_eval,
            report_m.train_adv_train_clean_eval,
            atol=5e-2,
        )
        self.assertClose(
            report_s.adv_train_clean_eval, report_m.adv_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report_s.adv_train_adv_eval, report_m.adv_train_adv_eval, atol=5e-2
        )
예제 #16
0
def SNNL_example(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    nb_filters=NB_FILTERS,
    SNNL_factor=SNNL_FACTOR,
    output_dir=OUTPUT_DIR,
):
    """
    A simple model trained to minimize Cross Entropy and Maximize Soft Nearest
    Neighbor Loss at each internal layer. This outputs a TSNE of the sign of
    the adversarial gradients of a trained model. A model with a negative
    SNNL_factor will show little or no class clusters, while a model with a
    0 SNNL_factor will have class clusters in the adversarial gradient direction.
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param SNNL_factor: multiplier for Soft Nearest Neighbor Loss
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    mnist = MNIST(
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end,
    )
    x_train, y_train = mnist.get_set("train")
    x_test, y_test = mnist.get_set("test")

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
    }
    eval_params = {"batch_size": batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        print("Test accuracy on legitimate examples: %0.4f" % (acc))

    model = ModelBasicCNN("model", nb_classes, nb_filters)
    preds = model.get_logits(x)
    cross_entropy_loss = CrossEntropy(model)
    if not SNNL_factor:
        loss = cross_entropy_loss
    else:
        loss = SNNLCrossEntropy(model, factor=SNNL_factor, optimize_temperature=False)

    def evaluate():
        do_eval(preds, x_test, y_test, "clean_train_clean_eval")

    train(
        sess,
        loss,
        x_train,
        y_train,
        evaluate=evaluate,
        args=train_params,
        rng=rng,
        var_list=model.get_params(),
    )

    do_eval(preds, x_train, y_train, "train_clean_train_clean_eval")

    def imscatter(points, images, ax=None, zoom=1, cmap="hot"):
        if ax is None:
            ax = plt.gca()
        artists = []
        i = 0
        if not isinstance(cmap, list):
            cmap = [cmap] * len(points)
        for x0, y0 in points:
            transformed = (images[i] - np.min(images[i])) / (
                np.max(images[i]) - np.min(images[i])
            )
            im = OffsetImage(transformed[:, :, 0], zoom=zoom, cmap=cmap[i])
            ab = AnnotationBbox(im, (x0, y0), xycoords="data", frameon=False)
            artists.append(ax.add_artist(ab))
            i += 1
        ax.update_datalim(np.column_stack(np.transpose(points)))
        ax.autoscale()
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        return artists

    adv_grads = tf.sign(tf.gradients(cross_entropy_loss.fprop(x, y), x))
    feed_dict = {x: x_test[:batch_size], y: y_test[:batch_size]}
    adv_grads_val = sess.run(adv_grads, feed_dict=feed_dict)
    adv_grads_val = np.reshape(adv_grads_val, (batch_size, img_rows * img_cols))

    X_embedded = TSNE(n_components=2, verbose=0).fit_transform(adv_grads_val)
    plt.figure(num=None, figsize=(50, 50), dpi=40, facecolor="w", edgecolor="k")
    plt.title(
        "TSNE of Sign of Adv Gradients, SNNLCrossEntropy Model, factor:"
        + str(FLAGS.SNNL_factor),
        fontsize=42,
    )
    imscatter(X_embedded, x_test[:batch_size], zoom=2, cmap="Purples")
    plt.savefig(
        output_dir + "adversarial_gradients_SNNL_factor_" + str(SNNL_factor) + ".png"
    )
예제 #17
0
    def test_run_single_gpu_fgsm(self):
        """
        Test the basic single GPU performance by comparing to the FGSM
        tutorial.
        """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {
            "train_start": 0,
            "train_end": 5000,
            "test_start": 0,
            "test_end": 333,
            "nb_epochs": 5,
            "testing": True,
        }
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update(
            {
                "batch_size": 128,
                "adam_lrn": 0.001,
                "dataset": "mnist",
                "only_adv_train": False,
                "eval_iters": 1,
                "ngpu": 1,
                "fast_tests": False,
                "attack_type_train": "",
                "save_dir": None,
                "save_steps": 10000,
                "attack_nb_iter_train": None,
                "save": False,
                "model_type": "basic",
                "attack_type_test": "FGSM",
            }
        )

        flags.update({"adv_train": False})
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict["train"]
        report_2.clean_train_clean_eval = report_dict["test"]
        report_2.clean_train_adv_eval = report_dict["FGSM"]

        # Run the multi-gpu trainer for adversarial training
        flags.update({"adv_train": True, "attack_type_train": "FGSM"})
        HParams = namedtuple("HParams", flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, "runner"):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict["train"]
        report_2.adv_train_clean_eval = report_dict["test"]
        report_2.adv_train_adv_eval = report_dict["FGSM"]

        self.assertClose(
            report.train_clean_train_clean_eval,
            report_2.train_clean_train_clean_eval,
            atol=5e-2,
        )
        self.assertClose(
            report.clean_train_clean_eval, report_2.clean_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report.clean_train_adv_eval, report_2.clean_train_adv_eval, atol=5e-2
        )
        self.assertClose(
            report.train_adv_train_clean_eval,
            report_2.train_adv_train_clean_eval,
            atol=1e-1,
        )
        self.assertClose(
            report.adv_train_clean_eval, report_2.adv_train_clean_eval, atol=2e-2
        )
        self.assertClose(
            report.adv_train_adv_eval, report_2.adv_train_adv_eval, atol=1e-1
        )
예제 #18
0
    def test_run_single_gpu_fgsm(self):
        """
        Test the basic single GPU performance by comparing to the FGSM
        tutorial.
        """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {'train_start': 0,
                 'train_end': 5000,
                 'test_start': 0,
                 'test_end': 333,
                 'nb_epochs': 5,
                 'testing': True}
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update({'batch_size': 128, 'adam_lrn': 0.001,
                      'dataset': 'mnist', 'only_adv_train': False,
                      'eval_iters': 1, 'ngpu': 1, 'fast_tests': False,
                      'attack_type_train': '',
                      'save_dir': None, 'save_steps': 10000,
                      'attack_nb_iter_train': None, 'save': False,
                      'model_type': 'basic', 'attack_type_test': 'FGSM'})

        flags.update({'adv_train': False})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict['train']
        report_2.clean_train_clean_eval = report_dict['test']
        report_2.clean_train_adv_eval = report_dict['FGSM']

        # Run the multi-gpu trainer for adversarial training
        flags.update({'adv_train': True,
                      'attack_type_train': 'FGSM',
                      })
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict['train']
        report_2.adv_train_clean_eval = report_dict['test']
        report_2.adv_train_adv_eval = report_dict['FGSM']

        self.assertClose(report.train_clean_train_clean_eval,
                         report_2.train_clean_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report.clean_train_clean_eval,
                         report_2.clean_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.clean_train_adv_eval,
                         report_2.clean_train_adv_eval,
                         atol=5e-2)
        self.assertClose(report.train_adv_train_clean_eval,
                         report_2.train_adv_train_clean_eval,
                         atol=1e-1)
        self.assertClose(report.adv_train_clean_eval,
                         report_2.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.adv_train_adv_eval,
                         report_2.adv_train_adv_eval,
                         atol=1e-1)
예제 #19
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model.get_probs(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    model_train(sess,
                x,
                y,
                preds,
                X_train,
                Y_train,
                evaluate=evaluate,
                args=train_params)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(model, sess=sess)
    fgsm_params = {'eps': 0.3}

    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_probs(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = make_basic_cnn()
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(model_2, sess=sess)
    preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params)

    return report
예제 #20
0
from cleverhans.utils_keras import KerasModelWrapper
from cleverhans.utils_tf import model_eval
from lib.utils import load_gtsrb
from parameters import *
from small_net import *
from stn.conv_model import conv_model_no_color_adjust
import logging

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

keras.layers.core.K.set_learning_phase(0)

# Object used to keep track of (and return) key accuracies
report = AccuracyReport()

# Set TF random seed to improve reproducibility
tf.set_random_seed(1234)

if K.image_dim_ordering() != 'tf':
    K.set_image_dim_ordering('tf')
    print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
          "'th', temporarily setting to 'tf'")

# Create TF session and set as Keras backend session
sess = tf.Session()
K.set_session(sess)

set_log_level(logging.DEBUG)
예제 #21
0
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, source_samples=10,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x, y, x_train, y_train, args=train_params,
              save=os.path.exists("models"), rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[idxs]],
                dtype=np.float32)
        else:
            adv_inputs = np.array(
                [[instance] * nb_classes for
                 instance in x_test[:source_samples]], dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape((source_samples *
                                                     nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {'binary_search_steps': 1,
                 yname: adv_ys,
                 'max_iterations': attack_iterations,
                 'learning_rate': 0.1,
                 'batch_size': source_samples * nb_classes if
                 targeted else source_samples,
                 'initial_const': 10}

    adv = cw.generate_np(adv_inputs,
                         **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(
            sess, x, y, preds, adv, adv_ys, args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, y_test[
                           idxs], args=eval_params)
        else:
            adv_accuracy = 1 - \
                model_eval(sess, x, y, preds, adv, y_test[
                           :source_samples], args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
예제 #22
0
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=True, label_smoothing=0.1):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
  tf.keras.backend.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if keras.backend.image_data_format() != 'channels_last':
    raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

  # Create TF session and set as Keras backend session
  sess = tf.Session()
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  [x_adv_train, x_adv_test] = pickle.load(open(PKLDATA+'.pkl'))
  x_train, y_train = mnist.get_set('train')
  x_train = x_adv_train
  x_test, y_test = mnist.get_set('test')
  x_test = x_adv_test

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Define TF model graph
  model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                    channels=nchannels, nb_filters=64,
                    nb_classes=nb_classes)
  preds = model(x)
  print("Defined TensorFlow model graph.")

  def evaluate():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on adversarial examples: %0.4f' % acc)

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
  wrap = KerasModelWrapper(model)

  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate()
  else:
    print("Model was not loaded, training from scratch.")
    loss = CrossEntropy(wrap, smoothing=label_smoothing)
    train(sess, loss, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng)

  # Calculate training error
  if testing:
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
    report.train_clean_train_clean_eval = acc

  # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
  filter_x = binary_filter(x)

  batch = 1000

  def convert(x_input):
    x_output = None
    for i in tqdm(range(int(len(x_input) / batch))):
      tmp = sess.run(filter_x, feed_dict={x: x_input[i * batch:(i + 1) * batch]})
      if x_output is None:
        x_output = tmp
      else:
        x_output = np.concatenate((x_output, tmp))
    return x_output

  x_filter_test = convert(x_test)
  x_filter_train = convert(x_train)


  def evaluate_adv():
    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_filter_test, y_test, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on filtered examples: %0.4f' % acc)

  evaluate_adv()
예제 #23
0
    def helper_run_multi_gpu_madryetal(self, extra_flags=None):
        """
    Compare the single GPU performance to multiGPU performance.
    """
        # Run the trainers on a dataset of reduced size
        flags = {
            'train_start': 0,
            'train_end': 5000,
            'test_start': 0,
            'test_end': 333,
            'nb_epochs': 5,
            'testing': True
        }

        # Run the multi-gpu trainer for adversarial training
        flags.update({
            'batch_size': 128,
            'adam_lrn': 0.001,
            'dataset': 'mnist',
            'only_adv_train': False,
            'eval_iters': 1,
            'fast_tests': True,
            'save_dir': None,
            'save_steps': 10000,
            'attack_nb_iter_train': 10,
            'sync_step': None,
            'adv_train': True,
            'save': False,
            'model_type': 'basic',
            'attack_type_test': 'MadryEtAl_y'
        })
        if extra_flags is not None:
            flags.update(extra_flags)

        # Run the multi-gpu trainer for adversarial training using 2 gpus
        # trainer_multigpu by default sets `allow_soft_placement=True`
        flags.update({
            'ngpu': 2,
            'attack_type_train': 'MadryEtAl_y_multigpu',
            'sync_step': 1
        })
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_m = AccuracyReport()
        report_m.train_adv_train_clean_eval = report_dict['train']
        report_m.adv_train_clean_eval = report_dict['test']
        report_m.adv_train_adv_eval = report_dict['MadryEtAl_y']

        flags.update({'ngpu': 1, 'attack_type_train': 'MadryEtAl_y'})
        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_s = AccuracyReport()
        report_s.train_adv_train_clean_eval = report_dict['train']
        report_s.adv_train_clean_eval = report_dict['test']
        report_s.adv_train_adv_eval = report_dict['MadryEtAl_y']

        self.assertClose(report_s.train_adv_train_clean_eval,
                         report_m.train_adv_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report_s.adv_train_clean_eval,
                         report_m.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report_s.adv_train_adv_eval,
                         report_m.adv_train_adv_eval,
                         atol=5e-2)
예제 #24
0
def cifar10_cw_recon(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH,
                     model_path_cls=MODEL_PATH,
                     targeted=TARGETED,
                     num_threads=None,
                     label_smoothing=0.1,
                     nb_filters=NB_FILTERS,
                     filename=FILENAME,
                     train_dir_ae=TRAIN_DIR_AE,
                     train_dir_cl=TRAIN_DIR_CL):

    # Object used to keep track of (and return) key accuracies

    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    rng = np.random.RandomState()

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    nb_latent_size = 100
    # Get MNIST test data
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))

    #model_vae= vae_model(x, img_rows=img_rows, img_cols=img_cols,
    #                 channels=nchannels)

    wrap_vae = ModelVAE('wrap_vae')
    recon = wrap_vae.get_layer(x, 'RECON')
    #print("recon: ",recon)
    print("Defined TensorFlow model graph.")

    def evaluate_ae():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': 128}
        noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
            sess, x, x_t, recon, x_train, x_train, args=eval_params)
        print("reconstruction distance: ", d1)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_ae,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_ae):
        os.mkdir(train_dir_ae)

    #ckpt = tf.train.get_checkpoint_state(train_dir_ae)
    #print(train_dir_ae, ckpt)
    #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    #wrap_vae = KerasModelWrapper(model_vae)
    latent_dim = 20
    intermediate_dim = 128

    #train_ae(sess, global_loss, x_train, x_train, evaluate = evaluate_ae, args = train_params, rng = rng, var_list=wrap_vae.get_params())

    if clean_train_vae == True:
        print("Training VAE")
        loss = vae_loss(wrap_vae)
        train_ae(sess,
                 loss,
                 x_train,
                 x_train,
                 evaluate=evaluate_ae,
                 args=train_params,
                 rng=rng,
                 var_list=wrap_vae.get_params())
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_vae.ckpt")
        print("saved model")

    else:
        print("Loading VAE")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_vae.ckpt")
        evaluate_ae()
        if (train_further):
            train_params = {
                'nb_epochs': 10,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'train_dir': train_dir_ae,
                'filename': filename
            }
            #training with the saved model as starting point
            loss = SquaredError(wrap_vae)
            train_ae(sess,
                     loss,
                     x_train,
                     x_train,
                     evaluate=evaluate_vae,
                     args=train_params,
                     rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_ae_final.ckpt")

            evaluate_ae()

            print("Model loaded and trained for more epochs")

    num_classes = 10
    '''
  save_dir= 'models'
  model_name = 'cifar10_CNN.h5'
  model_path_cls = os.path.join(save_dir, model_name)
  '''
    cl_model = cnn_cl_model(img_rows=img_rows,
                            img_cols=img_cols,
                            channels=nchannels,
                            nb_filters=64,
                            nb_classes=nb_classes)
    preds_cl = cl_model(x)

    def do_eval_cls(preds, x_set, y_set, x_tar_set, report_key, is_adv=None):
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_t,
                         x_set,
                         y_set,
                         x_tar_set,
                         args=eval_params_cls)

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_cl,
                         x_t,
                         x_test,
                         y_test,
                         x_test,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': 3,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_cl,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_cl):
        os.mkdir(train_dir_cl)

    #ckpt = tf.train.get_checkpoint_state(train_dir_cl)
    #print(train_dir_cl, ckpt)
    #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap_cl = KerasModelWrapper(cl_model)

    if clean_train_cl == True:
        print("Training CNN Classifier")
        loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
        train(sess,
              loss_cl,
              x_train,
              y_train,
              evaluate=evaluate,
              optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                  decay=1e-6),
              args=train_params,
              rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_cnn_cl_vae.ckpt")
        print("saved model at ", "train_dir/model_cnn_cl.ckpt")

    else:
        print("Loading CNN Classifier")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_cnn_cl_vae.ckpt")
        print("Model loaded")
        evaluate()

        # Score trained model.
    '''
  scores = cl_model.evaluate(x_test, y_test, verbose=1)
  print('Test loss:', scores[0])
  print('Test accuracy:', scores[1])
  cl_model_wrap = KerasModelWrapper(cl_model)
` '''
    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(wrap_vae, wrap_cl, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * (nb_classes - 1)
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)

            #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])

            adv_input_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes - 1):
                    targ.append(y_test[idxs[curr_num]])
                adv_input_y.append(targ)

            adv_input_y = np.array(adv_input_y)

            adv_target_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(y_test[idxs[id]])
                adv_target_y.append(targ)

            adv_target_y = np.array(adv_target_y)

            #print("adv_input_y: \n", adv_input_y)
            #print("adv_target_y: \n", adv_target_y)

            adv_input_targets = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_test[idxs[id]])
                adv_input_targets.append(targ)
            adv_input_targets = np.array(adv_input_targets)

            adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1),
                                             img_rows, img_cols, nchannels))
            adv_input_targets = adv_input_targets.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_y = adv_input_y.reshape(
                source_samples * (nb_classes - 1), 10)
            adv_target_y = adv_target_y.reshape(
                source_samples * (nb_classes - 1), 10)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
        (source_samples * nb_classes, nb_classes))
    yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
    #adv = sess.run(adv)

    #print("layer names: \n", wrap_vae.get_layer_names())
    recon_orig = wrap_vae.get_layer(x, 'RECON')
    recon_orig = sess.run(recon_orig, feed_dict={x: adv_inputs})
    recon_adv = wrap_vae.get_layer(x, 'RECON')
    recon_adv = sess.run(recon_adv, feed_dict={x: adv})
    pred_adv_recon = wrap_cl.get_logits(x)
    pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

    #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
    #scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose = 1)
    #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
    #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
    shape = np.shape(adv_inputs)
    noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
    noise = pow(noise, 0.5)
    d1 = np.sum(np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
    d2 = np.sum(
        np.square(recon_adv - adv_input_targets)) / (np.shape(adv_inputs)[0])
    acc_1 = (sum(
        np.argmax(pred_adv_recon, axis=-1) == np.argmax(adv_target_y, axis=-1))
             ) / (np.shape(adv_target_y)[0])
    acc_2 = (sum(
        np.argmax(pred_adv_recon, axis=-1) == np.argmax(adv_input_y, axis=-1))
             ) / (np.shape(adv_target_y)[0])
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)

    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        #rint(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))
    # Finally, block & display a grid of all the adversarial examples

    if viz_enabled:

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2')

    #return report

    #adversarial training
    if (adv_train == True):

        print("starting adversarial training")
        #sess1 = tf.Session()
        adv_input_set = []
        adv_input_target_set = []

        for i in range(20):

            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]

            idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2 = np.array([[instance] * (nb_classes - 1)
                                     for instance in x_train[idxs]],
                                    dtype=np.float32)
            adv_input_targets_2 = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_train[idxs[id]])
                adv_input_targets_2.append(targ)
            adv_input_targets_2 = np.array(adv_input_targets_2)

            adv_inputs_2 = adv_inputs_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))
            adv_input_targets_2 = adv_input_targets_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_set.append(adv_inputs_2)
            adv_input_target_set.append(adv_input_targets_2)

        adv_input_set = np.array(adv_input_set),
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        adv_input_set = np.reshape(
            adv_input_set,
            (np.shape(adv_input_set)[0] * np.shape(adv_input_set)[1] *
             np.shape(adv_input_set)[2], np.shape(adv_input_set)[3],
             np.shape(adv_input_set)[4], np.shape(adv_input_set)[5]))
        adv_input_target_set = np.reshape(adv_input_target_set,
                                          (np.shape(adv_input_target_set)[0] *
                                           np.shape(adv_input_target_set)[1],
                                           np.shape(adv_input_target_set)[2],
                                           np.shape(adv_input_target_set)[3],
                                           np.shape(adv_input_target_set)[4]))

        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        #model_name = 'cifar10_AE_adv.h5'
        #model_path_ae = os.path.join(save_dir, model_name)

        model_ae_adv = ae_model(x,
                                img_rows=img_rows,
                                img_cols=img_cols,
                                channels=nchannels)
        recon = model_ae_adv(x)
        wrap_vae_adv = KerasModelWrapper(model_ae_adv)
        #print("recon: ",recon)
        #print("Defined TensorFlow model graph.")

        print("Training Adversarial AE")
        loss = SquaredError(wrap_vae_adv)
        train_ae(sess,
                 loss_2,
                 x_train_app,
                 x_train_aim,
                 evaluate=evaluate_ae,
                 args=train_params,
                 rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_ae_adv.ckpt")
        print("saved model")

        cw2 = CarliniWagnerAE(wrap_vae_adv, wrap_cl, sess=sess)

        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        recon_adv = wrap_vae_adv.get_layer(x, 'RECON')
        recon_orig = wrap_vae_adv.get_layer(x, 'RECON')
        recon_adv = sess.run(recon_adv, {x: adv_2})
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        pred_adv_recon = wrap_cl.get_logits(x)
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        if targeted:

            noise = reduce_sum(tf.square(adv_inputs - adv_2),
                               list(range(1, len(shape))))
            print("noise: ", noise)
        pred_adv_recon = cl_model.get_layer(recon_adv)
        #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        #scores2 = cl_model.eval_params(recon_adv, adv_target_y, verbose = 1)
        #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
        #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
        noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
        noise = pow(noise, 0.5)
        d1 = np.sum(
            np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv -
                              adv_input_targets)) / (np.shape(adv_inputs)[0])
        acc_1 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
        acc_2 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)

        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i,
                                            j] = adv_2[i * (nb_classes - 1) +
                                                       j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i, j] = adv_2[i *
                                                          (nb_classes - 1) + j]

            #rint(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully found

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print(
            'Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

        # Close TF session
        #sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if viz_enabled:
            #_ = grid_visual(grid_viz_data)
            #_ = grid_visual(grid_viz_data_1)
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')

            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')

                    if num_channels == 1:
                        plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig1_vae_adv_trained')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig2_vae_adv_trained')

            #return report


#binarization defense
#if(binarization_defense == True or mean_filtering==True):
    if (binarization_defense == True):

        print("BINARIZATION")
        print("---------------------------")
        adv[adv > 0.5] = 1.0
        adv[adv <= 0.5] = 0.0

        recon_orig = wrap_vae.get_layer(x, 'RECON')
        recon_adv = wrap_vae.get_layer(x, 'RECON')
        #pred_adv = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        #pred_adv = sess.run(pred_adv, {x: recon_adv})
        pred_adv_recon = wrap_cl.get_logits(x)
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        eval_params = {'batch_size': 90}
        if targeted:

            noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
            noise = pow(noise, 0.5)
            d1 = np.sum(
                np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
            d2 = np.sum(np.square(recon_adv - adv_input_targets)) / (
                np.shape(adv_inputs)[0])
            acc_1 = (sum(
                np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                    adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
            acc_2 = (sum(
                np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                    adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
            print("noise: ", noise)
            print("d1: ", d1)
            print("d2: ", d2)
            print("classifier acc_target: ", acc_1)
            print("classifier acc_true: ", acc_2)

        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1_bin')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2_bin')

    if (mean_filtering == True):

        print("MEAN FILTERING")
        print("---------------------------")
        adv = uniform_filter(adv, 2)

        recon_orig = wrap_vae.get_layer(x, 'RECON')
        recon_adv = wrap_vae.get_layer(x, 'RECON')
        pred_adv_recon = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        eval_params = {'batch_size': 90}

        noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
        noise = pow(noise, 0.5)
        d1 = np.sum(
            np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv -
                              adv_input_targets)) / (np.shape(adv_inputs)[0])
        acc_1 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
        acc_2 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)

        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1_mean')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2_mean')
예제 #25
0
    def test_run_single_gpu_fgsm(self):
        """
    Test the basic single GPU performance by comparing to the FGSM
    tutorial.
    """
        from cleverhans_tutorials import mnist_tutorial_tf

        # Run the MNIST tutorial on a dataset of reduced size
        flags = {
            'train_start': 0,
            'train_end': 5000,
            'test_start': 0,
            'test_end': 333,
            'nb_epochs': 5,
            'testing': True
        }
        report = mnist_tutorial_tf.mnist_tutorial(**flags)

        # Run the multi-gpu trainer for clean training
        flags.update({
            'batch_size': 128,
            'adam_lrn': 0.001,
            'dataset': 'mnist',
            'only_adv_train': False,
            'eval_iters': 1,
            'ngpu': 1,
            'fast_tests': False,
            'attack_type_train': '',
            'save_dir': None,
            'save_steps': 10000,
            'attack_nb_iter_train': None,
            'save': False,
            'model_type': 'basic',
            'attack_type_test': 'FGSM'
        })

        flags.update({'adv_train': False})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2 = AccuracyReport()
        report_2.train_clean_train_clean_eval = report_dict['train']
        report_2.clean_train_clean_eval = report_dict['test']
        report_2.clean_train_adv_eval = report_dict['FGSM']

        # Run the multi-gpu trainer for adversarial training
        flags.update({'adv_train': True, 'attack_type_train': 'FGSM'})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.compat.v1.set_random_seed(42)
        with tf.compat.v1.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_2.train_adv_train_clean_eval = report_dict['train']
        report_2.adv_train_clean_eval = report_dict['test']
        report_2.adv_train_adv_eval = report_dict['FGSM']

        self.assertClose(report.train_clean_train_clean_eval,
                         report_2.train_clean_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report.clean_train_clean_eval,
                         report_2.clean_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.clean_train_adv_eval,
                         report_2.clean_train_adv_eval,
                         atol=5e-2)
        self.assertClose(report.train_adv_train_clean_eval,
                         report_2.train_adv_train_clean_eval,
                         atol=1e-1)
        self.assertClose(report.adv_train_clean_eval,
                         report_2.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report.adv_train_adv_eval,
                         report_2.adv_train_adv_eval,
                         atol=1e-1)
def JSMA_FGSM(train_start=0,
              train_end=60000,
              test_start=0,
              test_end=10000,
              nb_epochs=6,
              batch_size=128,
              learning_rate=0.001,
              clean_train=True,
              testing=False,
              backprop_through_attack=False,
              nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm
        # ,preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
예제 #27
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=False,
                   label_smoothing=0.1,
                   save_model=SAVE_MODEL,
                   attack_method=ATTACK_METHOD,
                   model_type=MODEL_TYPE):
    """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'  # only use No.0 GPU
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    the_model = modelA
    if model_type == 'a':
        the_model = modelA
    elif model_type == 'b':
        the_model = modelB
    elif model_type == 'c':
        the_model = modelC
    else:
        exit('the model type must be a or b or c.')
    model = the_model(img_rows=img_rows,
                      img_cols=img_cols,
                      channels=nchannels,
                      nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_dir = train_dir + '/' + model_type
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)
        if save_model:
            saver = tf.train.Saver(max_to_keep=1)
            saver.save(sess,
                       '{}/mnist.ckpt'.format(train_dir),
                       global_step=NB_EPOCHS)
            keras.models.save_model(
                model, '{}/{}_mnist.h5'.format(train_dir, model_type))
            print("model has been saved")

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Basic Iterative Method (BIM) attack object and graph
    if attack_method == 'fgsm':
        att_method = FastGradientMethod(wrap, sess=sess)
        att_method_params = {'eps': 0.2, 'clip_min': 0., 'clip_max': 1.}
    elif attack_method == 'bim':
        att_method = BasicIterativeMethod(wrap, sess=sess)
        att_method_params = {
            'eps': 0.2,
            'eps_iter': 0.06,
            'nb_iter': 10,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif attack_method == 'mifgsm':
        att_method = MomentumIterativeMethod(wrap, sess=sess)
        att_method_params = {
            'eps': 0.2,
            'eps_iter': 0.08,
            'nb_iter': 10,
            'decay_factor': 0.4,
            'clip_min': 0.,
            'clip_max': 1.
        }
    else:
        exit("the attack method must be fgsm,bim,mifgsm")

    print(att_method_params)
    adv_x = att_method.generate(x, **att_method_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    start_time = time.time()
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)

    print('Test accuracy on adversarial examples: %0.4f' % acc)
    end_time = time.time()
    print("{} attack time is {}\n".format(attack_method,
                                          end_time - start_time))
    report.clean_train_adv_eval = acc

    #save_acc = np.array(save_acc)
    #record = pd.DataFrame(save_acc,columns=["decay","acc"])
    #record.to_csv("result/mnist_fc_decay__change.csv",index=False)

    tmp_train = sess.run(adv_x, feed_dict={
        x: x_test[0:1000]
    }).reshape(1000, 28, 28)
    for i in range(1, 10):
        tmp_data = sess.run(adv_x,
                            feed_dict={
                                x: x_test[i * 1000:(i + 1) * 1000]
                            }).reshape(1000, 28, 28)
        tmp_train = np.concatenate([tmp_train, tmp_data])
    np.save("{}_{}_test_adv.npy".format(attack_method, model_type), tmp_train)

    gc.collect()
예제 #28
0
def run_adversarial(num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE,
                   train_end=-1, test_end=-1):
    """
    MNIST cleverhans tutorial
    :param num_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :return: an AccuracyReport object
    """
    # Train a pytorch MNIST model
    torch_model = LeNetCNN()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                     transform=transforms.ToTensor()),
        batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
        batch_size=batch_size)

    # Train our model
    optimizer = optim.Adam(torch_model.parameters())
    train_loss = []

    total = 0
    correct = 0
    step = 0
    for _epoch in range(num_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # calc gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # update gradients

            preds_np = preds.cpu().detach().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys.cpu().detach().numpy()).sum()
            total += train_loader.batch_size
            step += 1
            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # Evaluate on clean data
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()

        preds = torch_model(xs)
        preds_np = preds.cpu().detach().numpy()

        correct += (np.argmax(preds_np, axis=1) == ys.cpu().detach().numpy()).sum()
        total += len(xs)

    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28,))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        correct += (np.argmax(adv_preds, axis=1) == ys.cpu().detach().numpy()).sum()
        total += test_loader.batch_size

    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc
    return report
def cifar10_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED,
                      noise_output=NOISE_OUTPUT):
  """
  CIFAR10 tutorial for Carlini and Wagner's attack
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :param model_path: path to the model file
  :param targeted: should we run a targeted attack? or untargeted?
  :return: an AccuracyReport object
  """
  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  sess = tf.Session()
  print("Created TensorFlow session.")

  set_log_level(logging.DEBUG)

  # Get Fashion MNIST test data
  fashion = keras.datasets.fashion_mnist
  (x_train, y_train), (x_test, y_test) = fashion.load_data()
  # cifar10 = CIFAR10(train_start=train_start, train_end=train_end,
  #               test_start=test_start, test_end=test_end)
  # x_train, y_train = cifar10.get_set('train')
  # x_test, y_test = cifar10.get_set('test')
  x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
  x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
  y_train = np_utils.to_categorical(y_train, 10)
  y_test = np_utils.to_categorical(y_test, 10)
  x_train = x_train.astype('float32')
  x_test = x_test.astype('float32')
  x_train /= 255
  x_test /= 255

  # Obtain Image Parameters
  img_rows, img_cols = x_train.shape[1:3]
  nchannels = 1
  nb_classes = 10

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  nb_filters = 64

  # Define TF model graph
  model = ModelBasicCNN('model1', nb_classes, nb_filters)
  preds = model.get_logits(x)
  loss = CrossEntropy(model, smoothing=0.1)
  print("Defined TensorFlow model graph.")

  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an CIFAR10 model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }

  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  if os.path.exists(model_path + ".meta"):
    tf_model_load(sess, model_path)
  else:
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, model_path)

  # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
  assert x_test.shape[0] == test_end - test_start, x_test.shape
  print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
  report.clean_train_clean_eval = accuracy

  ###########################################################################
  # Craft adversarial examples using Carlini and Wagner's approach
  ###########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  # Instantiate a CW attack object
  cw = CarliniWagnerL2(model, sess=sess)

  if viz_enabled:
    assert source_samples == nb_classes
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]
  if targeted:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, 1, img_rows, img_cols,
                    nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = np.array(
          [[instance] * nb_classes for instance in x_test[idxs]],
          dtype=np.float32)
    else:
      adv_inputs = np.array(
          [[instance] * nb_classes for
           instance in x_test[:source_samples]], dtype=np.float32)

    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_inputs = adv_inputs.reshape(
        (source_samples * nb_classes, img_rows, img_cols, nchannels))
    adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape((source_samples *
                                                 nb_classes, nb_classes))
    yname = "y_target"
  else:
    if viz_enabled:
      # Initialize our array for grid visualization
      grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
      grid_viz_data = np.zeros(grid_shape, dtype='f')

      adv_inputs = x_test[idxs]
    else:
      adv_inputs = x_test[:source_samples]

    adv_ys = None
    yname = "y"

  if targeted:
    cw_params_batch_size = source_samples * nb_classes
  else:
    cw_params_batch_size = source_samples
  cw_params = {'binary_search_steps': 1,
               yname: adv_ys,
               'max_iterations': attack_iterations,
               'learning_rate': CW_LEARNING_RATE,
               'batch_size': cw_params_batch_size,
               'initial_const': 10}

  adv = cw.generate_np(adv_inputs,
                       **cw_params)

  eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
  if targeted:
    adv_accuracy = model_eval(
        sess, x, y, preds, adv, adv_ys, args=eval_params)
  else:
    if viz_enabled:
      err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params)
      adv_accuracy = 1 - err
    else:
      err = model_eval(sess, x, y, preds, adv, y_test[:source_samples],
                       args=eval_params)
      adv_accuracy = 1 - err

  if viz_enabled:
    for i in range(nb_classes):
      if noise_output:
        image = adv[i * nb_classes] - adv_inputs[i * nb_classes]
      else:
        image = adv[i * nb_classes]
      grid_viz_data[i, 0] = image

  print('--------------------------------------')

  # Compute the number of adversarial examples that were successfully found
  print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
  report.clean_train_adv_eval = 1. - adv_accuracy

  # Compute the average distortion introduced by the algorithm
  percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                     axis=(1, 2, 3))**.5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

  # Close TF session
  sess.close()
  def save_visual(data, path):
    """
    Modified version of cleverhans.plot.pyplot
    """
    figure = plt.figure()
    # figure.canvas.set_window_title('Cleverhans: Grid Visualization')

    # Add the images to the plot
    num_cols = data.shape[0]
    num_rows = data.shape[1]
    num_channels = data.shape[4]
    for y in range(num_rows):
      for x in range(num_cols):
        figure.add_subplot(num_rows, num_cols, (x + 1) + (y * num_cols))
        plt.axis('off')

        if num_channels == 1:
          plt.imshow(data[x, y, :, :, 0], cmap='gray')
        else:
          plt.imshow(data[x, y, :, :, :])

    # Draw the plot and return
    plt.savefig(path)
    return figure

  # Finally, block & display a grid of all the adversarial examples
  if viz_enabled:
    # _ = grid_visual(grid_viz_data)
    # cleverhans_image.save("output", grid_viz_data)
    if noise_output:
      image_name = "output/cw_fashion_mnist_noise.png"
    else:
      image_name = "output/cw_fashion_mnist.png"
    _ = save_visual(grid_viz_data, image_name)

  return report
예제 #30
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x,
              y,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = LossCrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          x,
          y,
          x_train,
          y_train,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
예제 #31
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   train_dir="/tmp",
                   filename="mnist.ckpt",
                   load_model=False,
                   testing=False):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    ckpt = tf.train.get_checkpoint_state(train_dir)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    rng = np.random.RandomState([2017, 8, 30])
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    save=True,
                    rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv,
                         X_train,
                         Y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                save=False,
                rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1,
                     adversarial_training=ADVERSARIAL_TRAINING):
    """
  CIFAR10 cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :param adversarial_training: True means using adversarial training
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        # put data on cpu and gpu both
        config_args = dict(allow_soft_placement=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    mifgsm_params = {
        'eps': 0.5,
        'clip_min': 0.,
        'eps_iter': 0.002,
        'nb_iter': 10,
        'clip_max': 1.,
        'ord': np.inf
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelAllConvolutional('model1',
                                      nb_classes,
                                      nb_filters,
                                      input_shape=[32, 32, 3])

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        """
    when training, evaluating can be happened
    """
        train(sess,
              loss,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())
        # save model
        model_path = sys.path[0] + '/simple_cifar10.ckpt'
        saver = tf.train.Saver(max_to_keep=1)
        saver.save(sess, model_path)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')
        # Initialize the Basic Iterative Method (BIM) attack object and
        # graph
        mifgsm = MomentumIterativeMethod(model, sess=sess)

        for i in range(20):
            adv_x = mifgsm.generate(x, **mifgsm_params)
            preds_adv = model.get_logits(adv_x)

            print("eps:%0.2f" %
                  (mifgsm_params["eps_iter"] * mifgsm_params['nb_iter']))
            # Evaluate the accuracy of the MNIST model on adversarial examples
            do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
            mifgsm_params['eps_iter'] = mifgsm_params['eps_iter'] + 0.002

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    if not adversarial_training:
        return report
    # Create a new model and train it to be robust to BasicIterativeMethod
    model2 = ModelAllConvolutional('model2',
                                   nb_classes,
                                   nb_filters,
                                   input_shape=[32, 32, 3])
    bim2 = MomentumIterativeMethod(model2, sess=sess)

    def attack(x):
        return bim2.generate(x, **mifgsm_params)

    # add attack to loss
    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          None,
          None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
예제 #33
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=False,
                   label_smoothing=0.1):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows,
                      img_cols=img_cols,
                      channels=nchannels,
                      nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv,
                         x_train,
                         y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model(img_rows=img_rows,
                        img_cols=img_cols,
                        channels=nchannels,
                        nb_filters=64,
                        nb_classes=nb_classes)
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              x_test,
                              y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              x_test,
                              y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess,
          loss_2,
          x_train,
          y_train,
          evaluate=evaluate_2,
          args=train_params,
          rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              x_train,
                              y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              x_train,
                              y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #34
0
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, nb_classes=10, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = X_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Computer number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols)),
                    np.reshape(adv_x, (img_rows, img_cols)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
def baseline_deepfool(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      nb_epochs=6,
                      batch_size=128,
                      learning_rate=0.001,
                      clean_train=True,
                      testing=False,
                      backprop_through_attack=False,
                      nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 50,
        'clip_min': 0.,
        'clip_max': 1.
    }
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        #
        # HERE already trained model, thus we need a new one (model_2)
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_train,
                             Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)

        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on DeepFool adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_train,
                             Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")
        # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    deepfool2 = DeepFool(model_2, sess=sess)
    adv_x_2 = deepfool2.generate(x, **deepfool_params)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)

    #
    # let's generate DeepFool examples
    #
    # let's generate FGSM examples
    #
    fgsm = FastGradientMethod(model_2, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_fgsm = tf.stop_gradient(adv_x_fgsm)
    preds_2_fgsm = model_2(adv_x_fgsm)

    # DON'T WANT TO TRAIN on FGSM adv examples yet

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on FGSM adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_fgsm,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

        # Accuracy of the DeepFool adv trained model on DeepFool examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on DeepFool adversarial examples: %0.4f' %
              accuracy)

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, train_dir="/tmp",
                   filename="mnist.ckpt", load_model=False,
                   testing=False):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    ckpt = tf.train.get_checkpoint_state(train_dir)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    rng = np.random.RandomState([2017, 8, 30])
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
              args=train_params, save=True)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_train,
                         Y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    train(sess, x, y, preds_2, X_train, Y_train,
          predictions_adv=preds_2_adv, evaluate=evaluate_2,
          args=train_params, save=False)

    # Get a random slice of the data for linear extrapolation plots
    random_idx = np.random.randint(0, X_train.shape[0])
    X_slice = X_train[random_idx]
    Y_slice = Y_train[random_idx]

    # Plot the linear extrapolation plot for clean model
    log_prob_adv_array = get_logits_over_interval(
        sess, wrap, X_slice, fgsm_params)
    linear_extrapolation_plot(log_prob_adv_array, Y_slice,
                              'lep_clean.png')

    # Plot the linear extrapolation plot for adv model
    log_prob_adv_array = get_logits_over_interval(
        sess, wrap_2, X_slice, fgsm_params)
    linear_extrapolation_plot(log_prob_adv_array, Y_slice,
                              'lep_adv.png')

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
예제 #37
0
    def helper_run_multi_gpu_madryetal(self, extra_flags=None):
        """
        Compare the single GPU performance to multiGPU performance.
        """
        # Run the trainers on a dataset of reduced size
        flags = {'train_start': 0,
                 'train_end': 5000,
                 'test_start': 0,
                 'test_end': 333,
                 'nb_epochs': 5,
                 'testing': True}

        # Run the multi-gpu trainer for adversarial training
        flags.update({'batch_size': 128, 'adam_lrn': 0.001,
                      'dataset': 'mnist', 'only_adv_train': False,
                      'eval_iters': 1, 'fast_tests': True,
                      'save_dir': None, 'save_steps': 10000,
                      'attack_nb_iter_train': 10, 'sync_step': None,
                      'adv_train': True,
                      'save': False,
                      'model_type': 'basic',
                      'attack_type_test': 'MadryEtAl_y',
                      })
        if extra_flags is not None:
            flags.update(extra_flags)

        # Run the multi-gpu trainer for adversarial training using 2 gpus
        # trainer_multigpu by default sets `allow_soft_placement=True`
        flags.update({'ngpu': 2,
                      'attack_type_train': 'MadryEtAl_y_multigpu',
                      'sync_step': 1})
        HParams = namedtuple('HParams', flags.keys())

        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_m = AccuracyReport()
        report_m.train_adv_train_clean_eval = report_dict['train']
        report_m.adv_train_clean_eval = report_dict['test']
        report_m.adv_train_adv_eval = report_dict['MadryEtAl_y']

        flags.update({'ngpu': 1,
                      'attack_type_train': 'MadryEtAl_y',
                      })
        hparams = HParams(**flags)
        np.random.seed(42)
        tf.set_random_seed(42)
        with tf.variable_scope(None, 'runner'):
            report_dict = run_trainer(hparams)
        report_s = AccuracyReport()
        report_s.train_adv_train_clean_eval = report_dict['train']
        report_s.adv_train_clean_eval = report_dict['test']
        report_s.adv_train_adv_eval = report_dict['MadryEtAl_y']

        self.assertClose(report_s.train_adv_train_clean_eval,
                         report_m.train_adv_train_clean_eval,
                         atol=5e-2)
        self.assertClose(report_s.adv_train_clean_eval,
                         report_m.adv_train_clean_eval,
                         atol=2e-2)
        self.assertClose(report_s.adv_train_adv_eval,
                         report_m.adv_train_adv_eval,
                         atol=5e-2)