Beispiel #1
0
def main(net_type):
    """Train a CIFAR-10 ResNet, attack it with FGSM, then repeat with adversarial training.

    Both the normally trained model and the adversarially trained model are
    saved via ``save_model``; the latter under ``model_name + '_adv'``.

    :param net_type: network variant forwarded to
        ``resnet_cifar10.resnet_cifar10``; the value ``'squared_resnet'``
        additionally runs ``adam_pretrain`` before the main training.
    """
    # The placeholders below are channels-first (3, 32, 32), so force the
    # Theano ('th') dimension ordering.
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: temporarily set 'image_dim_ordering' to 'th'")

    sess = get_session()
    keras.backend.set_session(sess)

    (train_xs, train_ys), (test_xs, test_ys) = data_cifar10.load_cifar10()
    print('Loaded cifar10 data')

    x = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model, model_name = resnet_cifar10.resnet_cifar10(repetations=3, net_type=net_type)
    if net_type == 'squared_resnet':
        model = adam_pretrain(model, model_name, train_xs, train_ys, 1, test_xs, test_ys)

    predictions = model(x)
    tf_model_train(sess, x, y, predictions, train_xs, train_ys, test_xs, test_ys,
                   data_augmentor=data_cifar10.augment_batch)

    save_model(model, model_name)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    test_xs_adv, = batch_eval(sess, [x], [adv_x], [test_xs])
    assert test_xs_adv.shape[0] == 10000, test_xs_adv.shape

    # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, test_xs_adv, test_ys)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2, _ = resnet_cifar10.resnet_cifar10(repetations=3, net_type=net_type)
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, train_xs, train_ys, test_xs, test_ys,
                   predictions_adv=predictions_2_adv,
                   data_augmentor=data_cifar10.augment_batch)

    # Save the adversarially trained network (model_2), not the first model:
    # saving `model` here would store the non-robust weights under '_adv'.
    save_model(model_2, model_name + '_adv')

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    test_xs_adv_2, = batch_eval(sess, [x], [adv_x_2], [test_xs])
    assert test_xs_adv_2.shape[0] == 10000, test_xs_adv_2.shape

    # Evaluate the accuracy of the adversarially trained model on adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2, test_xs_adv_2, test_ys)
    print('Test accuracy on adversarial examples: ' + str(accuracy_adv))
    def evaluate_2():
        """Print the accuracy of ``predictions_2`` and ``predictions_2_adv`` on the test data.

        NOTE(review): nothing in the visible code calls this helper, and it
        reads ``X_test`` / ``Y_test``, which the enclosing function shown here
        does not define (its test data is named ``test_xs`` / ``test_ys``).
        It looks like a leftover from another example; confirm before relying
        on it.
        """
        # Evaluate the accuracy of the adversarially trained model on
        # legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained model on
        # adversarial examples: the clean inputs are fed to `x` and
        # `predictions_2_adv` applies the FGSM perturbation inside the graph
        accuracy_adv = tf_model_eval(sess, x, y, predictions_2_adv, X_test,
                                     Y_test)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))
Beispiel #3
0
def main(argv=None):
    """
    Test the accuracy of the MNIST cleverhans tutorial model

    Trains the tutorial model and asserts that its accuracy on the
    legitimate test set is at least 0.97.

    :param argv: unused
    :return: None
    :raises AssertionError: if the measured accuracy is below 0.97
    """
    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # Create TF session and set as Keras backend session; the context
    # manager closes the session even if training or the assertion fails
    with tf.Session() as sess:
        keras.backend.set_session(sess)
        print("Created TensorFlow session and set Keras backend.")

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist()
        print("Loaded MNIST test data.")

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
        y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

        # Define TF model graph
        model = model_mnist()
        predictions = model(x)
        print("Defined TensorFlow model graph.")

        # Train an MNIST model
        tf_model_train(sess, x, y, predictions, X_train, Y_train)

        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert float(accuracy) >= 0.97, accuracy
def calculate_accuracy_adv_jsma(sess, x, y, predictions, Y_test, X_test, X_test_adv, output_csv_fpath):
    """Measure accuracy on filtered legitimate and adversarial inputs for a grid of filter sizes.

    For every (width, height) pair with both values in 1..10, ``X_test`` and
    ``X_test_adv`` are passed through ``median_filter_np`` and scored against
    ``Y_test`` with ``tf_model_eval``. One row per pair is collected and
    handed to ``write_to_csv`` together with ``output_csv_fpath``.

    :param sess: session forwarded to ``tf_model_eval``
    :param x: input placeholder forwarded to ``tf_model_eval``
    :param y: label placeholder forwarded to ``tf_model_eval``
    :param predictions: model output forwarded to ``tf_model_eval``
    :param Y_test: labels used for both the legitimate and adversarial inputs
    :param X_test: legitimate inputs
    :param X_test_adv: adversarial inputs
    :param output_csv_fpath: destination passed to ``write_to_csv``
    """
    fieldnames = ['width', 'height', 'accuracy_legitimate', 'accuracy_malicious']
    report_fmt = "Width: %2d, Height: %2d, Accuracy_legitimate: %.2f, Accuracy_malicious: %.2f"
    filter_sizes = range(1, 11)
    rows = []

    print("\n===Calculating the accuracy with feature squeezing...")
    for width in filter_sizes:
        for height in filter_sizes:
            # Filter both sets first, then score them, legitimate before adversarial
            squeezed_legit = median_filter_np(X_test, width, height)
            squeezed_adv = median_filter_np(X_test_adv, width, height)

            acc_legit = tf_model_eval(sess, x, y, predictions, squeezed_legit, Y_test)
            acc_adv = tf_model_eval(sess, x, y, predictions, squeezed_adv, Y_test)

            # Keys follow `fieldnames`, which is also the CSV column order
            rows.append(dict(zip(fieldnames, (width, height, acc_legit, acc_adv))))
            print(report_fmt % (width, height, acc_legit, acc_adv))

    write_to_csv(rows, output_csv_fpath, fieldnames)
def calculate_accuracy_adv_fgsm(sess, x, y, predictions, predictions_clip, predictions_bin, eps_list, Y_test, adv_x_dict, output_csv_fpath):
    """Score three prediction graphs on the adversarial examples stored for each epsilon.

    For every ``eps`` in ``eps_list`` the examples ``adv_x_dict[eps]`` are
    evaluated with ``tf_model_eval`` against ``predictions``,
    ``predictions_clip`` and ``predictions_bin`` (in that order). The three
    accuracies are printed and collected into one row per epsilon, and all
    rows are handed to ``write_to_csv``.

    :param sess: session forwarded to ``tf_model_eval``
    :param x: input placeholder forwarded to ``tf_model_eval``
    :param y: label placeholder forwarded to ``tf_model_eval``
    :param predictions: prediction graph reported as "raw"
    :param predictions_clip: prediction graph reported as "clip"
    :param predictions_bin: prediction graph reported as "bin"
    :param eps_list: epsilon values to look up in ``adv_x_dict``
    :param Y_test: labels for the adversarial examples
    :param adv_x_dict: mapping from epsilon to adversarial examples
    :param output_csv_fpath: destination passed to ``write_to_csv``
    """
    fieldnames = ['eps', 'accuracy_raw', 'accuracy_clip', 'accuracy_bin']
    prediction_graphs = (predictions, predictions_clip, predictions_bin)
    rows = []

    for eps in eps_list:
        adv_examples = adv_x_dict[eps]

        # Same adversarial inputs, three different prediction graphs
        acc_raw, acc_clip, acc_bin = [
            tf_model_eval(sess, x, y, graph, adv_examples, Y_test)
            for graph in prediction_graphs
        ]

        print('Test accuracy on adversarial examples: raw %.4f, clip %.4f, bin %.4f (eps=%.1f): ' % (acc_raw, acc_clip, acc_bin, eps))

        row = {'eps': eps}
        row['accuracy_raw'] = acc_raw
        row['accuracy_clip'] = acc_clip
        row['accuracy_bin'] = acc_bin
        rows.append(row)

    write_to_csv(rows, output_csv_fpath, fieldnames)
Beispiel #6
0
def main(argv=None):
    """
    MNIST cleverhans tutorial for the Jacobian-based saliency map approach (JSMA)

    Restores the model from ``FLAGS.train_dir``/``FLAGS.filename`` when that
    file exists (training and saving it otherwise), reports accuracy on the
    test set, then runs ``jsma`` on the first ``FLAGS.source_samples`` test
    samples for every class other than the labelled one and prints the
    average success rate and the average rate of perturbed features.

    :param argv: unused
    :return: None
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    try:
        keras.backend.set_session(sess)
        print("Created TensorFlow session and set Keras backend.")

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist()
        print("Loaded MNIST test data.")

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        # Define TF model graph
        model = model_mnist()
        predictions = model(x)
        print("Defined TensorFlow model graph.")

        #######################################################################
        # Training the model using TensorFlow
        #######################################################################

        # Train an MNIST model if it does not exist in the train_dir folder
        saver = tf.train.Saver()
        save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
        if os.path.isfile(save_path):
            saver.restore(sess, save_path)
        else:
            tf_model_train(sess, x, y, predictions, X_train, Y_train)
            saver.save(sess, save_path)

        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

        #######################################################################
        # Craft adversarial examples using the Jacobian-based saliency map
        # approach
        #######################################################################
        print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
              str(FLAGS.nb_classes) + ' adversarial examples')

        # This array indicates whether an adversarial example was found for
        # each test set sample and target class
        results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

        # This array contains the fraction of perturbed features for each test
        # set sample and target class
        perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                                 dtype='f')

        # Define the TF graph for the model's Jacobian
        grads = jacobian_graph(predictions, x)

        # Loop over the samples we want to perturb into adversarial examples
        # (`range` rather than `xrange` so the loop runs on Python 2 and 3)
        for sample_ind in range(FLAGS.source_samples):
            # We want to find an adversarial example for each possible target
            # class (i.e. all classes that differ from the label given in the
            # dataset)
            target_classes = other_classes(FLAGS.nb_classes,
                                           int(np.argmax(Y_test[sample_ind])))

            # Loop over all target classes
            for target in target_classes:
                print('--------------------------------------')
                print('Creating adversarial example for target class ' +
                      str(target))

                # This call runs the Jacobian-based saliency map approach
                _, result, percentage_perterb = jsma(
                    sess,
                    x,
                    predictions,
                    grads,
                    X_test[sample_ind:(sample_ind + 1)],
                    target,
                    theta=1,
                    gamma=0.1,
                    increase=True,
                    back='tf',
                    clip_min=0,
                    clip_max=1)

                # Update the arrays for later analysis
                results[target, sample_ind] = result
                perturbations[target, sample_ind] = percentage_perterb

        # Compute the number of adversarial examples that were successfully
        # found; each sample has (nb_classes - 1) target classes
        success_rate = float(np.sum(results)) / (
            (FLAGS.nb_classes - 1) * FLAGS.source_samples)
        print('Avg. rate of successful misclassifcations {0}'.format(success_rate))

        # Compute the average distortion introduced by the algorithm
        percentage_perturbed = np.mean(perturbations)
        print('Avg. rate of perterbed features {0}'.format(percentage_perturbed))
    finally:
        # Close TF session even when training, evaluation or crafting raised
        sess.close()
def _detector_dataset(l1_dist, rows, nb_rows, col_id_leg, col_id_adv):
    """Stack the selected columns of ``l1_dist`` into features with matching 0/1 labels.

    :param l1_dist: 2-D array indexed as ``l1_dist[rows, col_id]``
    :param rows: slice selecting the rows to use
    :param nb_rows: number of rows the slice is expected to select; used to
        size the label vector
    :param col_id_leg: column ids labelled 0 (legitimate)
    :param col_id_adv: column ids labelled 1 (adversarial)
    :return: ``(features, labels)``
    """
    features = np.hstack([l1_dist[rows, col_id] for col_id in col_id_leg + col_id_adv])
    labels = np.hstack([np.zeros(nb_rows * len(col_id_leg)),
                        np.ones(nb_rows * len(col_id_adv))])
    return features, labels


def _train_threshold_detector(l1_dist, size_train, val_rows, size_val, col_id_leg, col_id_adv):
    """Split ``l1_dist`` into train/validation sets and pass them to ``train_detector``.

    The training set is always the first ``size_train`` rows; ``val_rows`` is
    the slice to use for validation.
    """
    x_train, y_train = _detector_dataset(
        l1_dist, slice(None, size_train), size_train, col_id_leg, col_id_adv)
    x_val, y_val = _detector_dataset(
        l1_dist, val_rows, size_val, col_id_leg, col_id_adv)
    train_detector(x_train, y_train, x_val, y_val)


def main(argv=None):
    """Run one feature-squeezing experiment on MNIST, selected by ``FLAGS.task``.

    A pre-trained model is loaded into the ``mnist_original`` variable scope
    and its accuracy on the test set is printed. The task then decides what
    happens next:

    * ``FGSM`` / ``FGSM-adv-train`` / ``JSMA``: obtain adversarial examples
      and write an accuracy CSV under ``results/mnist`` (skipped if the CSV
      already exists).
    * ``FGSM-detection`` / ``JSMA-detection`` / ``joint-detection``: compute
      (or reload from CSV) L1 distances and call ``train_detector`` on a
      train/validation split of them.

    Any other value prints a usage hint.

    :param argv: unused
    :return: None
    """
    sess = load_tf_session()
    print ("\n===Loading MNIST data...")
    X_train, Y_train, X_test, Y_test = get_mnist_data()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    with tf.variable_scope('mnist_original'):
        model = model_mnist()
        predictions = model(x)
        # predictions_bin = model(binary_filter_tf(x))
        predictions_bin = model(reduce_precision_tf(x, npp=2))
        predictions_clip = model(tf.clip_by_value(x, 0., 1.))
    print("\n===Defined TensorFlow model graph.")

    # Load an MNIST model
    maybe_download_mnist_model()
    model_name = 'mnist_epochs%d' % FLAGS.nb_epochs
    mnist_model_path = os.path.join(FLAGS.train_dir, model_name)
    # Only variables created under the 'mnist_original' scope are restored
    original_variables = [k for k in tf.global_variables() if k.name.startswith('mnist_original')]
    tf_model_load_from_path(sess, mnist_model_path, original_variables)
    print ("---Loaded a pre-trained MNIST model.\n")

    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    print('Test accuracy on raw legitimate examples ' + str(accuracy))

    result_folder = 'results/mnist'
    if not os.path.isdir(result_folder):
        os.makedirs(result_folder)

    if FLAGS.task == 'FGSM':
        nb_examples = 10000
        eps_list = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test, eps_list, model_name, nb_examples, result_folder)

        if FLAGS.visualize is True:
            img_fpath = os.path.join(result_folder, model_name + '_FGSM_examples.png')
            draw_fgsm_adv_examples(adv_x_dict, Y_test, img_fpath)
            print ('\n===Adversarial images are saved in ', img_fpath)

        csv_fpath = model_name + "_fgsm_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        print ("\n===Calculating the accuracy with feature squeezing...")
        # An existing CSV is treated as a cached result and not recomputed
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_fgsm(sess, x, y, predictions, predictions_clip, predictions_bin, eps_list, Y_test, adv_x_dict, csv_fpath)
        print ("---Results are stored in ", csv_fpath, '\n')

    elif FLAGS.task == 'FGSM-adv-train':
        # Load an adversarially trained MNIST model for comparison.
        with tf.variable_scope('mnist_adv_train'):
            model_2 = model_mnist()
            predictions_at = model_2(x)
            predictions_at_bin = model_2(reduce_precision_tf(x, npp=2))
            predictions_at_clip = model_2(tf.clip_by_value(x, 0., 1.))

        model_name = 'mnist_adv_train_epochs%d' % FLAGS.nb_epochs
        mnist_model_path = os.path.join(FLAGS.adv_train_dir, model_name)
        adv_train_variables = [k for k in tf.global_variables() if k.name.startswith('mnist_adv_train')]
        tf_model_load_from_path(sess, mnist_model_path, adv_train_variables)
        print ("---Loaded an adversarially pre-trained MNIST model.\n")

        accuracy = tf_model_eval(sess, x, y, predictions_at, X_test, Y_test)
        print('Test accuracy on raw legitimate examples (adv-trained-model) ' + str(accuracy))

        # Get adversarial examples.
        nb_examples = 10000
        eps_list = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions_at, X_test, eps_list, model_name, nb_examples, result_folder)

        csv_fpath = model_name + "_fgsm_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        print ("\n===Calculating the accuracy with feature squeezing...")
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_fgsm(sess, x, y, predictions_at, predictions_at_clip, predictions_at_bin, eps_list, Y_test, adv_x_dict, csv_fpath)
        print ("---Results are stored in ", csv_fpath, '\n')

    elif FLAGS.task == 'JSMA':
        # Generate or load JSMA adversarial examples.
        nb_examples = 1000
        X_adv = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test, model_name, nb_examples, result_folder)

        if FLAGS.visualize is True:
            img_fpath = os.path.join(result_folder, model_name + '_JSMA_examples.png')
            draw_jsma_adv_examples(X_adv, X_test, Y_test, img_fpath)
            print ('\n===Adversarial images are saved in ', img_fpath)

        csv_fpath = model_name + "_jsma_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        print ("\n===Calculating the accuracy with feature squeezing...")
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_jsma(sess, x, y, predictions, Y_test[:nb_examples], X_test[:nb_examples], X_adv, csv_fpath)
        print ("---Results are stored in ", csv_fpath, '\n')

    elif FLAGS.task == 'JSMA-detection':
        # Calculate L1 distance on prediction for JSMA adversarial detection.
        nb_examples = 1000
        X_adv = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test, model_name, nb_examples, result_folder)

        csv_fpath = model_name + "_jsma_l1_distance_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_jsma(sess, x, predictions_clip, X_test[:nb_examples], X_adv, csv_fpath)
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print ("\n===Training an JSMA detector...")
        size_train = size_val = int(nb_examples/2)
        # Column 0 is used as the legitimate class, column 1 as the adversarial one;
        # validation uses the last `size_val` rows
        _train_threshold_detector(l1_dist, size_train, slice(-size_val, None), size_val,
                                  col_id_leg=[0], col_id_adv=[1])
        print ("---Done")

    elif FLAGS.task == 'FGSM-detection':
        nb_examples = 10000
        eps_list = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test, eps_list, model_name, nb_examples, result_folder)

        # Calculate L1 distance on prediction for adversarial detection.
        csv_fpath = model_name + "_fgsm_l1_distance_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_fgsm(sess, x, predictions_clip, predictions_bin, adv_x_dict, csv_fpath)
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print ("\n===Training a FGSM detector...")
        size_train = size_val = int(nb_examples/2)
        # Selected epsilon: 0.1, 0.2, 0.3 (columns 1-3); column 0 is legitimate
        _train_threshold_detector(l1_dist, size_train, slice(-size_val, None), size_val,
                                  col_id_leg=[0], col_id_adv=[1, 2, 3])
        print ("---Done")

    elif FLAGS.task == 'joint-detection':
        nb_examples_jsma = 1000
        nb_examples_fgsm = 10000
        nb_examples_detection = min(nb_examples_jsma, nb_examples_fgsm)

        eps_list = [0.3]
        fgsm_adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test, eps_list, model_name, nb_examples_fgsm, result_folder)
        X_test_adv_jsma = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test, model_name, nb_examples_jsma, result_folder)

        X_test_adv_fgsm = fgsm_adv_x_dict[0.3][:nb_examples_fgsm]
        X_test_adv_jsma = X_test_adv_jsma[:nb_examples_jsma]

        csv_fpath = model_name + "_joint_l1_distance_%dexamples.csv" % nb_examples_detection
        csv_fpath = os.path.join(result_folder, csv_fpath)
        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_joint(sess, x, predictions_clip, X_test, X_test_adv_fgsm, X_test_adv_jsma, csv_fpath)
            np.savetxt(csv_fpath, l1_dist, delimiter=',')
            print ("---Results are stored in ", csv_fpath, '\n')
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print ("\n===Training a joint detector...")
        nb_examples_min = min(len(X_test), len(X_test_adv_fgsm), len(X_test_adv_jsma))
        size_train = size_val = int(nb_examples_min/2)
        # Unlike the single-attack tasks, validation uses the rows directly
        # after the training rows rather than the last rows of the array
        _train_threshold_detector(l1_dist, size_train,
                                  slice(size_train, size_train + size_val), size_val,
                                  col_id_leg=[0], col_id_adv=[1, 2])
        print ("---Done")

    else:
        print ("Please specify a task: FGSM, FGSM-adv-train, JSMA, FGSM-detection, JSMA-detection, joint-detection.")
 def evaluate():
     """Print the model's accuracy on the legitimate test examples.

     Reads ``sess``, ``x``, ``y``, ``predictions``, ``X_test`` and ``Y_test``
     from the surrounding scope.
     """
     legit_accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
     # Sanity check: this expects exactly 10000 test rows
     assert X_test.shape[0] == 10000, X_test.shape
     report = 'Test accuracy on legitimate test examples: ' + str(legit_accuracy)
     print(report)
def main(argv=None):
    """
    MNIST cleverhans tutorial

    Trains a model on label-smoothed MNIST, measures it on FGSM (eps=0.3)
    adversarial examples, then builds a second model and trains it with
    adversarial training. Accuracy reporting during training is delegated to
    the ``evaluate`` callbacks handed to ``tf_model_train``.

    :param argv: unused
    :return:
    """

    # Seed TF's random number generator to improve reproducibility
    tf.set_random_seed(1234)

    # Switch Keras to the Theano ('th') dimension ordering if needed
    backend = keras.backend
    if backend.image_dim_ordering() != 'th':
        backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # One TF session, shared with the Keras backend
    sess = tf.Session()
    backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Load the MNIST splits
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Label smoothing: clip the training labels into [0.1 / 9, 0.9]
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    smooth_floor = label_smooth / 9.
    smooth_ceiling = 1. - label_smooth
    Y_train = Y_train.clip(smooth_floor, smooth_ceiling)

    # Input and label placeholders
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # First model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def test_accuracy(preds, inputs):
        # Accuracy of `preds` when `inputs` are fed to `x`, scored against Y_test
        return tf_model_eval(sess, x, y, preds, inputs, Y_test)

    def evaluate():
        # Accuracy of the first model on legitimate test examples
        clean_acc = test_accuracy(predictions, X_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

    # Train the first model
    tf_model_train(sess, x, y, predictions, X_train, Y_train, evaluate=evaluate)

    # Craft FGSM adversarial examples against the first model
    adv_x = fgsm(x, predictions, eps=0.3)
    [X_test_adv] = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Accuracy of the first model on those adversarial examples
    accuracy = test_accuracy(predictions, X_test_adv)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Second model graph, plus its own FGSM graph and predictions on it
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Legitimate test examples first ...
        clean_acc = test_accuracy(predictions_2, X_test)
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

        # ... then adversarial ones: clean inputs are fed to `x` and
        # `predictions_2_adv` applies the perturbation inside the graph
        adv_acc = test_accuracy(predictions_2_adv, X_test)
        print('Test accuracy on adversarial examples: ' + str(adv_acc))

    # Adversarial training of the second model
    tf_model_train(sess, x, y, predictions_2, X_train, Y_train,
                   predictions_adv=predictions_2_adv, evaluate=evaluate_2)
Beispiel #10
0
def main(argv=None):
    """
    MNIST cleverhans tutorial

    Trains a model, prints its accuracy on legitimate and FGSM (eps=0.3)
    adversarial test examples, then repeats both measurements for a second
    model trained with adversarial training.

    :param argv: unused
    :return: None
    """
    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Train an MNIST model
    tf_model_train(sess, x, y, predictions, X_train, Y_train)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess,
                   x,
                   y,
                   predictions_2,
                   X_train,
                   Y_train,
                   predictions_adv=predictions_2_adv)

    # Evaluate the accuracy of the adversarially trained MNIST model on
    # legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    X_test_adv_2, = batch_eval(sess, [x], [adv_x_2], [X_test])
    assert X_test_adv_2.shape[0] == 10000, X_test_adv_2.shape

    # Evaluate the accuracy of the adversarially trained MNIST model on
    # adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2, X_test_adv_2,
                                 Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy_adv))