def main(net_type):
    """Train a CIFAR-10 ResNet, attack it with FGSM, then repeat with adversarial training.

    Steps performed:
      1. Force the Keras image dimension ordering to 'th'.
      2. Load CIFAR-10, build the ResNet for ``net_type`` and train it
         (with an extra ``adam_pretrain`` pass when ``net_type`` is
         ``'squared_resnet'``), then save it.
      3. Craft FGSM adversarial examples (eps=0.3) on the test set and
         print the accuracy on them.
      4. Build a second network, train it adversarially, save it under
         ``model_name + '_adv'`` and print its accuracy on adversarial
         examples crafted against it.

    :param net_type: network variant forwarded to
        ``resnet_cifar10.resnet_cifar10``.
    :return: None
    """
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: temporarily set 'image_dim_ordering' to 'th'")

    sess = get_session()
    keras.backend.set_session(sess)

    (train_xs, train_ys), (test_xs, test_ys) = data_cifar10.load_cifar10()
    print('Loaded cifar10 data')

    x = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model, model_name = resnet_cifar10.resnet_cifar10(repetations=3, net_type=net_type)
    if net_type == 'squared_resnet':
        model = adam_pretrain(model, model_name, train_xs, train_ys, 1, test_xs, test_ys)

    predictions = model(x)
    tf_model_train(sess, x, y, predictions, train_xs, train_ys, test_xs, test_ys,
                   data_augmentor=data_cifar10.augment_batch)
    save_model(model, model_name)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    test_xs_adv, = batch_eval(sess, [x], [adv_x], [test_xs])
    assert test_xs_adv.shape[0] == 10000, test_xs_adv.shape

    # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, test_xs_adv, test_ys)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2, _ = resnet_cifar10.resnet_cifar10(repetations=3, net_type=net_type)
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, train_xs, train_ys, test_xs, test_ys,
                   predictions_adv=predictions_2_adv,
                   data_augmentor=data_cifar10.augment_batch)
    # Save the adversarially trained network (model_2); the first network
    # was already saved above under the plain model name.
    save_model(model_2, model_name + '_adv')

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    test_xs_adv_2, = batch_eval(sess, [x], [adv_x_2], [test_xs])
    assert test_xs_adv_2.shape[0] == 10000, test_xs_adv_2.shape

    # Evaluate the accuracy of the adversarially trained model on
    # adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2, test_xs_adv_2, test_ys)
    print('Test accuracy on adversarial examples: ' + str(accuracy_adv))
def evaluate_2():
    """Print the accuracy of the adversarially trained model.

    Evaluates ``predictions_2`` and then ``predictions_2_adv`` on
    ``X_test`` / ``Y_test`` and prints one line for each. All of ``sess``,
    ``x``, ``y``, the prediction tensors and the test data are free
    variables resolved from the surrounding scope.
    """
    # Accuracy on legitimate test examples
    clean_score = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
    clean_msg = 'Test accuracy on legitimate test examples: ' + str(clean_score)
    print(clean_msg)

    # Accuracy on adversarial examples (via the adversarial prediction graph)
    adv_score = tf_model_eval(sess, x, y, predictions_2_adv, X_test, Y_test)
    adv_msg = 'Test accuracy on adversarial examples: ' + str(adv_score)
    print(adv_msg)
def main(argv=None):
    """
    Test the accuracy of the MNIST cleverhans tutorial model

    Trains the MNIST model and asserts that its accuracy on the legitimate
    test set is at least 0.97.

    :param argv: unused; accepted so the function can serve as an app entry
        point.
    :return: None
    """
    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # Create TF session and set as Keras backend session; the context
    # manager closes the session even if the accuracy assertion fails.
    with tf.Session() as sess:
        keras.backend.set_session(sess)
        print("Created TensorFlow session and set Keras backend.")

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist()
        print("Loaded MNIST test data.")

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
        y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

        # Define TF model graph
        model = model_mnist()
        predictions = model(x)
        print("Defined TensorFlow model graph.")

        # Train an MNIST model
        tf_model_train(sess, x, y, predictions, X_train, Y_train)

        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert float(accuracy) >= 0.97, accuracy
def calculate_accuracy_adv_jsma(sess, x, y, predictions, Y_test, X_test, X_test_adv, output_csv_fpath):
    """Measure accuracy under median-filter squeezing for every window size.

    For each (width, height) pair with both values in 1..10, the legitimate
    and the adversarial inputs are passed through ``median_filter_np`` and
    evaluated with ``tf_model_eval``. One row per pair is printed and
    collected, and all rows are written to ``output_csv_fpath`` at the end.

    :param sess: TF session forwarded to ``tf_model_eval``.
    :param x: input placeholder.
    :param y: label placeholder.
    :param predictions: model output tensor to evaluate.
    :param Y_test: labels used for both legitimate and adversarial inputs.
    :param X_test: legitimate inputs.
    :param X_test_adv: adversarial inputs.
    :param output_csv_fpath: destination passed to ``write_to_csv``.
    """
    columns = ['width', 'height', 'accuracy_legitimate', 'accuracy_malicious']
    print ("\n===Calculating the accuracy with feature squeezing...")

    # Same visiting order as two nested loops: width outer, height inner.
    window_shapes = [(w, h) for w in range(1, 11) for h in range(1, 11)]

    rows = []
    for width, height in window_shapes:
        squeezed_leg = median_filter_np(X_test, width, height)
        squeezed_adv = median_filter_np(X_test_adv, width, height)

        acc_leg = tf_model_eval(sess, x, y, predictions, squeezed_leg, Y_test)
        acc_mal = tf_model_eval(sess, x, y, predictions, squeezed_adv, Y_test)

        row = {
            'width': width,
            'height': height,
            'accuracy_legitimate': acc_leg,
            'accuracy_malicious': acc_mal,
        }
        rows.append(row)
        print ("Width: %2d, Height: %2d, Accuracy_legitimate: %.2f, Accuracy_malicious: %.2f" % (width, height, acc_leg, acc_mal))

    write_to_csv(rows, output_csv_fpath, columns)
def calculate_accuracy_adv_fgsm(sess, x, y, predictions, predictions_clip, predictions_bin, eps_list, Y_test, adv_x_dict, output_csv_fpath):
    """Evaluate three prediction graphs on FGSM examples for each epsilon.

    For every ``eps`` in ``eps_list`` the adversarial inputs are looked up
    in ``adv_x_dict`` and scored with ``predictions``, ``predictions_clip``
    and ``predictions_bin`` (in that order). Each result row is printed and
    collected, and all rows are written to ``output_csv_fpath`` at the end.

    :param sess: TF session forwarded to ``tf_model_eval``.
    :param x: input placeholder.
    :param y: label placeholder.
    :param predictions: raw model output tensor.
    :param predictions_clip: model output tensor for the clipped input.
    :param predictions_bin: model output tensor for the binarized input.
    :param eps_list: epsilons to evaluate; each must be a key of
        ``adv_x_dict``.
    :param Y_test: labels for the adversarial inputs.
    :param adv_x_dict: mapping from epsilon to adversarial inputs.
    :param output_csv_fpath: destination passed to ``write_to_csv``.
    """
    columns = ['eps', 'accuracy_raw', 'accuracy_clip', 'accuracy_bin']
    rows = []

    for eps in eps_list:
        adv_inputs = adv_x_dict[eps]

        # Evaluate the accuracy of the model on adversarial examples
        acc_raw = tf_model_eval(sess, x, y, predictions, adv_inputs, Y_test)
        acc_clip = tf_model_eval(sess, x, y, predictions_clip, adv_inputs, Y_test)
        acc_bin = tf_model_eval(sess, x, y, predictions_bin, adv_inputs, Y_test)

        summary = 'Test accuracy on adversarial examples: raw %.4f, clip %.4f, bin %.4f (eps=%.1f): ' % (acc_raw, acc_clip, acc_bin, eps)
        print (summary)

        rows.append({
            'eps': eps,
            'accuracy_raw': acc_raw,
            'accuracy_clip': acc_clip,
            'accuracy_bin': acc_bin,
        })

    write_to_csv(rows, output_csv_fpath, columns)
def main(argv=None):
    """
    MNIST cleverhans tutorial for the Jacobian-based saliency map approach (JSMA)

    Restores the MNIST model from ``FLAGS.train_dir``/``FLAGS.filename`` if
    that file exists (otherwise trains and saves it), then runs JSMA on the
    first ``FLAGS.source_samples`` test samples against every class other
    than the sample's label, and prints the success rate and the mean
    fraction of perturbed features.

    :param argv: unused; accepted so the function can serve as an app entry
        point.
    :return: None
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    try:
        keras.backend.set_session(sess)
        print("Created TensorFlow session and set Keras backend.")

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist()
        print("Loaded MNIST test data.")

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        # Define TF model graph
        model = model_mnist()
        predictions = model(x)
        print("Defined TensorFlow model graph.")

        #######################################################################
        # Training the model using TensorFlow
        #######################################################################

        # Train an MNIST model if it does not exist in the train_dir folder
        saver = tf.train.Saver()
        save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
        if os.path.isfile(save_path):
            saver.restore(sess, save_path)
        else:
            tf_model_train(sess, x, y, predictions, X_train, Y_train)
            saver.save(sess, save_path)

        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

        #######################################################################
        # Craft adversarial examples using the Jacobian-based saliency map
        # approach
        #######################################################################
        print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
              str(FLAGS.nb_classes) + ' adversarial examples')

        # This array indicates whether an adversarial example was found for
        # each test set sample and target class
        results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

        # This array contains the fraction of perturbed features for each test
        # set sample and target class
        perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                                 dtype='f')

        # Define the TF graph for the model's Jacobian
        grads = jacobian_graph(predictions, x)

        # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in xrange(FLAGS.source_samples):
            # We want to find an adversarial example for each possible target
            # class (i.e. all classes that differ from the label given in the
            # dataset)
            target_classes = other_classes(FLAGS.nb_classes,
                                           int(np.argmax(Y_test[sample_ind])))

            # Loop over all target classes
            for target in target_classes:
                print('--------------------------------------')
                print('Creating adversarial example for target class ' +
                      str(target))

                # This call runs the Jacobian-based saliency map approach
                _, result, percentage_perturb = jsma(
                    sess, x, predictions, grads,
                    X_test[sample_ind:(sample_ind + 1)], target,
                    theta=1, gamma=0.1, increase=True, back='tf',
                    clip_min=0, clip_max=1)

                # Update the arrays for later analysis
                results[target, sample_ind] = result
                perturbations[target, sample_ind] = percentage_perturb

        # Compute the number of adversarial examples that were successfully
        # found
        success_rate = float(np.sum(results)) / (
            (FLAGS.nb_classes - 1) * FLAGS.source_samples)
        print('Avg. rate of successful misclassifcations {0}'.format(success_rate))

        # Compute the average distortion introduced by the algorithm
        percentage_perturbed = np.mean(perturbations)
        print('Avg. rate of perterbed features {0}'.format(percentage_perturbed))
    finally:
        # Close TF session even when training, evaluation or crafting raised
        sess.close()
def _train_l1_detector(l1_dist, size_train, size_val, val_rows, col_id_leg, col_id_adv):
    """Build train/validation sets from L1-distance columns and fit a detector.

    Columns listed in ``col_id_leg`` are labelled 0 and columns listed in
    ``col_id_adv`` are labelled 1. The first ``size_train`` rows of each
    selected column form the training set; ``val_rows`` (a ``slice``
    covering ``size_val`` rows) selects the validation rows.
    """
    col_ids = col_id_leg + col_id_adv

    x_train = np.hstack([l1_dist[:size_train, col_id] for col_id in col_ids])
    y_train = np.hstack([np.zeros(size_train * len(col_id_leg)),
                         np.ones(size_train * len(col_id_adv))])

    x_val = np.hstack([l1_dist[val_rows, col_id] for col_id in col_ids])
    y_val = np.hstack([np.zeros(size_val * len(col_id_leg)),
                       np.ones(size_val * len(col_id_adv))])

    train_detector(x_train, y_train, x_val, y_val)


def main(argv=None):
    """Run the MNIST feature-squeezing experiment selected by ``FLAGS.task``.

    Loads a pre-trained MNIST model, prints its accuracy on the legitimate
    test set, and then dispatches on ``FLAGS.task``:

    * ``FGSM`` / ``FGSM-adv-train``: accuracy of the raw, clipped and
      precision-reduced prediction graphs on FGSM examples.
    * ``JSMA``: accuracy under median filtering on JSMA examples.
    * ``FGSM-detection`` / ``JSMA-detection`` / ``joint-detection``: compute
      (or load from CSV) L1 distances and train a threshold detector.

    Results are written below ``results/mnist``; existing result CSV files
    are reused instead of being recomputed.

    :param argv: unused; accepted so the function can serve as an app entry
        point.
    :return: None
    """
    sess = load_tf_session()
    print ("\n===Loading MNIST data...")
    X_train, Y_train, X_test, Y_test = get_mnist_data()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    with tf.variable_scope('mnist_original'):
        model = model_mnist()
        predictions = model(x)
        predictions_bin = model(reduce_precision_tf(x, npp=2))
        predictions_clip = model(tf.clip_by_value(x, 0., 1.))
    print("\n===Defined TensorFlow model graph.")

    # Load an MNIST model
    maybe_download_mnist_model()
    model_name = 'mnist_epochs%d' % FLAGS.nb_epochs
    mnist_model_path = os.path.join(FLAGS.train_dir, model_name)

    # Only restore the variables that belong to the original model's scope
    original_variables = [k for k in tf.global_variables() if k.name.startswith('mnist_original')]
    tf_model_load_from_path(sess, mnist_model_path, original_variables)
    print ("---Loaded a pre-trained MNIST model.\n")

    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    print('Test accuracy on raw legitimate examples ' + str(accuracy))

    result_folder = 'results/mnist'
    if not os.path.isdir(result_folder):
        os.makedirs(result_folder)

    if FLAGS.task == 'FGSM':
        nb_examples = 10000
        eps_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test, eps_list, model_name, nb_examples, result_folder)

        if FLAGS.visualize is True:
            img_fpath = os.path.join(result_folder, model_name + '_FGSM_examples.png')
            draw_fgsm_adv_examples(adv_x_dict, Y_test, img_fpath)
            print ('\n===Adversarial images are saved in ', img_fpath)

        csv_fpath = model_name + "_fgsm_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        print ("\n===Calculating the accuracy with feature squeezing...")
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_fgsm(sess, x, y, predictions, predictions_clip, predictions_bin, eps_list, Y_test, adv_x_dict, csv_fpath)
        print ("---Results are stored in ", csv_fpath, '\n')

    elif FLAGS.task == 'FGSM-adv-train':
        # Load an adversarially trained MNIST model for comparison.
        with tf.variable_scope('mnist_adv_train'):
            model_2 = model_mnist()
            predictions_at = model_2(x)
            predictions_at_bin = model_2(reduce_precision_tf(x, npp=2))
            predictions_at_clip = model_2(tf.clip_by_value(x, 0., 1.))

        model_name = 'mnist_adv_train_epochs%d' % FLAGS.nb_epochs
        mnist_model_path = os.path.join(FLAGS.adv_train_dir, model_name)
        adv_train_variables = [k for k in tf.global_variables() if k.name.startswith('mnist_adv_train')]
        tf_model_load_from_path(sess, mnist_model_path, adv_train_variables)
        print ("---Loaded an adversarially pre-trained MNIST model.\n")

        accuracy = tf_model_eval(sess, x, y, predictions_at, X_test, Y_test)
        print('Test accuracy on raw legitimate examples (adv-trained-model) ' + str(accuracy))

        # Get adversarial examples.
        nb_examples = 10000
        eps_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions_at, X_test, eps_list, model_name, nb_examples, result_folder)

        csv_fpath = model_name + "_fgsm_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        print ("\n===Calculating the accuracy with feature squeezing...")
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_fgsm(sess, x, y, predictions_at, predictions_at_clip, predictions_at_bin, eps_list, Y_test, adv_x_dict, csv_fpath)
        print ("---Results are stored in ", csv_fpath, '\n')

    elif FLAGS.task == 'JSMA':
        # Generate or load JSMA adversarial examples.
        nb_examples = 1000
        X_adv = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test, model_name, nb_examples, result_folder)

        if FLAGS.visualize is True:
            img_fpath = os.path.join(result_folder, model_name + '_JSMA_examples.png')
            draw_jsma_adv_examples(X_adv, X_test, Y_test, img_fpath)
            print ('\n===Adversarial images are saved in ', img_fpath)

        csv_fpath = model_name + "_jsma_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        print ("\n===Calculating the accuracy with feature squeezing...")
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_jsma(sess, x, y, predictions, Y_test[:nb_examples], X_test[:nb_examples], X_adv, csv_fpath)
        print ("---Results are stored in ", csv_fpath, '\n')

    elif FLAGS.task == 'JSMA-detection':
        # Calculate L1 distance on prediction for JSMA adversarial detection.
        nb_examples = 1000
        X_adv = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test, model_name, nb_examples, result_folder)

        csv_fpath = model_name + "_jsma_l1_distance_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_jsma(sess, x, predictions_clip, X_test[:nb_examples], X_adv, csv_fpath)
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print ("\n===Training an JSMA detector...")
        size_train = size_val = int(nb_examples/2)
        # Validation uses the last size_val rows.
        _train_l1_detector(l1_dist, size_train, size_val,
                           slice(-size_val, None),
                           col_id_leg=[0], col_id_adv=[1])
        print ("---Done")

    elif FLAGS.task == 'FGSM-detection':
        nb_examples = 10000
        eps_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test, eps_list, model_name, nb_examples, result_folder)

        # Calculate L1 distance on prediction for adversarial detection.
        csv_fpath = model_name + "_fgsm_l1_distance_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_fgsm(sess, x, predictions_clip, predictions_bin, adv_x_dict, csv_fpath)
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print ("\n===Training a FGSM detector...")
        size_train = size_val = int(nb_examples/2)
        # Selected epsilon: 0.1, 0.2, 0.3 (columns 1-3); validation uses the
        # last size_val rows.
        _train_l1_detector(l1_dist, size_train, size_val,
                           slice(-size_val, None),
                           col_id_leg=[0], col_id_adv=[1, 2, 3])
        print ("---Done")

    elif FLAGS.task == 'joint-detection':
        nb_examples_jsma = 1000
        nb_examples_fgsm = 10000
        nb_examples_detection = min(nb_examples_jsma, nb_examples_fgsm)

        eps_list = [0.3]
        fgsm_adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test, eps_list, model_name, nb_examples_fgsm, result_folder)
        X_test_adv_jsma = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test, model_name, nb_examples_jsma, result_folder)

        X_test_adv_fgsm = fgsm_adv_x_dict[0.3][:nb_examples_fgsm]
        X_test_adv_jsma = X_test_adv_jsma[:nb_examples_jsma]

        csv_fpath = model_name + "_joint_l1_distance_%dexamples.csv" % nb_examples_detection
        csv_fpath = os.path.join(result_folder, csv_fpath)

        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_joint(sess, x, predictions_clip, X_test, X_test_adv_fgsm, X_test_adv_jsma, csv_fpath)
            np.savetxt(csv_fpath, l1_dist, delimiter=',')
            print ("---Results are stored in ", csv_fpath, '\n')
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print ("\n===Training a joint detector...")
        nb_examples_min = min(len(X_test), len(X_test_adv_fgsm), len(X_test_adv_jsma))
        size_train = size_val = int(nb_examples_min/2)
        # Validation uses the size_val rows that directly follow the
        # training rows.
        _train_l1_detector(l1_dist, size_train, size_val,
                           slice(size_train, size_train + size_val),
                           col_id_leg=[0], col_id_adv=[1, 2])
        print ("---Done")

    else:
        print ("Please specify a task: FGSM, FGSM-adv-train, JSMA, FGSM-detection, JSMA-detection, joint-detection.")
def evaluate():
    """Print the model's accuracy on the legitimate test set.

    Evaluates ``predictions`` on ``X_test`` / ``Y_test``, asserts that the
    test set has 10000 rows, and prints the accuracy. ``sess``, ``x``,
    ``y``, ``predictions`` and the test data are free variables resolved
    from the surrounding scope.
    """
    score = tf_model_eval(sess, x, y, predictions, X_test, Y_test)

    # Sanity check on the test set size (runs after the evaluation).
    nb_test_rows = X_test.shape[0]
    assert nb_test_rows == 10000, X_test.shape

    report = 'Test accuracy on legitimate test examples: ' + str(score)
    print(report)
def main(argv=None):
    """
    MNIST cleverhans tutorial

    Trains an MNIST model on label-smoothed targets, measures its accuracy
    on FGSM adversarial examples (eps=0.3), then builds a second model and
    trains it adversarially. Accuracy is reported through callbacks handed
    to ``tf_model_train`` via its ``evaluate`` argument.

    :param argv: unused
    :return: None
    """
    # Fix the TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # The model expects the Theano image dimension ordering
    ordering = keras.backend.image_dim_ordering()
    if ordering != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # One TF session, shared with the Keras backend
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Label smoothing: clip the training targets into
    # [label_smooth / 9, 1 - label_smooth]
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    lower = label_smooth / 9.
    upper = 1. - label_smooth
    Y_train = Y_train.clip(lower, upper)

    # Input and label placeholders
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # First model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Callback: accuracy of the first model on legitimate test examples
        clean_acc = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

    # Train the first model
    tf_model_train(sess, x, y, predictions, X_train, Y_train,
                   evaluate=evaluate)

    # FGSM adversarial examples against the first model
    adv_x = fgsm(x, predictions, eps=0.3)
    (X_test_adv,) = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Accuracy of the first model on those adversarial examples
    adv_acc = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(adv_acc))

    print("Repeating the process, using adversarial training")

    # Second model graph, plus its predictions on its own FGSM examples
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Callback: accuracy of the adversarially trained model on
        # legitimate test examples ...
        clean_acc = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

        # ... and on adversarial examples (via the adversarial graph)
        robust_acc = tf_model_eval(sess, x, y, predictions_2_adv, X_test,
                                   Y_test)
        print('Test accuracy on adversarial examples: ' + str(robust_acc))

    # Adversarial training of the second model
    tf_model_train(sess, x, y, predictions_2, X_train, Y_train,
                   predictions_adv=predictions_2_adv, evaluate=evaluate_2)
def main(argv=None):
    """
    MNIST cleverhans tutorial

    Trains an MNIST model, reports its accuracy on legitimate and FGSM
    adversarial (eps=0.3) test examples, then repeats the process with a
    second model trained adversarially.

    :param argv: unused; accepted so the function can serve as an app entry
        point.
    :return: None
    """
    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Train an MNIST model
    tf_model_train(sess, x, y, predictions, X_train, Y_train)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, X_train, Y_train,
                   predictions_adv=predictions_2_adv)

    # Evaluate the accuracy of the adversarially trained MNIST model on
    # legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    X_test_adv_2, = batch_eval(sess, [x], [adv_x_2], [X_test])
    assert X_test_adv_2.shape[0] == 10000, X_test_adv_2.shape

    # Evaluate the accuracy of the adversarially trained MNIST model on
    # adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2, X_test_adv_2, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy_adv))