def main(argv=None):
    """
    Test the accuracy of the MNIST cleverhans tutorial model.

    Trains a fresh MNIST model inside a TensorFlow session and asserts
    that its test-set accuracy reaches at least 97%.

    :param argv: unused (present for tf.app.run compatibility)
    :return: None
    """
    # Image dimensions ordering should follow the Theano convention
    # (channels first: (batch, 1, 28, 28)).
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        # FIX: converted Python-2 print statements to print() calls for
        # consistency with the rest of this file.
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # Create TF session and set as Keras backend session; the context
    # manager guarantees the session is closed on exit.
    with tf.Session() as sess:
        keras.backend.set_session(sess)
        print("Created TensorFlow session and set Keras backend.")

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist()
        print("Loaded MNIST test data.")

        # Define input TF placeholders (channels-first images, one-hot labels)
        x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
        y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

        # Define TF model graph
        model = model_mnist()
        predictions = model(x)
        print("Defined TensorFlow model graph.")

        # Train an MNIST model
        tf_model_train(sess, x, y, predictions, X_train, Y_train)

        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert float(accuracy) >= 0.97, accuracy
def adv_train_mnist(sess, X_train, Y_train, X_test, Y_test, save_path):
    """Adversarially train an MNIST model (FGSM examples mixed in) and save it.

    :param sess: active TF session
    :param X_train: training images
    :param Y_train: training labels (one-hot)
    :param X_test: test images used by the progress callback
    :param Y_test: test labels used by the progress callback
    :param save_path: checkpoint path for the trained model
    """
    # Symbolic inputs: channels-first MNIST images and one-hot labels.
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Build the model graph plus an FGSM branch under a dedicated scope so
    # its variables are distinguishable from the clean model's.
    with tf.variable_scope('mnist_adv_train'):
        adv_model = model_mnist()
        preds = adv_model(x)
        adv_x = fgsm(x, preds, eps=0.3, clip_min=0., clip_max=1.)
        preds_adv = adv_model(adv_x)

    def report_progress():
        # Accuracy of the adversarially trained model on clean test data.
        accuracy = tf_model_eval(sess, x, y, preds, X_test, Y_test)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))
        # Accuracy of the same model on FGSM-perturbed test data.
        accuracy_adv = tf_model_eval(sess, x, y, preds_adv, X_test, Y_test)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Train on both clean and adversarial predictions, checkpointing the
    # resulting weights to save_path.
    tf_model_train_and_save(sess, x, y, preds, X_train, Y_train,
                            save_path=save_path,
                            predictions_adv=preds_adv,
                            evaluate=report_progress)
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test):
    """
    Build and train the model standing in for the "remote" black-box
    oracle described in the original paper.

    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :return: the oracle's prediction tensor
    """
    # Build the graph for the black-box model.
    oracle = model_mnist()
    oracle_preds = oracle(x)
    print("Defined TensorFlow model graph.")

    # Fit the oracle on its private training set (quiet mode).
    model_train(sess, x, y, oracle_preds, X_train, Y_train, verbose=False)

    # Report clean-data accuracy so the oracle's quality is visible.
    clean_acc = model_eval(sess, x, y, oracle_preds, X_test, Y_test)
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(clean_acc))

    return oracle_preds
def train_mnist(sess, X_train, Y_train, X_test, Y_test, save_path):
    """Train an MNIST model from scratch and checkpoint it to save_path.

    :param sess: active TF session
    :param X_train: training images
    :param Y_train: training labels (one-hot)
    :param X_test: test images used by the progress callback
    :param Y_test: test labels used by the progress callback
    :param save_path: filesystem path for the saved model
    """
    # Symbolic inputs: channels-first MNIST images and one-hot labels.
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Build the model graph inside its own variable scope.
    with tf.variable_scope('mnist_original'):
        net = model_mnist()
        preds = net(x)
    print("Defined TensorFlow model graph.")

    def report_clean_accuracy():
        # Progress callback: accuracy on the legitimate test set.
        accuracy = tf_model_eval(sess, x, y, preds, X_test, Y_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train and save an MNIST model.
    tf_model_train_and_save(sess, x, y, preds, X_train, Y_train,
                            save_path=save_path,
                            evaluate=report_clean_accuracy)
def main():
    """
    Test the accuracy of the MNIST cleverhans tutorial model.

    Trains a Theano-backend MNIST model, asserts clean test accuracy
    >= 0.98, then crafts FGSM adversarial examples and asserts the
    model's accuracy on them drops to <= 0.1.
    :return: None
    """
    # Sanity check: this script requires the Theano backend.  `backend` is
    # assumed to be the keras backend module imported at file top —
    # TODO confirm.
    if not hasattr(backend, "theano"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the Theano backend.")

    # Image dimensions ordering should follow the Theano convention
    # (channels first: (batch, 1, 28, 28)).
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # Command-line configuration for training.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-b', default=128,
                        help='Size of training batches')
    parser.add_argument('--train_dir', '-d', default='/tmp',
                        help='Directory storing the saved model.')
    parser.add_argument('--filename', '-f', default='mnist.ckpt',
                        help='Filename to save model under.')
    parser.add_argument('--nb_epochs', '-e', default=6, type=int,
                        help='Number of epochs to train model')
    parser.add_argument('--learning_rate', '-lr', default=0.5, type=float,
                        help='Learning rate for training')
    args = parser.parse_args()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input Theano placeholder
    x_shape = (None, 1, 28, 28)
    y_shape = (None, 10)  # NOTE(review): appears unused — confirm before removing
    x = T.tensor4('x')
    y = T.matrix('y')

    # Define Theano model graph; build() fixes the input shape before the
    # symbolic forward pass.
    model = model_mnist()
    model.build(x_shape)
    predictions = model(x)
    print("Defined Theano model graph.")

    # Train an MNIST model
    th_model_train(x, y, predictions, model.trainable_weights,
                   X_train, Y_train, args=args)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = th_model_eval(x, y, predictions, X_test, Y_test, args=args)
    assert float(accuracy) >= 0.98, accuracy

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3, back='th')
    X_test_adv, = batch_eval([x], [adv_x], [X_test], args=args)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples;
    # the attack is expected to be highly effective.
    accuracy = th_model_eval(x, y, predictions, X_test_adv, Y_test, args=args)
    assert float(accuracy) <= 0.1, accuracy
def main(argv=None):
    """
    MNIST cleverhans tutorial for the Jacobian-based saliency map approach
    (JSMA).

    Trains (or restores) an MNIST model, then crafts targeted adversarial
    examples for FLAGS.source_samples test points against every wrong
    class, reporting success rate and average distortion.
    :param argv: unused (present for tf.app.run compatibility)
    :return: None
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimensions ordering should follow the Theano convention
    # (channels first).
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input TF placeholders (channels-first images, one-hot labels)
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model if it does not exist in the train_dir folder
    saver = tf.train.Saver()
    save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.isfile(save_path):
        # FIX: reuse save_path instead of re-joining the same components.
        saver.restore(sess, save_path)
    else:
        tf_model_train(sess, x, y, predictions, X_train, Y_train)
        saver.save(sess, save_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
          str(FLAGS.nb_classes) + ' adversarial examples')

    # results[target, sample] indicates whether an adversarial example
    # reaching `target` was found for that test sample.
    results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

    # perturbations[target, sample] is the fraction of features perturbed
    # for that (sample, target) pair.
    perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                             dtype='f')

    # Define the TF graph for the model's Jacobian
    grads = jacobian_graph(predictions, x)

    # Loop over the samples we want to perturb into adversarial examples.
    # FIX: range() instead of Python-2-only xrange() — the rest of this
    # block already uses Python 3 print() calls.
    for sample_ind in range(FLAGS.source_samples):
        # Target every class that differs from the sample's dataset label.
        target_classes = other_classes(FLAGS.nb_classes,
                                       int(np.argmax(Y_test[sample_ind])))

        # Loop over all target classes
        for target in target_classes:
            print('--------------------------------------')
            print('Creating adversarial example for target class ' + str(target))

            # This call runs the Jacobian-based saliency map approach
            _, result, percentage_perturb = jsma(
                sess, x, predictions, grads,
                X_test[sample_ind:(sample_ind + 1)],
                target, theta=1, gamma=0.1, increase=True, back='tf',
                clip_min=0, clip_max=1)

            # Update the arrays for later analysis
            results[target, sample_ind] = result
            perturbations[target, sample_ind] = percentage_perturb

    # Compute the number of adversarial examples that were successfully found
    success_rate = float(np.sum(results)) / ((FLAGS.nb_classes - 1) *
                                             FLAGS.source_samples)
    # FIX: corrected typos in the reported messages
    # ("misclassifcations" -> "misclassifications", "perterbed" -> "perturbed").
    print('Avg. rate of successful misclassifications {0}'.format(success_rate))

    # Compute the average distortion introduced by the algorithm
    percentage_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0}'.format(percentage_perturbed))

    # Close TF session
    sess.close()
# Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist() assert Y_train.shape[1] == 10. label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28)) y = tf.placeholder(tf.float32, shape=(None, 10)) # Define TF model graph model = model_mnist() predictions = model(x) print("Defined TensorFlow model graph.") # Train an MNIST model model_train(sess, x, y, predictions, X_train, Y_train, evaluate=evaluate) ############################################################################## # Create Adversarials ############################################################################## # Craft adversarial examples using Fast Gradient Sign Method (FGSM) adv_x = fgsm(x, predictions, eps=0.2) X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test]) assert X_test_adv.shape[0] == 10000, X_test_adv.shape # Evaluate the accuracy of the MNIST model on adversarial examples
def main():
    """
    Test the accuracy of the MNIST cleverhans tutorial model (Theano
    backend).

    Trains a model, asserts clean test accuracy >= 0.98, crafts FGSM
    adversarial examples and asserts adversarial accuracy <= 0.1.
    :return: None
    """
    # Command-line configuration for training.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-b', default=128,
                        help='Size of training batches')
    parser.add_argument('--train_dir', '-d', default='/tmp',
                        help='Directory storing the saved model.')
    parser.add_argument('--filename', '-f', default='mnist.ckpt',
                        help='Filename to save model under.')
    parser.add_argument('--nb_epochs', '-e', default=6, type=int,
                        help='Number of epochs to train model')
    parser.add_argument('--learning_rate', '-lr', default=0.5, type=float,
                        help='Learning rate for training')
    args = parser.parse_args()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input Theano placeholder.
    # FIX: removed the unused local `y_shape = (None, 10)`.
    x_shape = (None, 1, 28, 28)
    x = T.tensor4('x')
    y = T.matrix('y')

    # Define Theano model graph
    model = model_mnist()
    model.build(x_shape)
    predictions = model(x)
    print("Defined Theano model graph.")

    # Train an MNIST model
    th_model_train(x, y, predictions, model.trainable_weights,
                   X_train, Y_train, args=args)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = th_model_eval(x, y, predictions, X_test, Y_test, args=args)
    assert float(accuracy) >= 0.98, accuracy

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3, back='th')
    X_test_adv, = batch_eval([x], [adv_x], [X_test], args=args)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = th_model_eval(x, y, predictions, X_test_adv, Y_test, args=args)
    assert float(accuracy) <= 0.1, accuracy
def main(argv=None):
    """
    Evaluate feature squeezing as a defense/detection mechanism against
    FGSM and JSMA adversarial examples on MNIST.

    FLAGS.task selects the experiment: 'FGSM', 'FGSM-adv-train', 'JSMA',
    'JSMA-detection', 'FGSM-detection', or 'joint-detection'.
    :param argv: unused (present for tf.app.run compatibility)
    :return: None
    """
    sess = load_tf_session()

    print("\n===Loading MNIST data...")
    X_train, Y_train, X_test, Y_test = get_mnist_data()

    # Define input TF placeholder (channels-first images, one-hot labels)
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph: raw predictions plus two "squeezed" input
    # variants (precision reduction and clipping) sharing the same weights.
    with tf.variable_scope('mnist_original'):
        model = model_mnist()
        predictions = model(x)
        # predictions_bin = model(binary_filter_tf(x))
        predictions_bin = model(reduce_precision_tf(x, npp=2))
        predictions_clip = model(tf.clip_by_value(x, 0., 1.))
    print("\n===Defined TensorFlow model graph.")

    # Load an MNIST model (downloading the checkpoint if necessary).
    maybe_download_mnist_model()
    model_name = 'mnist_epochs%d' % FLAGS.nb_epochs
    mnist_model_path = os.path.join(FLAGS.train_dir, model_name)
    # Restore only the variables belonging to this model's scope.
    original_variables = [k for k in tf.global_variables()
                          if k.name.startswith('mnist_original')]
    tf_model_load_from_path(sess, mnist_model_path, original_variables)
    print("---Loaded a pre-trained MNIST model.\n")

    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    print('Test accuracy on raw legitimate examples ' + str(accuracy))

    result_folder = 'results/mnist'
    if not os.path.isdir(result_folder):
        os.makedirs(result_folder)

    if FLAGS.task == 'FGSM':
        # Measure squeezing accuracy across a sweep of FGSM strengths.
        nb_examples = 10000
        eps_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test,
                                           eps_list, model_name, nb_examples,
                                           result_folder)
        if FLAGS.visualize is True:
            img_fpath = os.path.join(result_folder,
                                     model_name + '_FGSM_examples.png')
            draw_fgsm_adv_examples(adv_x_dict, Y_test, img_fpath)
            print('\n===Adversarial images are saved in ', img_fpath)
        csv_fpath = model_name + "_fgsm_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        print("\n===Calculating the accuracy with feature squeezing...")
        # Results are cached: skip recomputation when the CSV exists.
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_fgsm(sess, x, y, predictions,
                                        predictions_clip, predictions_bin,
                                        eps_list, Y_test, adv_x_dict,
                                        csv_fpath)
            print("---Results are stored in ", csv_fpath, '\n')
    elif FLAGS.task == 'FGSM-adv-train':
        # Load an adversarially trained MNIST model for comparison.
        with tf.variable_scope('mnist_adv_train'):
            model_2 = model_mnist()
            predictions_at = model_2(x)
            predictions_at_bin = model_2(reduce_precision_tf(x, npp=2))
            predictions_at_clip = model_2(tf.clip_by_value(x, 0., 1.))
        model_name = 'mnist_adv_train_epochs%d' % FLAGS.nb_epochs
        mnist_model_path = os.path.join(FLAGS.adv_train_dir, model_name)
        # Restore only variables from the adversarially trained scope.
        adv_train_variables = [k for k in tf.global_variables()
                               if k.name.startswith('mnist_adv_train')]
        tf_model_load_from_path(sess, mnist_model_path, adv_train_variables)
        print("---Loaded an adversarially pre-trained MNIST model.\n")

        accuracy = tf_model_eval(sess, x, y, predictions_at, X_test, Y_test)
        print('Test accuracy on raw legitimate examples (adv-trained-model) ' + str(accuracy))

        # Get adversarial examples.
        nb_examples = 10000
        eps_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions_at, X_test,
                                           eps_list, model_name, nb_examples,
                                           result_folder)

        csv_fpath = model_name + "_fgsm_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        print("\n===Calculating the accuracy with feature squeezing...")
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_fgsm(sess, x, y, predictions_at,
                                        predictions_at_clip,
                                        predictions_at_bin, eps_list, Y_test,
                                        adv_x_dict, csv_fpath)
            print("---Results are stored in ", csv_fpath, '\n')
    elif FLAGS.task == 'JSMA':
        # Generate or load JSMA adversarial examples.
        nb_examples = 1000
        X_adv = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test,
                                      model_name, nb_examples, result_folder)
        if FLAGS.visualize is True:
            img_fpath = os.path.join(result_folder,
                                     model_name + '_JSMA_examples.png')
            draw_jsma_adv_examples(X_adv, X_test, Y_test, img_fpath)
            print('\n===Adversarial images are saved in ', img_fpath)
        csv_fpath = model_name + "_jsma_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        print("\n===Calculating the accuracy with feature squeezing...")
        if not os.path.isfile(csv_fpath):
            calculate_accuracy_adv_jsma(sess, x, y, predictions,
                                        Y_test[:nb_examples],
                                        X_test[:nb_examples], X_adv,
                                        csv_fpath)
            print("---Results are stored in ", csv_fpath, '\n')
    elif FLAGS.task == 'JSMA-detection':
        # Calculate L1 distance on prediction for JSMA adversarial detection.
        nb_examples = 1000
        X_adv = get_jsma_adv_examples(sess, x, predictions, X_test, Y_test,
                                      model_name, nb_examples, result_folder)
        csv_fpath = model_name + "_jsma_l1_distance_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_jsma(sess, x, predictions_clip,
                                                 X_test[:nb_examples], X_adv,
                                                 csv_fpath)
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        # Columns of l1_dist: presumably column 0 = legitimate, column 1 =
        # adversarial distances — confirm against calculate_l1_distance_jsma.
        print("\n===Training an JSMA detector...")
        size_train = size_val = int(nb_examples / 2)
        col_id_leg = [0]
        col_id_adv = [1]
        x_train = np.hstack([l1_dist[:size_train, col_id]
                             for col_id in col_id_leg + col_id_adv])
        y_train = np.hstack([np.zeros(size_train * len(col_id_leg)),
                             np.ones(size_train * len(col_id_adv))])
        x_val = np.hstack([l1_dist[-size_val:, col_id]
                           for col_id in col_id_leg + col_id_adv])
        y_val = np.hstack([np.zeros(size_val * len(col_id_leg)),
                           np.ones(size_val * len(col_id_adv))])
        train_detector(x_train, y_train, x_val, y_val)
        print("---Done")
    elif FLAGS.task == 'FGSM-detection':
        nb_examples = 10000
        eps_list = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test,
                                           eps_list, model_name, nb_examples,
                                           result_folder)
        # NOTE(review): this csv_fpath is immediately overwritten below —
        # dead code kept for fidelity; confirm before removing.
        csv_fpath = model_name + "_fgsm_squeezing_accuracy_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)

        # Calculate L1 distance on prediction for adversarial detection.
        csv_fpath = model_name + "_fgsm_l1_distance_%dexamples.csv" % nb_examples
        csv_fpath = os.path.join(result_folder, csv_fpath)
        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_fgsm(sess, x, predictions_clip,
                                                 predictions_bin, adv_x_dict,
                                                 csv_fpath)
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print("\n===Training a FGSM detector...")
        size_train = size_val = int(nb_examples / 2)
        col_id_leg = [0]
        # Selected epsilon: 0.1, 0.2, 0.3
        col_id_adv = [1, 2, 3]
        x_train = np.hstack([l1_dist[:size_train, col_id]
                             for col_id in col_id_leg + col_id_adv])
        y_train = np.hstack([np.zeros(size_train * len(col_id_leg)),
                             np.ones(size_train * len(col_id_adv))])
        x_val = np.hstack([l1_dist[-size_val:, col_id]
                           for col_id in col_id_leg + col_id_adv])
        y_val = np.hstack([np.zeros(size_val * len(col_id_leg)),
                           np.ones(size_val * len(col_id_adv))])
        train_detector(x_train, y_train, x_val, y_val)
        print("---Done")
    elif FLAGS.task == 'joint-detection':
        # Detect FGSM and JSMA examples with a single shared threshold.
        nb_examples_jsma = 1000
        nb_examples_fgsm = 10000
        nb_examples_detection = min(nb_examples_jsma, nb_examples_fgsm)
        eps_list = [0.3]
        fgsm_adv_x_dict = get_fgsm_adv_examples(sess, x, predictions, X_test,
                                                eps_list, model_name,
                                                nb_examples_fgsm,
                                                result_folder)
        X_test_adv_jsma = get_jsma_adv_examples(sess, x, predictions, X_test,
                                                Y_test, model_name,
                                                nb_examples_jsma,
                                                result_folder)
        X_test_adv_fgsm = fgsm_adv_x_dict[0.3][:nb_examples_fgsm]
        X_test_adv_jsma = X_test_adv_jsma[:nb_examples_jsma]

        csv_fpath = model_name + "_joint_l1_distance_%dexamples.csv" % nb_examples_detection
        csv_fpath = os.path.join(result_folder, csv_fpath)
        if not os.path.isfile(csv_fpath):
            l1_dist = calculate_l1_distance_joint(sess, x, predictions_clip,
                                                  X_test, X_test_adv_fgsm,
                                                  X_test_adv_jsma, csv_fpath)
            np.savetxt(csv_fpath, l1_dist, delimiter=',')
            print("---Results are stored in ", csv_fpath, '\n')
        else:
            l1_dist = np.loadtxt(csv_fpath, delimiter=',')

        # Train a detector by selecting a threshold.
        print("\n===Training a joint detector...")
        nb_examples_min = min(len(X_test), len(X_test_adv_fgsm),
                              len(X_test_adv_jsma))
        size_train = size_val = int(nb_examples_min / 2)
        col_id_leg = [0]
        col_id_adv = [1, 2]
        x_train = np.hstack([l1_dist[:size_train, col_id]
                             for col_id in col_id_leg + col_id_adv])
        y_train = np.hstack([np.zeros(size_train * len(col_id_leg)),
                             np.ones(size_train * len(col_id_adv))])
        x_val = np.hstack([l1_dist[size_train:size_train + size_val, col_id]
                           for col_id in col_id_leg + col_id_adv])
        y_val = np.hstack([np.zeros(size_val * len(col_id_leg)),
                           np.ones(size_val * len(col_id_adv))])
        train_detector(x_train, y_train, x_val, y_val)
        print("---Done")
    else:
        print("Please specify a task: FGSM, JSMA, FGSM-detection, JSMA-detection, joint-detection.")
def main(argv=None):
    """
    MNIST cleverhans tutorial.

    Trains an MNIST model, attacks it with FGSM, then repeats training
    with adversarial examples mixed in (adversarial training).
    :param argv: unused (present for tf.app.run compatibility)
    :return: None
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Image dimensions ordering should follow the Theano convention
    # (channels first: (batch, 1, 28, 28)).
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")
    assert Y_train.shape[1] == 10.

    # Label smoothing: clip one-hot targets away from {0, 1} so the softmax
    # is not pushed toward fully saturated outputs.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train an MNIST model
    tf_model_train(sess, x, y, predictions, X_train, Y_train,
                   evaluate=evaluate)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph: a fresh model plus an FGSM branch whose
    # outputs are fed back into training.
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained MNIST model on
        # legitimate test examples
        accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained MNIST model on
        # adversarial examples
        accuracy_adv = tf_model_eval(sess, x, y, predictions_2_adv,
                                     X_test, Y_test)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, X_train, Y_train,
                   predictions_adv=predictions_2_adv, evaluate=evaluate_2)
def main():
    """
    MNIST cleverhans tutorial: train (or load) a Theano MNIST model and run
    the Carlini L2 attack against it.
    :return: None
    """
    # Command-line configuration for training and the attack.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-b', default=1000, type=int,
                        help='Size of training batches')
    parser.add_argument('--train_dir', '-d', default='/tmp',
                        help='Directory storing the saved model.')
    parser.add_argument('--filename', '-f', default='mnist.ckpt',
                        help='Filename to save model under.')
    parser.add_argument('--nb_epochs', '-e', default=6, type=int,
                        help='Number of epochs to train model')
    parser.add_argument('--nb_iters', '-i', default=10000, type=int,
                        help='Number of iterations for crafting adversarial examples')
    parser.add_argument('--learning_rate', '-lr', default=0.1, type=float,
                        help='Learning rate for training')
    parser.add_argument('--eps', default=0.01, type=float,
                        help='Epsilon for Carlini L2 Attack')
    parser.add_argument('--kappa', default=0.01, type=float,
                        help='Kappa for Carlini L2 Attack')
    parser.add_argument('--c', default=20, type=float)
    parser.add_argument('--load', default=None, type=str,
                        help='Model path to load')
    parser.add_argument('--dump', default=None, type=str,
                        help='Model path to dump')
    args = parser.parse_args()

    # Fixed seed for reproducibility of the attack's randomness.
    np.random.seed(126)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")
    assert Y_train.shape[1] == 10.

    # Label smoothing: clip one-hot targets away from {0, 1}.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input Theano placeholder.
    # FIX: removed the unused local `y_shape = (None, 10)`.
    x_shape = (None, 1, 28, 28)
    x = T.tensor4('x')
    y = T.matrix('y')

    if args.load:
        # SECURITY: pickle.load executes arbitrary code embedded in the
        # file — only load model files from trusted sources.
        model = pickle.load(open(args.load, "rb"))
        predictions = model(x)
    else:
        # Define Theano model graph
        model = model_mnist()
        model.build(x_shape)
        predictions = model(x)
        print("Defined Theano model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = th_model_eval(x, y, predictions, X_test, Y_test, args=args)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))
        # FIX: dropped the redundant trailing `pass` statement.

    # Train an MNIST model (also fine-tunes a model loaded via --load —
    # TODO confirm this matches the original nesting intent).
    th_model_train(x, y, predictions, model.trainable_weights,
                   X_train, Y_train, evaluate=evaluate, args=args)

    if args.dump:
        pickle.dump(model, open(args.dump, "wb"))

    # Run the Carlini L2 attack against the trained model.
    carlini_L2(x, predictions, X_test, Y_test, eps=args.eps,
               kappa=args.kappa, c=args.c, nb_iters=args.nb_iters,
               batch_size=args.batch_size)
def main():
    """
    MNIST cleverhans tutorial (Theano backend).

    Trains an MNIST model, attacks it with FGSM, then repeats training
    with adversarial examples mixed in (adversarial training).
    :return: None
    """
    # Command-line configuration for training.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-b', default=128,
                        help='Size of training batches')
    parser.add_argument('--train_dir', '-d', default='/tmp',
                        help='Directory storing the saved model.')
    parser.add_argument('--filename', '-f', default='mnist.ckpt',
                        help='Filename to save model under.')
    parser.add_argument('--nb_epochs', '-e', default=6, type=int,
                        help='Number of epochs to train model')
    parser.add_argument('--learning_rate', '-lr', default=0.5, type=float,
                        help='Learning rate for training')
    args = parser.parse_args()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")
    assert Y_train.shape[1] == 10.

    # Label smoothing: clip one-hot targets away from {0, 1}.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input Theano placeholder.
    # FIX: removed the unused local `y_shape = (None, 10)`.
    x_shape = (None, 1, 28, 28)
    x = T.tensor4('x')
    y = T.matrix('y')

    # Define Theano model graph
    model = model_mnist()
    model.build(x_shape)
    predictions = model(x)
    print("Defined Theano model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = th_model_eval(x, y, predictions, X_test, Y_test, args=args)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))
        # FIX: dropped the redundant trailing `pass` statement.

    # Train an MNIST model
    th_model_train(x, y, predictions, model.trainable_weights,
                   X_train, Y_train, evaluate=evaluate, args=args)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval([x], [adv_x], [X_test], args=args)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = th_model_eval(x, y, predictions, X_test_adv, Y_test, args=args)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine Theano model graph: fresh model plus an FGSM branch whose
    # outputs are fed back into training.
    model_2 = model_mnist()
    model_2.build(x_shape)
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained MNIST model on
        # legitimate test examples
        accuracy = th_model_eval(x, y, predictions_2, X_test, Y_test,
                                 args=args)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained MNIST model on
        # adversarial examples
        accuracy_adv = th_model_eval(x, y, predictions_2_adv, X_test, Y_test,
                                     args=args)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    th_model_train(x, y, predictions_2, model_2.trainable_weights,
                   X_train, Y_train, predictions_adv=predictions_2_adv,
                   evaluate=evaluate_2, args=args)
def main(argv=None):
    """
    MNIST cleverhans tutorial.

    Trains an MNIST model, attacks it with FGSM, then adversarially trains
    a second model and measures its robustness.
    :param argv: unused (present for tf.app.run compatibility)
    :return: None
    """
    # Image dimensions ordering should follow the Theano convention
    # (channels first: (batch, 1, 28, 28)).
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        # FIX: converted Python-2 print statements to print() calls for
        # consistency with the rest of this file.
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Train an MNIST model
    tf_model_train(sess, x, y, predictions, X_train, Y_train)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph: fresh model plus an FGSM branch whose
    # outputs are fed back into training.
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, X_train, Y_train,
                   predictions_adv=predictions_2_adv)

    # Evaluate the accuracy of the adversarialy trained MNIST model on
    # legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    X_test_adv_2, = batch_eval(sess, [x], [adv_x_2], [X_test])
    assert X_test_adv_2.shape[0] == 10000, X_test_adv_2.shape

    # Evaluate the accuracy of the adversarially trained MNIST model on
    # adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2, X_test_adv_2,
                                 Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy_adv))