def main(net_type):
    """Train a CIFAR-10 ResNet, attack it with FGSM, then repeat with adversarial training.

    The first model is trained normally, saved, and evaluated on FGSM
    examples crafted from the test set. A second model of the same
    architecture is then trained with the adversarial objective, saved under
    ``model_name + '_adv'``, and evaluated on FGSM examples crafted against
    itself.

    :param net_type: forwarded to ``resnet_cifar10.resnet_cifar10``; the value
        ``'squared_resnet'`` additionally triggers ``adam_pretrain``.
    """
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: temporarily set 'image_dim_ordering' to 'th'")

    sess = get_session()
    keras.backend.set_session(sess)

    (train_xs, train_ys), (test_xs, test_ys) = data_cifar10.load_cifar10()
    print('Loaded cifar10 data')

    # Channels-first placeholders, matching the 'th' dim ordering set above.
    x = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model, model_name = resnet_cifar10.resnet_cifar10(repetations=3,
                                                      net_type=net_type)
    if net_type == 'squared_resnet':
        model = adam_pretrain(model, model_name, train_xs, train_ys, 1,
                              test_xs, test_ys)

    predictions = model(x)
    tf_model_train(sess, x, y, predictions, train_xs, train_ys,
                   test_xs, test_ys,
                   data_augmentor=data_cifar10.augment_batch)
    save_model(model, model_name)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    test_xs_adv, = batch_eval(sess, [x], [adv_x], [test_xs])
    assert test_xs_adv.shape[0] == 10000, test_xs_adv.shape

    # Evaluate the accuracy of the CIFAR-10 model on adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, test_xs_adv, test_ys)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2, _ = resnet_cifar10.resnet_cifar10(repetations=3,
                                               net_type=net_type)
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, train_xs, train_ys,
                   test_xs, test_ys,
                   predictions_adv=predictions_2_adv,
                   data_augmentor=data_cifar10.augment_batch)
    # Save the adversarially trained network. The previous code passed
    # `model` here, which re-saved the first network under the '_adv' name.
    save_model(model_2, model_name + '_adv')

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM) on
    # the new model, which was trained using adversarial training
    test_xs_adv_2, = batch_eval(sess, [x], [adv_x_2], [test_xs])
    assert test_xs_adv_2.shape[0] == 10000, test_xs_adv_2.shape

    # Evaluate the accuracy of the adversarially trained model on
    # adversarial examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2,
                                 test_xs_adv_2, test_ys)
    print('Test accuracy on adversarial examples: ' + str(accuracy_adv))
def adv_train_mnist(sess, X_train, Y_train, X_test, Y_test, save_path):
    """Adversarially train an MNIST model with FGSM and save it.

    :param sess: session handed to the train/eval helpers.
    :param X_train: training inputs.
    :param Y_train: training labels.
    :param X_test: test inputs used by the evaluation callback.
    :param Y_test: test labels used by the evaluation callback.
    :param save_path: forwarded to ``tf_model_train_and_save``.
    """
    # Input / label placeholders
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Build the model graph under its own variable scope
    with tf.variable_scope('mnist_adv_train'):
        adv_model = model_mnist()
        clean_preds = adv_model(x)
        fgsm_x = fgsm(x, clean_preds, eps=0.3, clip_min=0., clip_max=1.)
        adv_preds = adv_model(fgsm_x)

    def report():
        # Accuracy of the adversarially trained model on clean test data
        clean_acc = tf_model_eval(sess, x, y, clean_preds, X_test, Y_test)
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

        # Accuracy of the same model when its inputs go through FGSM first
        adv_acc = tf_model_eval(sess, x, y, adv_preds, X_test, Y_test)
        print('Test accuracy on adversarial examples: ' + str(adv_acc))

    # Adversarial training; `report` is passed as the evaluate callback
    tf_model_train_and_save(sess, x, y, clean_preds, X_train, Y_train,
                            save_path=save_path,
                            predictions_adv=adv_preds,
                            evaluate=report)
def main(config, data_module, model_path):
    """Load a model and data, craft FGSM examples for 64 samples, show one pair.

    :param config: passed to ``data_module.load_data``.
    :param data_module: object exposing ``load_data(config)``; the result is
        indexed with ``'x_train'``.
    :param model_path: passed to ``load_model``.
    """
    model = load_model(model_path)
    dataset = data_module.load_data(config)

    # Only the first 64 training samples are analysed.
    batch = dataset['x_train'][:64]
    batch_predictions = model.predict(batch)

    # NOTE(review): `batch` and `batch_predictions` come from `predict`, not
    # from a symbolic graph -- confirm that this `fgsm` accepts such inputs.
    adv_batch = fgsm(batch, batch_predictions, eps=0.3)

    # Display the first original sample, then its adversarial counterpart.
    scipy.misc.imshow(batch[0])
    scipy.misc.imshow(adv_batch[0])
def main(argv=None):
    """
    MNIST black-box attack: train an oracle, fit a substitute on a held-out
    slice of the test set, craft FGSM examples from the substitute, dump the
    arrays to ``.npy`` files and report the oracle's accuracy on them.

    :param argv: not read by this function.
    :return: None
    """
    # Perform tutorial setup
    assert setup_tutorial()

    # TF session shared with Keras
    sess = tf.Session()
    keras.backend.set_session(sess)

    # MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist()

    # The adversary only gets the first `holdout` test samples (as class
    # indices); the remainder becomes the actual test set.
    holdout = FLAGS.holdout
    X_sub = X_test[:holdout]
    Y_sub = np.argmax(Y_test[:holdout], axis=1)
    X_test = X_test[holdout:]
    Y_test = Y_test[holdout:]

    # Input and output placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Local stand-in for the black box; a remote labeling API could be used
    # instead.
    print("Preparing the black-box model.")
    bbox_preds = prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test)

    # Substitute training, method from https://arxiv.org/abs/1602.02697
    print("Training the substitute model.")
    substitute_preds = train_substitute(sess, x, y, bbox_preds, X_sub, Y_sub)

    # FGSM examples crafted against the substitute
    adv_x = fgsm(x, substitute_preds, eps=0.2)
    eval_params = {'batch_size': FLAGS.batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)

    # Persist clean/adversarial test data and the training data
    dumps = (
        ("xorg_black.npy", X_test),
        ("xadv_black.npy", X_test_adv),
        ("ytest_black.npy", Y_test),
        ("xtrain.npy", X_train),
        ("ytrain.npy", Y_train),
    )
    for filename, array in dumps:
        np.save(filename, array)

    # Accuracy of the "black-box" model on the transferred examples
    accuracy = model_eval(sess, x, y, bbox_preds, X_test_adv, Y_test,
                          args=eval_params)
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute: ' + str(accuracy))
def main():
    """
    Test the accuracy of the MNIST cleverhans tutorial model (Theano backend).

    Trains the model, asserts at least 98% accuracy on the legitimate test
    set, crafts FGSM examples and asserts that accuracy on them is at most 10%.

    :raises RuntimeError: if ``backend`` has no ``theano`` attribute.
    :raises AssertionError: if either accuracy bound is violated or the
        adversarial batch does not contain 10000 samples.
    :return: None
    """
    if not hasattr(backend, "theano"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the Theano backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    import argparse
    parser = argparse.ArgumentParser()
    # type=int is required: without it a value given on the command line is
    # passed on as a string while the default stays an int.
    parser.add_argument('--batch_size', '-b', default=128, type=int,
                        help='Size of training batches')
    parser.add_argument('--train_dir', '-d', default='/tmp',
                        help='Directory storing the saved model.')
    parser.add_argument('--filename', '-f', default='mnist.ckpt',
                        help='Filename to save model under.')
    parser.add_argument('--nb_epochs', '-e', default=6, type=int,
                        help='Number of epochs to train model')
    parser.add_argument('--learning_rate', '-lr', default=0.5, type=float,
                        help='Learning rate for training')
    args = parser.parse_args()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input Theano placeholder
    x_shape = (None, 1, 28, 28)
    x = T.tensor4('x')
    y = T.matrix('y')

    # Define Theano model graph
    model = model_mnist()
    model.build(x_shape)
    predictions = model(x)
    print("Defined Theano model graph.")

    # Train an MNIST model
    th_model_train(x, y, predictions, model.trainable_weights,
                   X_train, Y_train, args=args)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = th_model_eval(x, y, predictions, X_test, Y_test, args=args)
    assert float(accuracy) >= 0.98, accuracy

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3, back='th')
    X_test_adv, = batch_eval([x], [adv_x], [X_test], args=args)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = th_model_eval(x, y, predictions, X_test_adv, Y_test, args=args)
    assert float(accuracy) <= 0.1, accuracy
def main(argv=None):
    """
    MNIST cleverhans tutorial (TensorFlow backend): train a CNN, attack it
    with FGSM, then train a second CNN adversarially and report its accuracy.

    :param argv: not read by this function.
    :return: None
    """
    # Fixed TF seed for better reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering must follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # TF session shared with Keras
    sess = tf.Session()
    keras.backend.set_session(sess)

    # MNIST data, with smoothed training labels
    X_train, Y_train, X_test, Y_test = data_mnist()
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # First model graph
    model = cnn_model()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    batch_args = {'batch_size': FLAGS.batch_size}
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }

    def evaluate():
        # Accuracy of the plain model on legitimate test examples
        clean_acc = model_eval(sess, x, y, predictions, X_test, Y_test,
                               args=batch_args)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

    # Train an MNIST model
    model_train(sess, x, y, predictions, X_train, Y_train,
                evaluate=evaluate, args=train_params)

    # FGSM examples against the trained model
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=batch_args)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Accuracy of the plain model on those examples
    accuracy = model_eval(sess, x, y, predictions, X_test_adv, Y_test,
                          args=batch_args)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Second model graph, with an FGSM branch feeding back into the model
    model_2 = cnn_model()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Adversarially trained model on legitimate test examples
        clean_acc = model_eval(sess, x, y, predictions_2, X_test, Y_test,
                               args=batch_args)
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

        # Adversarially trained model on adversarial examples
        adv_acc = model_eval(sess, x, y, predictions_2_adv, X_test, Y_test,
                             args=batch_args)
        print('Test accuracy on adversarial examples: ' + str(adv_acc))

    # Perform adversarial training
    model_train(sess, x, y, predictions_2, X_train, Y_train,
                predictions_adv=predictions_2_adv,
                evaluate=evaluate_2, args=train_params)
def main(argv=None):
    """
    Adversarially train an MNIST CNN and, at each evaluation, report its
    accuracy on the MNIST test set and on arrays loaded from the ``fgsm/``,
    ``black/`` and ``jsma/`` directories.

    :param argv: not read by this function.
    :return: None
    """
    # Fixed TF seed for better reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering must follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # TF session shared with Keras
    sess = tf.Session()
    keras.backend.set_session(sess)

    # MNIST data, with smoothed training labels
    X_train, Y_train, X_test, Y_test = data_mnist()
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # First model graph (built but not trained in this function)
    model = cnn_model()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Second model graph, with an FGSM branch feeding back into the model
    model_2 = cnn_model()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.2)
    predictions_2_adv = model_2(adv_x_2)

    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }

    # Previously saved evaluation sets
    ytest_fgsm = np.load("fgsm/ytest.npy")
    xadv_fgsm = np.load("fgsm/xadv.npy")
    xtest_fgsm = np.load("fgsm/xtest.npy")

    xtest_black = np.load("black/xtest.npy")
    ytest_black = np.load("black/ytest.npy")
    xadv_black = np.load("black/xadv.npy")

    xtest_jsma = np.load("jsma/xtest.npy")
    ytest_jsma = np.load("jsma/ytest.npy")
    xadv_jsma = np.load("jsma/xadv.npy")
    yadv_jsma = np.load("jsma/ytest2.npy")

    # Keep only element 0 of every stored JSMA entry
    xadv_jsma = np.array([entry[0] for entry in xadv_jsma])

    print(ytest_jsma.shape)
    print(yadv_jsma.shape)
    # ytest_jsma is used as loaded; only yadv_jsma goes through get_yarray
    yadv_jsma = get_yarray((len(yadv_jsma), 10), yadv_jsma)

    def evaluate_2():
        batch_args = {'batch_size': FLAGS.batch_size}

        # Adversarially trained model on legitimate test examples
        clean_acc = model_eval(sess, x, y, predictions_2, X_test, Y_test,
                               args=batch_args)
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

        # Every remaining set is fed through the FGSM branch of model_2
        adv_sets = [
            ('Test accuracy on adversarial examples: ', X_test, Y_test),
            ('Test accuracy on adversarial examples1: ',
             xtest_fgsm, ytest_fgsm),
            ('Test accuracy on adversarial examples2: ',
             xadv_fgsm, ytest_fgsm),
            ('Test accuracy on adversarial examples3: ',
             xtest_black, ytest_black),
            ('Test accuracy on adversarial examples4: ',
             xadv_black, ytest_black),
            ('Test accuracy on adversarial examples5: ',
             xtest_jsma, ytest_jsma),
            ('Test accuracy on adversarial examples6: ',
             xadv_jsma, yadv_jsma),
        ]
        for label, inputs, targets in adv_sets:
            adv_acc = model_eval(sess, x, y, predictions_2_adv,
                                 inputs, targets, args=batch_args)
            print(label + str(adv_acc))

    # Perform adversarial training
    model_train(sess, x, y, predictions_2, X_train, Y_train,
                predictions_adv=predictions_2_adv,
                evaluate=evaluate_2, args=train_params)
# Fragment: the enclosing definition is not visible here. It relies on
# `sess`, `evaluate`, `X_train`, `Y_train`, `X_test` and `Y_test` already
# being defined by the surrounding code.

# Placeholders for the model input and the labels
x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
y = tf.placeholder(tf.float32, shape=(None, 10))

# Define TF model graph
model = model_mnist()
predictions = model(x)
print("Defined TensorFlow model graph.")

# Train an MNIST model; `evaluate` is handed to the trainer as a callback
model_train(sess, x, y, predictions, X_train, Y_train, evaluate=evaluate)

##############################################################################
# Create Adversarials
##############################################################################

# Craft adversarial examples using Fast Gradient Sign Method (FGSM)
adv_x = fgsm(x, predictions, eps=0.2)
X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
assert X_test_adv.shape[0] == 10000, X_test_adv.shape

# Evaluate the accuracy of the MNIST model on adversarial examples
accuracy = model_eval(sess, x, y, predictions, X_test_adv, Y_test)
print('Test accuracy on adversarial examples: ' + str(accuracy))

# save instances
# NOTE(review): nothing visible here creates the 'data/' directory --
# confirm it exists before this point.
np.save('data/x_train.npy', X_train)
np.save('data/y_train.npy', Y_train)
np.save('data/x_test.npy', X_test)
np.save('data/y_test.npy', Y_test)
np.save('data/adversarials.npy', X_test_adv)

# load instances
def generate_images():
    """Load a pretrained CIFAR10 model and write adversarial images to disk.

    The model architecture, weights file, pooling option and attack type are
    read from ``load_args()``. With the ``fgsm`` attack every training image
    is perturbed and saved. With the ``jsma`` attack each source sample is
    attacked towards every other class and one successful adversarial per
    sample, picked uniformly at random, is saved.

    :raises RuntimeError: if ``backend`` has no ``tf`` attribute.
    :raises ValueError: if ``args.model`` is not one of the known model names.
    """
    print('==> Preparing data..')
    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering must follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)
    print("==> Beginning Session")

    # Get CIFAR10 data
    X_train, Y_train, X_test, Y_test = data_cifar10()
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Load model
    print("==> loading vgg model")
    args = load_args()
    if args.model == 'vgg6':
        model = vggbn(top=True, pool=args.pool)
    elif args.model == 'vgg15':
        model = vgg15(top=True, pool=args.pool)
    elif args.model == 'generic':
        model = generic(top=True, pool=args.pool)
    elif args.model == 'resnet18':
        model = resnet.build_resnet_18(args.pool)
    else:
        # Previously an unknown name fell through and failed later with a
        # NameError on `model`.
        raise ValueError("unknown model: {}".format(args.model))

    predictions = model(x)
    model.load_weights(args.load)

    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_params)
    print('==> Accuracy : {}'.format(accuracy))

    im_base = '/im_'
    model_name = args.model + '_p' + str(args.pool)

    if args.attack == 'fgsm' or args.attack == 'FGSM':
        result_dir = os.getcwd() + '/images/fgsm/'
        out_dir = result_dir + model_name

        print("==> creating fgsm adversarial wrapper")
        adv_x = fgsm(x, predictions, eps=0.3)

        print("==> sending to batch evaluator to finalize adversarial images")
        X_train_adv, = batch_eval(sess, [x], [adv_x], [X_train],
                                  args=eval_params)

        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        print("==> saving images to {}".format(out_dir))
        for i, ad in enumerate(X_train_adv):
            scipy.misc.imsave(out_dir + im_base + str(i) + '.png', ad)

        sess.close()

    # JSMA
    if args.attack == 'jsma' or args.attack == 'JSMA':
        # NOTE(review): there is no separator between this prefix and
        # model_name, so images land in '.../trial_single_adv<model_name>'.
        # Kept as-is so existing output locations do not move; confirm intent.
        result_dir = os.getcwd() + '/images/jsma/trial_single_adv'
        out_dir = result_dir + model_name

        print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
              str(FLAGS.nb_classes - 1) + ' adversarial examples')

        # results[target, sample] holds the jsma success flag
        results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                           dtype='i')
        # Fraction of perturbed features for each (target, sample) pair
        perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                                 dtype='f')

        # Define the TF graph for the model's Jacobian
        grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

        # Initialize our array for grid visualization
        image_shape = (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels)
        grid_shape = (FLAGS.nb_classes, FLAGS.nb_classes) + image_shape
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        i_saved = 0
        figure = None  # reused by pair_visual across iterations

        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        print("==> saving images to {}".format(out_dir))

        # NOTE(review): the loop starts at a hard-coded sample index while
        # the statistics below still average over all FLAGS.source_samples.
        first_sample = 7166
        for sample_ind in range(first_sample, FLAGS.source_samples):
            sample = X_train[sample_ind:(sample_ind + 1)]

            # We want an adversarial example for each possible target class
            current_class = int(np.argmax(Y_train[sample_ind]))
            target_classes = other_classes(FLAGS.nb_classes, current_class)

            # For the grid visualization, keep originals along the diagonal
            grid_viz_data[current_class, current_class, :, :, :] = \
                np.reshape(sample, image_shape)

            # Successful adversarials for this sample; one of them is picked
            # at random and saved once the last target has been processed.
            adversarials = []
            for idx, target in enumerate(target_classes):
                print("image {}".format(sample_ind))
                print('--------------------------------------')
                print('Creating adv. example for target class ' + str(target))

                # This call runs the Jacobian-based saliency map approach
                adv_x, res, percent_perturb = jsma(
                    sess, x, predictions, grads, sample, target,
                    theta=1, gamma=0.1, increase=True, back='tf',
                    clip_min=0, clip_max=1)

                adversarial = np.reshape(adv_x, image_shape)
                original = np.reshape(sample, image_shape)

                # Display the original and adversarial images side-by-side
                if FLAGS.viz_enabled:
                    if figure is None:
                        figure = pair_visual(original, adversarial)
                    else:
                        figure = pair_visual(original, adversarial, figure)

                if res == 1:
                    adversarials.append(adversarial)

                if idx == FLAGS.nb_classes - 2:
                    if adversarials:
                        # randint's upper bound is exclusive, so this covers
                        # every index including the last one.
                        idx_uniform = np.random.randint(len(adversarials))
                        print(idx_uniform)
                        try:
                            scipy.misc.imsave(
                                out_dir + im_base + str(sample_ind) + '.png',
                                adversarials[idx_uniform])
                        except (IOError, OSError) as err:
                            # Keep the long-running loop alive, but do not
                            # hide the write failure.
                            print("==> failed to save image {}: {}".format(
                                sample_ind, err))
                        else:
                            i_saved += 1
                            print("==> images saved: {}".format(i_saved))
                    else:
                        print("No adversarials generated")

                # Add our adversarial example to our grid data
                grid_viz_data[target, current_class, :, :, :] = adversarial

                # Update the arrays for later analysis
                results[target, sample_ind] = res
                perturbations[target, sample_ind] = percent_perturb

        # Compute the rate of adversarial examples successfully found
        nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
        succ_rate = float(np.sum(results)) / nb_targets_tried
        print('Avg. rate of successful adv. examples {0:.2f}'.format(
            succ_rate))

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(perturbations)
        print('Avg. rate of perturbed features {0:.2f}'.format(
            percent_perturbed))

        # Compute the average distortion introduced for successful samples
        percent_perturb_succ = np.mean(perturbations * (results == 1))
        print('Avg. rate of perturbed features for successful '
              'adversarial examples {0:.2f}'.format(percent_perturb_succ))

        # Close TF session
        sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if FLAGS.viz_enabled:
            _ = grid_visual(grid_viz_data)
def main():
    """
    Test the accuracy of the MNIST cleverhans tutorial model.

    Trains the model, asserts at least 98% accuracy on the legitimate test
    set, crafts FGSM examples and asserts that accuracy on them is at most 10%.

    :raises AssertionError: if either accuracy bound is violated or the
        adversarial batch does not contain 10000 samples.
    :return: None
    """
    import argparse
    parser = argparse.ArgumentParser()
    # type=int is required: without it a value given on the command line is
    # passed on as a string while the default stays an int.
    parser.add_argument('--batch_size', '-b', default=128, type=int,
                        help='Size of training batches')
    parser.add_argument('--train_dir', '-d', default='/tmp',
                        help='Directory storing the saved model.')
    parser.add_argument('--filename', '-f', default='mnist.ckpt',
                        help='Filename to save model under.')
    parser.add_argument('--nb_epochs', '-e', default=6, type=int,
                        help='Number of epochs to train model')
    parser.add_argument('--learning_rate', '-lr', default=0.5, type=float,
                        help='Learning rate for training')
    args = parser.parse_args()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input Theano placeholder
    x_shape = (None, 1, 28, 28)
    x = T.tensor4('x')
    y = T.matrix('y')

    # Define Theano model graph
    model = model_mnist()
    model.build(x_shape)
    predictions = model(x)
    print("Defined Theano model graph.")

    # Train an MNIST model
    th_model_train(x, y, predictions, model.trainable_weights,
                   X_train, Y_train, args=args)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = th_model_eval(x, y, predictions, X_test, Y_test, args=args)
    assert float(accuracy) >= 0.98, accuracy

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3, back='th')
    X_test_adv, = batch_eval([x], [adv_x], [X_test], args=args)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = th_model_eval(x, y, predictions, X_test_adv, Y_test, args=args)
    assert float(accuracy) <= 0.1, accuracy
def main(argv=None):
    """
    MNIST cleverhans tutorial with channels-first ('th') inputs: train a
    model, attack it with FGSM, then train a second model adversarially.

    :param argv: not read by this function.
    :return: None
    """
    # Fixed TF seed for better reproducibility
    tf.set_random_seed(1234)

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # TF session shared with Keras
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # MNIST data, with smoothed training labels
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Placeholders
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # First model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Accuracy of the plain model on legitimate test examples
        clean_acc = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

    # Train an MNIST model
    tf_model_train(sess, x, y, predictions, X_train, Y_train,
                   evaluate=evaluate)

    # FGSM examples against the trained model
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Accuracy of the plain model on those examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Second model graph, with an FGSM branch feeding back into the model
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Adversarially trained model on legitimate test examples
        clean_acc = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
        print('Test accuracy on legitimate test examples: ' + str(clean_acc))

        # Adversarially trained model on adversarial examples
        adv_acc = tf_model_eval(sess, x, y, predictions_2_adv,
                                X_test, Y_test)
        print('Test accuracy on adversarial examples: ' + str(adv_acc))

    # Perform adversarial training
    tf_model_train(sess, x, y, predictions_2, X_train, Y_train,
                   predictions_adv=predictions_2_adv, evaluate=evaluate_2)
def main(argv=None):
    """
    MNIST cleverhans tutorial: train a model and measure its accuracy on
    clean and FGSM inputs, then do the same for an adversarially trained one.

    :param argv: not read by this function.
    :return: None
    """
    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'")

    # TF session shared with Keras
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Placeholders
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))

    # ---- Plain training -------------------------------------------------
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    tf_model_train(sess, x, y, predictions, X_train, Y_train)

    # Accuracy on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # FGSM examples against the trained model
    adv_x = fgsm(x, predictions, eps=0.3)
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test])
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    # Accuracy on those adversarial examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test_adv, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    # ---- Adversarial training ---------------------------------------------
    print("Repeating the process, using adversarial training")
    model_2 = model_mnist()
    predictions_2 = model_2(x)
    adv_x_2 = fgsm(x, predictions_2, eps=0.3)
    predictions_2_adv = model_2(adv_x_2)

    tf_model_train(sess, x, y, predictions_2, X_train, Y_train,
                   predictions_adv=predictions_2_adv)

    # Adversarially trained model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions_2, X_test, Y_test)
    print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # FGSM examples crafted against the adversarially trained model
    X_test_adv_2, = batch_eval(sess, [x], [adv_x_2], [X_test])
    assert X_test_adv_2.shape[0] == 10000, X_test_adv_2.shape

    # Adversarially trained model on those examples
    accuracy_adv = tf_model_eval(sess, x, y, predictions_2,
                                 X_test_adv_2, Y_test)
    print('Test accuracy on adversarial examples: ' + str(accuracy_adv))
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial: train a CNN, craft FGSM examples for the
    test set, and write each clean/adversarial image pair to disk as PNG.

    :param argv: not read by this function.
    :raises RuntimeError: if ``backend`` has no ``tf`` attribute.
    :return: None
    """
    import os

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get CIFAR10 data, with smoothed training labels
    X_train, Y_train, X_test, Y_test = data_cifar10()
    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model(img_rows=32, img_cols=32, channels=3)
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                              args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }
    model_train(sess, x, y, predictions, X_train, Y_train,
                evaluate=evaluate, args=train_params)

    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    adv_x = fgsm(x, predictions, eps=0.3)
    eval_params = {'batch_size': FLAGS.batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [X_test], args=eval_params)
    assert X_test_adv.shape[0] == 10000, X_test_adv.shape

    from scipy.misc import imsave
    path = '/home/neale/repos/adversarial-toolbox/images/adversarials/fgsm/cifar/symmetric/'

    # Create the output directories up front; otherwise the first imsave
    # fails after the whole training run has already been paid for.
    for subdir in ('real', 'adv'):
        target_dir = path + subdir
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)

    for i, (real, adv) in enumerate(zip(X_test, X_test_adv)):
        imsave(path + 'real/im_{}.png'.format(i), real)
        imsave(path + 'adv/adv_{}.png'.format(i), adv)