def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, clean_train=True, testing=False, backprop_through_attack=False, nb_filters=64, num_threads=None): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param clean_train: if true, train on clean examples :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) model_path = "models/mnist" # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2017, 8, 30]) if clean_train: model = make_basic_cnn(nb_filters=nb_filters) preds = model.get_probs(x) def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test # examples eval_params = {'batch_size': batch_size} acc = model_eval( sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval( sess, x, y, preds, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculate training error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = make_basic_cnn(nb_filters=nb_filters) preds_2 = model_2(x) fgsm2 = FastGradientMethod(model_2, sess=sess) adv_x_2 = fgsm2.generate(x, **fgsm_params) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x_2 = tf.stop_gradient(adv_x_2) preds_2_adv = model_2(adv_x_2) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, testing=False, label_smoothing=0.1): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param testing: if true, training error is calculated :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Force TensorFlow to use single thread to improve reproducibility config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) if keras.backend.image_data_format() != 'channels_last': raise NotImplementedError("this tutorial requires keras to be configured to channels_last format") # Create TF session and set as Keras backend session sess = tf.Session(config=config) keras.backend.set_session(sess) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Label smoothing y_train -= label_smoothing * (y_train - 1. / nb_classes) # Define Keras model model = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) print("Defined Keras model.") # To be able to call the model in the custom loss, we need to call it once # before, see https://github.com/tensorflow/tensorflow/issues/23769 model(model.input) # Initialize the Fast Gradient Sign Method (FGSM) attack object wrap = KerasModelWrapper(model) fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params) model.compile( optimizer=keras.optimizers.Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy', adv_acc_metric] ) # Train an MNIST model model.fit(x_train, y_train, batch_size=batch_size, epochs=nb_epochs, validation_data=(x_test, y_test), verbose=2) # Evaluate the accuracy on legitimate and adversarial test examples _, acc, adv_acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0) report.clean_train_clean_eval = acc report.clean_train_adv_eval = adv_acc print('Test accuracy on legitimate examples: %0.4f' % acc) print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc) # Calculate training error if testing: _, train_acc, train_adv_acc = model.evaluate(x_train, y_train, batch_size=batch_size, verbose=0) report.train_clean_train_clean_eval = train_acc report.train_clean_train_adv_eval = train_adv_acc print("Repeating the process, using adversarial training") # Redefine Keras model model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) model_2(model_2.input) wrap_2 = KerasModelWrapper(model_2) fgsm_2 = FastGradientMethod(wrap_2, sess=sess) # Use a loss function based on legitimate and adversarial examples adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params) adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params) model_2.compile( optimizer=keras.optimizers.Adam(learning_rate), loss=adv_loss_2, metrics=['accuracy', adv_acc_metric_2] ) # Train an MNIST model model_2.fit(x_train, y_train, batch_size=batch_size, epochs=nb_epochs, validation_data=(x_test, y_test), verbose=2) # Evaluate the accuracy on legitimate and adversarial test examples _, acc, adv_acc = model_2.evaluate(x_test, y_test, batch_size=batch_size, verbose=0) report.adv_train_clean_eval = acc report.adv_train_adv_eval = adv_acc print('Test accuracy on legitimate examples: %0.4f' % acc) print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc) # Calculate training error if testing: _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train, batch_size=batch_size, verbose=0) report.train_adv_train_clean_eval = train_acc report.train_adv_train_adv_eval = train_adv_acc return report
def baseline_jsma(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, clean_train=True, testing=False, nb_filters=64): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param clean_train: if true, train on clean examples :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session sess = tf.Session() # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing # assert Y_train.shape[1] == 10 # label_smooth = .1 # Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) model_path = "models/mnist" # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None} rng = np.random.RandomState([2017, 8, 30]) if clean_train: model = make_basic_cnn(nb_filters=nb_filters) preds = model.get_probs(x) def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test # examples eval_params = {'batch_size': batch_size} acc = model_eval( sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # # HERE already trained model, thus we need a new one (model_2) model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval( sess, x, y, preds, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the JSMA attack object and # graph jsma = SaliencyMapMethod(model, sess=sess) adv_x = jsma.generate(x, **jsma_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculate training error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = make_basic_cnn(nb_filters=nb_filters) preds_2 = model_2(x) jsma2 = SaliencyMapMethod(model_2, sess=sess) adv_x_2 = jsma2.generate(x, **jsma_params) preds_2_adv = model_2(adv_x_2) # # let's generate FGSM examples for model_2 # fgsm = FastGradientMethod(model_2, sess=sess) fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv_x_fgsm = fgsm.generate(x, **fgsm_params) preds_2_fgsm = model_2(adv_x_fgsm) # DON'T WANT TO TRAIN on FGSM adv examples yet def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on JSMA adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Accuracy of the JSMA adv trained model on FGSM adv examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2_fgsm, X_test, Y_test, args=eval_params) print('Test accuracy on SaliencyMapMethod adversarial examples: %0.4f' % accuracy) # Perform and evaluate adversarial training model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def test_run_single_gpu_fgsm(self): """ Test the basic single GPU performance by comparing to the FGSM tutorial. """ from cleverhans_tutorials import mnist_tutorial_tf # Run the MNIST tutorial on a dataset of reduced size flags = {'train_start': 0, 'train_end': 5000, 'test_start': 0, 'test_end': 333, 'nb_epochs': 5, 'testing': True} report = mnist_tutorial_tf.mnist_tutorial(**flags) # Run the multi-gpu trainer for clean training flags.update({'batch_size': 128, 'adam_lrn': 0.001, 'dataset': 'mnist', 'only_adv_train': False, 'eval_iters': 1, 'ngpu': 1, 'fast_tests': False, 'attack_type_train': '', 'save_dir': None, 'save_steps': 10000, 'attack_nb_iter_train': None, 'save': False, 'model_type': 'basic', 'attack_type_test': 'FGSM'}) flags.update({'adv_train': False}) HParams = namedtuple('HParams', flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_2 = AccuracyReport() report_2.train_clean_train_clean_eval = report_dict['train'] report_2.clean_train_clean_eval = report_dict['test'] report_2.clean_train_adv_eval = report_dict['FGSM'] # Run the multi-gpu trainer for adversarial training flags.update({'adv_train': True, 'attack_type_train': 'FGSM', }) HParams = namedtuple('HParams', flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_2.train_adv_train_clean_eval = report_dict['train'] report_2.adv_train_clean_eval = report_dict['test'] report_2.adv_train_adv_eval = report_dict['FGSM'] self.assertClose(report.train_clean_train_clean_eval, report_2.train_clean_train_clean_eval, atol=5e-2) self.assertClose(report.clean_train_clean_eval, report_2.clean_train_clean_eval, atol=2e-2) self.assertClose(report.clean_train_adv_eval, report_2.clean_train_adv_eval, atol=5e-2) self.assertClose(report.train_adv_train_clean_eval, report_2.train_adv_train_clean_eval, atol=1e-1) self.assertClose(report.adv_train_clean_eval, report_2.adv_train_clean_eval, atol=2e-2) self.assertClose(report.adv_train_adv_eval, report_2.adv_train_adv_eval, atol=1e-1)
def mnist_tutorial( train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR, filename=FILENAME, load_model=LOAD_MODEL, testing=False, label_smoothing=0.1, ): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ tf.keras.backend.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if keras.backend.image_data_format() != "channels_last": raise NotImplementedError( "this tutorial requires keras to be configured to channels_last format" ) # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) # Get MNIST test data mnist = MNIST( train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end, ) x_train, y_train = mnist.get_set("train") x_test, y_test = mnist.get_set("test") # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Define TF model graph model = cnn_model( img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes, ) preds = model(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {"batch_size": batch_size} acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) report.clean_train_clean_eval = acc # assert X_test.shape[0] == test_end - test_start, X_test.shape print("Test accuracy on legitimate examples: %0.4f" % acc) # Train an MNIST model train_params = { "nb_epochs": nb_epochs, "batch_size": batch_size, "learning_rate": learning_rate, "train_dir": train_dir, "filename": filename, } rng = np.random.RandomState([2017, 8, 30]) if not os.path.exists(train_dir): os.mkdir(train_dir) ckpt = tf.train.get_checkpoint_state(train_dir) print(train_dir, ckpt) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path wrap = KerasModelWrapper(model) if load_model and ckpt_path: saver = tf.train.Saver() print(ckpt_path) saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") loss = CrossEntropy(wrap, smoothing=label_smoothing) train( sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng ) # Calculate training error if testing: eval_params = {"batch_size": batch_size} acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {"eps": 0.3, "clip_min": 0.0, "clip_max": 1.0} adv_x = fgsm.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {"batch_size": batch_size} acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par) print("Test accuracy on adversarial examples: %0.4f\n" % acc) report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {"batch_size": batch_size} acc = model_eval(sess, x, y, preds_adv, x_train, y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = cnn_model( img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes, ) wrap_2 = KerasModelWrapper(model_2) preds_2 = model_2(x) fgsm2 = FastGradientMethod(wrap_2, sess=sess) def attack(x): return fgsm2.generate(x, **fgsm_params) preds_2_adv = model_2(attack(x)) loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {"batch_size": batch_size} accuracy = model_eval(sess, x, y, preds_2, x_test, y_test, args=eval_params) print("Test accuracy on legitimate examples: %0.4f" % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, x_test, y_test, args=eval_params) print("Test accuracy on adversarial examples: %0.4f" % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training train( sess, loss_2, x_train, y_train, evaluate=evaluate_2, args=train_params, rng=rng ) # Calculate training errors if testing: eval_params = {"batch_size": batch_size} accuracy = model_eval(sess, x, y, preds_2, x_train, y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval( sess, x, y, preds_2_adv, x_train, y_train, args=eval_params ) report.train_adv_train_adv_eval = accuracy return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, train_dir="train_dir", filename="mnist.ckpt", load_model=False, testing=False, label_smoothing=0.1): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ keras.layers.core.K.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) # Get MNIST test data x_train, y_train, x_test, y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Define TF model graph model = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) preds = model(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) report.clean_train_clean_eval = acc # assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } rng = np.random.RandomState([2017, 8, 30]) if not os.path.exists(train_dir): os.mkdir(train_dir) ckpt = tf.train.get_checkpoint_state(train_dir) print(train_dir, ckpt) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path wrap = KerasModelWrapper(model) if load_model and ckpt_path: saver = tf.train.Saver() print(ckpt_path) saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") loss = LossCrossEntropy(wrap, smoothing=label_smoothing) train(sess, loss, x, y, x_train, y_train, evaluate=evaluate, args=train_params, save=True, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv_x = fgsm.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, x_train, y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) wrap_2 = KerasModelWrapper(model_2) preds_2 = model_2(x) fgsm2 = FastGradientMethod(wrap_2, sess=sess) def attack(x): return fgsm2.generate(x, **fgsm_params) preds_2_adv = model_2(attack(x)) loss_2 = LossCrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, x_test, y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, x_test, y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training train(sess, loss_2, x, y, x_train, y_train, evaluate=evaluate_2, args=train_params, save=False, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, x_train, y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, x_train, y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, train_dir="/tmp", filename="mnist.ckpt", load_model=False, testing=False): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :return: an AccuracyReport object """ keras.layers.core.K.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") # Image dimensions ordering should follow the Theano convention if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) # Define TF model graph model = cnn_model() preds = model(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } ckpt = tf.train.get_checkpoint_state(train_dir) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path rng = np.random.RandomState([2017, 8, 30]) if load_model and ckpt_path: saver = tf.train.Saver() saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, save=True) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph wrap = KerasModelWrapper(model) fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.3} adv_x = fgsm.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = cnn_model() preds_2 = model_2(x) wrap_2 = KerasModelWrapper(model_2) fgsm2 = FastGradientMethod(wrap_2, sess=sess) preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params)) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, save=False) # Get a random slice of the data for linear extrapolation plots random_idx = np.random.randint(0, X_train.shape[0]) X_slice = X_train[random_idx] Y_slice = Y_train[random_idx] # Plot the linear extrapolation plot for clean model log_prob_adv_array = get_logits_over_interval( sess, wrap, X_slice, fgsm_params) linear_extrapolation_plot(log_prob_adv_array, Y_slice, 'lep_clean.png') # Plot the linear extrapolation plot for adv model log_prob_adv_array = get_logits_over_interval( sess, wrap_2, X_slice, fgsm_params) linear_extrapolation_plot(log_prob_adv_array, Y_slice, 'lep_adv.png') # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, save=False, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy print(report)
def helper_run_multi_gpu_madryetal(self, extra_flags=None): """ Compare the single GPU performance to multiGPU performance. """ # Run the trainers on a dataset of reduced size flags = { "train_start": 0, "train_end": 5000, "test_start": 0, "test_end": 333, "nb_epochs": 5, "testing": True, } # Run the multi-gpu trainer for adversarial training flags.update( { "batch_size": 128, "adam_lrn": 0.001, "dataset": "mnist", "only_adv_train": False, "eval_iters": 1, "fast_tests": True, "save_dir": None, "save_steps": 10000, "attack_nb_iter_train": 10, "sync_step": None, "adv_train": True, "save": False, "model_type": "basic", "attack_type_test": "MadryEtAl_y", } ) if extra_flags is not None: flags.update(extra_flags) # Run the multi-gpu trainer for adversarial training using 2 gpus # trainer_multigpu by default sets `allow_soft_placement=True` flags.update( {"ngpu": 2, "attack_type_train": "MadryEtAl_y_multigpu", "sync_step": 1} ) HParams = namedtuple("HParams", flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, "runner"): report_dict = run_trainer(hparams) report_m = AccuracyReport() report_m.train_adv_train_clean_eval = report_dict["train"] report_m.adv_train_clean_eval = report_dict["test"] report_m.adv_train_adv_eval = report_dict["MadryEtAl_y"] flags.update({"ngpu": 1, "attack_type_train": "MadryEtAl_y"}) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, "runner"): report_dict = run_trainer(hparams) report_s = AccuracyReport() report_s.train_adv_train_clean_eval = report_dict["train"] report_s.adv_train_clean_eval = report_dict["test"] report_s.adv_train_adv_eval = report_dict["MadryEtAl_y"] self.assertClose( report_s.train_adv_train_clean_eval, report_m.train_adv_train_clean_eval, atol=5e-2, ) self.assertClose( report_s.adv_train_clean_eval, report_m.adv_train_clean_eval, atol=2e-2 ) self.assertClose( report_s.adv_train_adv_eval, report_m.adv_train_adv_eval, atol=5e-2 )
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=100, batch_size=128, learning_rate=0.001, train_dir="/tmp", filename="mnist.ckpt", load_model=False, testing=True, w=2, rel=0): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :param w : number of perceptive neurons :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility #tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) keras.layers.core.K.set_learning_phase(1) # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) # Define TF model graph models = spartan_network(sess=sess,w=w,train=True,create_surrogate=True) model_to_attack = models[0] spartan_model = models[1] ckpt = tf.train.get_checkpoint_state(train_dir) print(ckpt) trainvalue=True if ckpt is None else False #model_to_attack = unprotected_network(sess=sess, train=trainvalue, save=True) preds = model_to_attack(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } rng = None ckpt = tf.train.get_checkpoint_state(train_dir) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path if load_model and ckpt_path: saver = tf.train.Saver() saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") keras.layers.core.K.set_learning_phase(1) # model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, # args=train_params, save=True, rng=rng) keras.layers.core.K.set_learning_phase(0) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) print("With no Dropout : %s" % acc) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph wrap = KerasModelWrapper(model_to_attack) global mdl mdl = model_to_attack fgsm = FastGradientMethod(wrap, sess=sess) for epstep in range(40): fgsm_params = {'eps': 0.01+0.02*epstep, 'clip_min': 0., 'clip_max': 1.} # cw_params = {'confidence': 0.5, # 'batch_size': 4, # 'learning_rate': 2e-2, # 'max_iterations': 400, # 'clip_min': 0., # 'clip_max': 1.} adv_x = fgsm.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) # cwattack = atk.CarliniWagnerL2(model_to_attack,sess=sess) # adv_x = cwattack.generate(x, **cw_params) # adv_x = tf.stop_gradient(adv_x) # adv_x_np = cwattack.generate_np(X_test[500:704], **cw_params) # from matplotlib import pyplot as plt # plt.rc('figure', figsize=(12.0, 12.0)) # for j in range(40): # # plt.imshow(adv_x_np[j].reshape((28, 28)), # cmap="gray") # plt.pause(0.15) # return preds_adv = spartan_model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on epsilon-%0.4f-adversarial examples: %0.4f\n' % (fgsm_params["eps"],acc)) return report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = cnn_model(w=w, rel=rel) preds_2 = model_2(x) wrap_2 = KerasModelWrapper(model_2) fgsm2 = FastGradientMethod(wrap_2, sess=sess) preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params)) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, save=False, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, clean_train=True, testing=False, backprop_through_attack=False, nb_filters=64): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param clean_train: if true, train on clean examples :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session sess = tf.Session() # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) model_path = "models/mnist" # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2017, 8, 30]) if clean_train: model = make_basic_cnn(nb_filters=nb_filters) preds = model.get_probs(x) def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test # examples eval_params = {'batch_size': batch_size} acc = model_eval( sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval( sess, x, y, preds, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) print(adv_x) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} # Define accuracy symbolically if LooseVersion(tf.__version__) >= LooseVersion('1.0.0'): correct_preds = tf.not_equal(tf.argmax(y, axis=-1), tf.argmax(preds_adv, axis=-1)) else: correct_preds = tf.not_equal(tf.argmax(y, axis=tf.rank(y) - 1), tf.argmax(preds_adv, axis=tf.rank(preds_adv) - 1)) # print("the shape of correct_preds is ", correct_preds.get_shape()) # correct_preds is a boolean Tensor with shape (size,) success_adv_x = tf.boolean_mask(adv_x, correct_preds) success_clean_x = tf.boolean_mask(x, correct_preds) success_clean_y = tf.boolean_mask(y, correct_preds) fgsm_adv_x, fgsm_clean_x, fgsm_clean_y = sess.run([success_adv_x, success_clean_x, success_clean_y], feed_dict={x:X_test,y:Y_test}) np.savez('adversarial_fgsm',adv_examples=fgsm_adv_x, adv_clean_labels=fgsm_clean_y, adv_clean_examples=fgsm_clean_x) print("the shape of adversarial examples we save is ", np.shape(fgsm_adv_x)) print("the shape of clean targets we save is ", np.shape(fgsm_clean_y)) acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on adversarial examples fgsm: %0.4f\n' % acc) report.clean_train_adv_eval = acc adv_x_test_for_save = sess.run(adv_x, {x: X_test}) np.savez("adv_test_fgsm_data.npz", adv_examples=adv_x_test_for_save, adv_clean_labels=Y_test, adv_clean_examples=X_test) # Calculate training error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = make_basic_cnn(nb_filters=nb_filters) preds_2 = model_2(x) fgsm2 = FastGradientMethod(model_2, sess=sess) adv_x_2 = fgsm2.generate(x, **fgsm_params) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x_2 = tf.stop_gradient(adv_x_2) preds_2_adv = model_2(adv_x_2) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training print("pred_adv", preds_2_adv.get_shape()) model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph ########################################################################### # Craft adversarial examples using the Jacobian-based saliency map approach ########################################################################### source_samples = 10000 nb_classes = 10 print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) + ' adversarial examples') # Keep track of success (adversarial example classified in target) results = np.zeros((nb_classes, source_samples), dtype='i') # Rate of perturbed features for each test set example and target class perturbations = np.zeros((nb_classes, source_samples), dtype='f') # Instantiate a SaliencyMapMethod attack object jsma = SaliencyMapMethod(model_2, back='tf', sess=sess) jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None} figure = None # create an array for storing adv examples adv_examples = np.empty([1,28,28,1]) # for target labels adv_targets = np.empty([1,10]) # corresponding clean/correct label adv_clean_labels = np.empty([1,10]) # correspongding clean data adv_clean_examples = np.empty([1,28,28,1]) # Loop over the samples we want to perturb into adversarial examples for sample_ind in xrange(0, source_samples): print('--------------------------------------') print('Attacking input %i/%i' % (sample_ind + 1, source_samples)) sample = X_test[sample_ind:(sample_ind+1)] # generate from testing data # We want to find an adversarial example for each possible target class # (i.e. all classes that differ from the label given in the dataset) current_class = int(np.argmax(Y_test[sample_ind])) # generate from testing data target_classes = other_classes(nb_classes, current_class) # For the grid visualization, keep original images along the diagonal # grid_viz_data[current_class, current_class, :, :, :] = np.reshape( # sample, (img_rows, img_cols, channels)) # Loop over all target classes for target in target_classes: print('Generating adv. example for target class %i' % target) # This call runs the Jacobian-based saliency map approach one_hot_target = np.zeros((1, nb_classes), dtype=np.float32) #create fake target one_hot_target[0, target] = 1 jsma_params['y_target'] = one_hot_target adv_x = jsma.generate_np(sample, **jsma_params) # print('adv_x\'shape is ', np.shape(adv_x)) # (1,28,28,1) # Check if success was achieved res = int(model_argmax(sess, x, preds, adv_x) == target) # if succeeds if res == 1: # append new adv_x to adv_examples array # append sample here, so that the number of times sample is appended mmatches number of adv_ex. adv_examples = np.append(adv_examples, adv_x, axis=0) adv_targets = np.append(adv_targets, one_hot_target, axis=0) adv_clean_labels = np.append(adv_clean_labels, np.expand_dims(Y_test[sample_ind],axis=0), axis=0) # generate from testing data adv_clean_examples = np.append(adv_clean_examples, sample, axis=0) adv_x_reshape = adv_x.reshape(-1) test_in_reshape = X_test[sample_ind].reshape(-1) nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0] percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0] # Update the arrays for later analysis results[target, sample_ind] = res perturbations[target, sample_ind] = percent_perturb print('--------------------------------------') adv_examples = adv_examples[1:,:,:,:] adv_targets = adv_targets[1:,:] adv_clean_labels = adv_clean_labels[1:,:] adv_clean_examples = adv_clean_examples[1:,:,:,:] np.savez('adversarial_jsma_actual_full',adv_examples=adv_examples, adv_targets=adv_targets, adv_clean_labels=adv_clean_labels,adv_clean_examples=adv_clean_examples) print(np.shape(adv_targets)[0], "adversarial examples have been saved.") print('--------------------------------------') # Compute the number of adversarial examples that were successfully found nb_targets_tried = ((nb_classes - 1) * source_samples) succ_rate = float(np.sum(results)) / nb_targets_tried print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate)) report.clean_test_adv_eval = 1. - succ_rate # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(perturbations) print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed)) # Compute the average distortion introduced for successful samples only percent_perturb_succ = np.mean(perturbations * (results == 1)) print('Avg. rate of perturbed features for successful ' 'adversarial examples {0:.4f}'.format(percent_perturb_succ)) return report
def minist_fgsm_saliency( train_start=0, train_end=10, test_start=0, test_end=5, nb_epochs=2, batch_size=128, learning_rate=0.001, clean_train=True, testing=False, backprop_through_attack=False, nb_filters=64, nb_classes=10, source_samples=10, ): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param clean_train: if true, train on clean examples :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session sess = tf.Session() # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 # this way, all the 9 zeroes -> 0.1/9 because # the one-bit becomes 0.9 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) # placeholder for y_target --> for saliency tensor y_target = tf.placeholder(tf.float32, shape=(None, 10)) model_path = "models/mnist" # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2017, 8, 30]) ########################################################################### # Training the CNN model using TensorFlow: model --> base model ########################################################################### model = make_basic_cnn(nb_filters=nb_filters) preds = model.get_probs(x) if clean_train: # omg -> creates a cnn model # model = make_basic_cnn(nb_filters=nb_filters) # preds = model.get_probs(x) def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test # examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) ########################################################################### # MODEL Train!!!!!!!!!!!! ########################################################################### # training the basic model, using train_params model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc ########################################################################### # Generate FGSM Adversarial based on model, and # Compute Base Model Accuracy ########################################################################### # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) # todo: follow the paper and run Cleverhans Output? fgsm_params_y = {'eps': 0.3, 'y': y, 'clip_min': 0., 'clip_max': 1.} #adv_x = fgsm.generate(x, **fgsm_params) adv_x = fgsm.generate(x, **fgsm_params_y) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on FGSM adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculate training error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc ########################################################################### # Generate Saliency Map Adversarial Example and # Compute base model accuracy (only 10) ########################################################################### print("Saliency Map Attack On The Base Model") print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) + ' adversarial examples') # Instantiate a SaliencyMapMethod attack object --> modify y_target for each test_data again jsma = SaliencyMapMethod(model, back='tf', sess=sess) jsma_params = { 'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None } # Keep track of success (adversarial example classified in target) # Need this info to compute the success rate results = np.zeros((nb_classes, source_samples), dtype='i') # each sample will get 9 adversarial samples # adv_x_set: place_holder for all the x variations # correct_y_set: correct_y_output used for training adv_x_set = None adv_y_target = None # we need multi x_train_saliency / y_train_saliency # x_train_saliency = None y_train_saliency = None for sample_ind in xrange(0, source_samples): print('--------------------------------------') print('Saliency Attacking input %i/%i' % (sample_ind + 1, source_samples)) sample = X_train[sample_ind:(sample_ind + 1)] y_sample = Y_train[sample_ind:(sample_ind + 1)] current_class = int(np.argmax(Y_train[sample_ind])) target_classes = other_classes(nb_classes, current_class) # Loop over all target classes for target in target_classes: print('Generating adv. example for target class %i' % target) # Create x_train_saliency, corresponding to y_train_saliency if x_train_saliency is not None: x_train_saliency = np.concatenate( (x_train_saliency, sample), axis=0) y_train_saliency = np.concatenate( (y_train_saliency, y_sample), axis=0) else: x_train_saliency = sample y_train_saliency = y_sample print("sample shape: ", x_train_saliency.shape) print("y_sample shape: ", y_train_saliency.shape) # This call runs the Jacobian-based saliency map approach one_hot_target = np.zeros((1, nb_classes), dtype=np.float32) one_hot_target[0, target] = 1 jsma_params['y_target'] = one_hot_target adv_x_np = jsma.generate_np(sample, **jsma_params) # Add to adv_x_set, correct_y_set if adv_x_set is not None: adv_y_target = np.concatenate( (adv_y_target, one_hot_target), axis=0) adv_x_set = np.concatenate((adv_x_np, adv_x_set), axis=0) else: adv_y_target = one_hot_target adv_x_set = adv_x_np print("adv_y_target shape(one-hot-encoding): ", adv_y_target.shape) print("adv_x_set(np) shape: ", adv_x_np.shape) # Check if success was achieved res = int(model_argmax(sess, x, preds, adv_x_np) == target) # Update the arrays for later analysis results[target, sample_ind] = res print('--------------------------------------') # Compute the number of adversarial examples that were successfully found nb_targets_tried = ((nb_classes - 1) * source_samples) succ_rate = float(np.sum(results)) / nb_targets_tried print('Avg. rate of successful Saliency adv. examples {0:.4f}'.format( succ_rate)) report.clean_train_adv_eval = 1. - succ_rate # here we have successfully stacked up x_adversarial_set, y_correct_set # these can be used to provide training to our model now print("\n\n\n*****************************") print("Checking x_adv_set shape: ", adv_x_set.shape) print("Checking correct_y_set shape: ", adv_y_target.shape) print("x_training_saliency shape:", x_train_saliency.shape) print("y_training_saliency shape:", y_train_saliency.shape) # now construct model 3, define output -> input relationship tensor model_3 = make_basic_cnn(nb_filters=nb_filters) # define the x, the placeholder input - > preds_3 output preds_3 = model_3(x) # jsma3 = SaliencyMapMethod(model_3, sess=sess) # # jsma_params = {'theta': 1., 'gamma': 0.1, # 'clip_min': 0., 'clip_max': 1., # 'y_target': y_target} # # # create adv_saliency set tensor, using x_train data and jsma_params containing adv_y_target # adv_jsma = jsma3.generate(x, jsma_params) # # create adv preds tensor # preds_jsma_adv = model_3(adv_jsma) # define saliency training model accuracy def evaluate_saliency(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_3, x_train_saliency, y_train_saliency, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy ########################################################################### # MODEL Train for Saliency Map ########################################################################### # Perform and evaluate adversarial training with FSGM MODEL!!! # Train the model with samples of normal and adversarial examples! model_train(sess, x, y, model_3, x_train_saliency, y_train_saliency, evaluate=evaluate_saliency(), args=train_params, rng=rng) #todo: use jsma to create adversarial testing??? or training??? # Redefine TF model FGSM!!! model_2 = make_basic_cnn(nb_filters=nb_filters) preds_2 = model_2(x) fgsm2 = FastGradientMethod(model_2, sess=sess) # parameter for FGSM fgsm_params_y = {'eps': 0.3, 'y': y, 'clip_min': 0., 'clip_max': 1.} adv_x_2 = fgsm2.generate(x, **fgsm_params_y) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x_2 = tf.stop_gradient(adv_x_2) preds_2_adv = model_2(adv_x_2) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy ########################################################################### # MODEL Train for FGSM ########################################################################### # Perform and evaluate adversarial training with FSGM MODEL!!! model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, train_dir="train_dir", filename="mnist.ckpt", load_model=False, testing=False, label_smoothing=True): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :return: an AccuracyReport object """ keras.layers.core.K.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) # Get MNIST test data x_train, y_train, x_test, y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] if label_smoothing: label_smooth = .1 y_train = y_train.clip(label_smooth / (nb_classes-1), 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Define TF model graph model = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) preds = model(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) report.clean_train_clean_eval = acc # assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } rng = np.random.RandomState([2017, 8, 30]) if not os.path.exists(train_dir): os.mkdir(train_dir) ckpt = tf.train.get_checkpoint_state(train_dir) print(train_dir, ckpt) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path wrap = KerasModelWrapper(model) if load_model and ckpt_path: saver = tf.train.Saver() print(ckpt_path) saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") loss = LossCrossEntropy(wrap, smoothing=0.1) train(sess, loss, x, y, x_train, y_train, evaluate=evaluate, args=train_params, save=True, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv_x = fgsm.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, x_train, y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) wrap_2 = KerasModelWrapper(model_2) preds_2 = model_2(x) fgsm2 = FastGradientMethod(wrap_2, sess=sess) def attack(x): return fgsm2.generate(x, **fgsm_params) preds_2_adv = model_2(attack(x)) loss_2 = LossCrossEntropy(wrap_2, smoothing=0.1, attack=attack) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, x_test, y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, x_test, y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training train(sess, loss_2, x, y, x_train, y_train, evaluate=evaluate_2, args=train_params, save=False, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, x_train, y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, x_train, y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def train(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, clean_train=True, testing=False, use_rec_err=True, backprop_through_attack=False, nb_filters=64, num_threads=None, model_arch=None, use_cross_error=True, attack_name=None, dataset_name='mnist', blocking_option=None, opt_type='adam', rec_error_weight=None): """ Train model :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param clean_train: if true, train on clean examples :return: an AccuracyReport object """ attacks = { 'fgsm': FastGradientMethod, 'pgd': MadryEtAl, 'pgd_restart': MadryEtAl_WithRestarts, 'cw': CarliniWagnerL2 } attack_params = { 'mnist': { 'fgsm': { 'eps': 0.3, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1. }, 'pgd': { 'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 40 }, 'pgd_restart': { 'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 40, 'nb_restarts': 1 }, 'cw': { 'batch_size': batch_size }, }, 'cifar': { 'fgsm': { 'eps': 0.1 + 0.0 * 8.0 / 255, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1. }, 'pgd': { 'eps': 0.1 + 0.0 * 8.0 / 255, 'eps_iter': 0.01, 'nb_iter': 40 }, 'pgd_restart': { 'eps': 8.0 / 255, 'eps_iter': 0.01, 'nb_iter': 40, 'nb_restarts': 1 }, 'cw': { 'batch_size': batch_size }, }, 'fashion_mnist': { 'fgsm': { 'eps': 0.1, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1. }, 'pgd': { 'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 40 }, 'pgd_restart': { 'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 40, 'nb_restarts': 1 }, 'cw': { 'batch_size': batch_size }, }, }[dataset_name] print("attack parameters:", attack_params[attack_name]) def get_rec_err(pre_, post_): if not use_rec_err: return None return compute_rec_err(pre_, post_, blocking_option) def weight_rec_err(err, wgt): if err is None: return err return err * wgt # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get datasets datasets = { 'mnist': data_mnist, 'cifar': data_cifar10, 'fashion_mnist': fashion_mnist, } X_train, Y_train, X_test, Y_test = datasets[dataset_name]( train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder input_shape = { 'mnist': (None, 28, 28, 1), 'cifar': (None, 32, 32, 3), 'fashion_mnist': (None, 28, 28, 1) }[dataset_name] x = tf.placeholder(tf.float32, shape=input_shape) #tf.summary.image('input', x) y = tf.placeholder(tf.float32, shape=(None, 10)) if dataset_name == 'cifar': x = tf.map_fn(lambda frame: preprocess_image(frame, True), x) # Train a model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } model_params = { 'nb_filters': nb_filters, 'model_arch': model_arch, 'blocking_option': blocking_option, 'input_shape': input_shape } rng = np.random.RandomState([2017, 8, 30]) merged = tf.summary.merge_all() if clean_train: print('.. using clean training') model = make_basic_model(**model_params) preds = model.get_probs(x) pre_ae_states, post_ae_states = model.get_ae_states() rec_err = get_rec_err(pre_ae_states, post_ae_states) class Evaluate(object): def __init__(self): self.best_accuracy = 0. def __call__(self): # Evaluate the accuracy of the model on legitimate test # examples eval_params = {'batch_size': batch_size} acc, rec_loss = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params, aux_loss_lst=[rec_err]) self.best_accuracy = max(self.best_accuracy, acc) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) print('Best accuracy so far: {:0.4f}'.format( self.best_accuracy)) print('reconstruction error on legit examples: %0.4f' % rec_loss) evaluate = Evaluate() model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, rng=rng, aux_loss=weight_rec_err(rec_err, rec_error_weight), opt_type=opt_type, summary=merged) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc, rec_loss = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params, aux_loss_lst=[rec_err]) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph attack = attacks[attack_name](model, sess=sess) adv_x = attack.generate(x, **attack_params[attack_name]) preds_adv = model.get_probs(adv_x) pre_ae_states_adv, post_ae_states_adv = model.get_ae_states() rec_err_adv = get_rec_err(pre_ae_states_adv, post_ae_states_adv) # Evaluate the accuracy of the model on adversarial examples eval_par = {'batch_size': batch_size} acc, rec_loss = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par, aux_loss_lst=[rec_err_adv]) print('Test accuracy on adversarial examples: %0.4f\n' % acc) print('reconstruction error on adv examples: %0.4f' % rec_loss) report.clean_train_adv_eval = acc # Calculate training error if testing: eval_par = {'batch_size': batch_size} acc, rec_loss = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par, aux_loss_lst=[rec_err_adv]) report.train_clean_train_adv_eval = acc print('.. using adversarial training') # Redefine TF model graph model_2 = make_basic_model(**model_params) tf.summary.image('input', x) preds_2 = model_2(x) pre_ae_states_2, post_ae_states_2 = model_2.get_ae_states() rec_err_2 = get_rec_err(pre_ae_states_2, post_ae_states_2) attack2 = attacks[attack_name](model_2, sess=sess) adv_x_2 = attack2.generate(x, **attack_params[attack_name]) tf.summary.image('adversarial', adv_x_2) tf.summary.image('diff', x - adv_x_2) if not backprop_through_attack: adv_x_2 = tf.stop_gradient(adv_x_2) preds_2_adv = model_2(adv_x_2) pre_ae_states_adv_2, post_ae_states_adv_2 = model_2.get_ae_states() rec_err_adv_2 = get_rec_err(pre_ae_states_adv_2, post_ae_states_adv_2) # adv -> clean reconstruction rec_err_cross_2 = get_rec_err(pre_ae_states_2, post_ae_states_adv_2) class Evaluate2(object): def __init__(self): self.best_accuracy = 0. def __call__(self): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy, rec_loss = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params, aux_loss_lst=[rec_err_2], summary=merged) self.best_accuracy = max(self.best_accuracy, accuracy) print('Test accuracy on legitimate examples: %0.4f' % accuracy) print('Best test accuracy so far: {:0.4f}'.format( self.best_accuracy)) print('reconstruction error on legit examples: %0.4f' % rec_loss) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy_adv, rec_loss_adv, rec_loss_cross = model_eval( sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params, aux_loss_lst=[rec_err_adv_2, rec_err_cross_2], summary=merged) print('Test accuracy on adversarial examples: %0.4f' % accuracy_adv) print('reconstruction error on adv->adv: %0.4f' % rec_loss_adv) print('reconstruction error on adv->clean: %0.4f' % rec_loss_cross) report.adv_train_adv_eval = accuracy_adv with open('report.dat', 'a') as f: f.write(' '.join([ '%0.4f' % v for v in [ accuracy, accuracy_adv, rec_loss, rec_loss_adv, rec_loss_cross, ] ]) + '\n') evaluate_2 = Evaluate2() # combine both errors if rec_err_cross_2 is None or rec_err_2 is None: use_cross_error = False #XXX hack rec_use_train = rec_err_cross_2 + rec_err_2 if use_cross_error else rec_err_2 # Perform and evaluate adversarial training model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, rng=rng, aux_loss=weight_rec_err(rec_use_train, rec_error_weight), opt_type=opt_type, summary=merged) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy, rec_loss = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params, aux_loss_lst=[rec_err_2]) report.train_adv_train_clean_eval = accuracy accuracy, rec_loss = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params, aux_loss_lst=[rec_err_adv_2]) report.train_adv_train_adv_eval = accuracy return report
def run_mnist_adv(num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, testing=False, learning_rate=LEARNING_RATE): # Object used to keep track of (and return) key accuracies report = AccuracyReport() # set random seed tf.set_random_seed(42) # can use gpu config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 1} ) # Create TF session and set Keras backend session as TF sess = tf.Session(config=config) keras.backend.set_session(sess) # Get MNIST test data mnist = MNIST() x_train, y_train = mnist.get_set("train") x_test, y_test = mnist.get_set("test") # Obtain image params n_rows, n_cols, n_channels = x_train.shape[1:4] n_classes = y_train.shape[1] # define TF model graph model = ConvNet((n_rows, n_cols, n_channels), n_classes) model(model.input) wrap = KerasModelWrapper(model) fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = { 'eps': 0.3, 'clip_min': 0., 'clip_max': 1. } adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params) model.compile( optimizer=keras.optimizers.Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy', adv_acc_metric] ) # Train an MNIST model model.fit(x_train, y_train, batch_size=batch_size, epochs=num_epochs, validation_data=(x_test, y_test), verbose=1) # Evaluate the accuracy on legitimate and adversarial test examples _, acc, adv_acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0) report.clean_train_clean_eval = acc report.clean_train_adv_eval = adv_acc print('Test accuracy on legitimate examples: %0.4f' % acc) print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc) # Calculate training error if testing: _, train_acc, train_adv_acc = model.evaluate(x_train, y_train, batch_size=batch_size, verbose=0) report.train_clean_train_clean_eval = train_acc report.train_clean_train_adv_eval = train_adv_acc print("Repeating the process, using adversarial training") # Redefine Keras model model_2 = ConvNet((n_rows, n_cols, n_channels), n_classes) model_2(model_2.input) wrap_2 = KerasModelWrapper(model_2) fgsm_2 = FastGradientMethod(wrap_2, sess=sess) # Use a loss function based on legitimate and adversarial examples adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params) adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params) model_2.compile( optimizer=keras.optimizers.Adam(learning_rate), loss=adv_loss_2, metrics=['accuracy', adv_acc_metric_2] ) # Train an MNIST model model_2.fit(x_train, y_train, batch_size=batch_size, epochs=num_epochs, validation_data=(x_test, y_test), verbose=1) # Evaluate the accuracy on legitimate and adversarial test examples _, acc, adv_acc = model_2.evaluate(x_test, y_test, batch_size=batch_size, verbose=0) report.adv_train_clean_eval = acc report.adv_train_adv_eval = adv_acc print('Test accuracy on legitimate examples: %0.4f' % acc) print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc) # Calculate training error if testing: _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train, batch_size=batch_size, verbose=0) report.train_adv_train_clean_eval = train_acc report.train_adv_train_adv_eval = train_adv_acc return report
def test_run_single_gpu_fgsm(self): """ Test the basic single GPU performance by comparing to the FGSM tutorial. """ from cleverhans_tutorials import mnist_tutorial_tf # Run the MNIST tutorial on a dataset of reduced size flags = { "train_start": 0, "train_end": 5000, "test_start": 0, "test_end": 333, "nb_epochs": 5, "testing": True, } report = mnist_tutorial_tf.mnist_tutorial(**flags) # Run the multi-gpu trainer for clean training flags.update( { "batch_size": 128, "adam_lrn": 0.001, "dataset": "mnist", "only_adv_train": False, "eval_iters": 1, "ngpu": 1, "fast_tests": False, "attack_type_train": "", "save_dir": None, "save_steps": 10000, "attack_nb_iter_train": None, "save": False, "model_type": "basic", "attack_type_test": "FGSM", } ) flags.update({"adv_train": False}) HParams = namedtuple("HParams", flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, "runner"): report_dict = run_trainer(hparams) report_2 = AccuracyReport() report_2.train_clean_train_clean_eval = report_dict["train"] report_2.clean_train_clean_eval = report_dict["test"] report_2.clean_train_adv_eval = report_dict["FGSM"] # Run the multi-gpu trainer for adversarial training flags.update({"adv_train": True, "attack_type_train": "FGSM"}) HParams = namedtuple("HParams", flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, "runner"): report_dict = run_trainer(hparams) report_2.train_adv_train_clean_eval = report_dict["train"] report_2.adv_train_clean_eval = report_dict["test"] report_2.adv_train_adv_eval = report_dict["FGSM"] self.assertClose( report.train_clean_train_clean_eval, report_2.train_clean_train_clean_eval, atol=5e-2, ) self.assertClose( report.clean_train_clean_eval, report_2.clean_train_clean_eval, atol=2e-2 ) self.assertClose( report.clean_train_adv_eval, report_2.clean_train_adv_eval, atol=5e-2 ) self.assertClose( report.train_adv_train_clean_eval, report_2.train_adv_train_clean_eval, atol=1e-1, ) self.assertClose( report.adv_train_clean_eval, report_2.adv_train_clean_eval, atol=2e-2 ) self.assertClose( report.adv_train_adv_eval, report_2.adv_train_adv_eval, atol=1e-1 )
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, clean_train=True, testing=False): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param clean_train: if true, train on clean examples :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session sess = tf.Session() # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) model_path = "models/mnist" # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } fgsm_params = {'eps': 0.3} if clean_train: model = make_basic_cnn() preds = model.get_probs(x) def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test # examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculate training error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = make_basic_cnn() preds_2 = model_2(x) fgsm2 = FastGradientMethod(model_2, sess=sess) preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params)) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def helper_run_multi_gpu_madryetal(self, extra_flags=None): """ Compare the single GPU performance to multiGPU performance. """ # Run the trainers on a dataset of reduced size flags = { 'train_start': 0, 'train_end': 5000, 'test_start': 0, 'test_end': 333, 'nb_epochs': 5, 'testing': True } # Run the multi-gpu trainer for adversarial training flags.update({ 'batch_size': 128, 'adam_lrn': 0.001, 'dataset': 'mnist', 'only_adv_train': False, 'eval_iters': 1, 'fast_tests': True, 'save_dir': None, 'save_steps': 10000, 'attack_nb_iter_train': 10, 'sync_step': None, 'adv_train': True, 'save': False, 'model_type': 'basic', 'attack_type_test': 'MadryEtAl_y' }) if extra_flags is not None: flags.update(extra_flags) # Run the multi-gpu trainer for adversarial training using 2 gpus # trainer_multigpu by default sets `allow_soft_placement=True` flags.update({ 'ngpu': 2, 'attack_type_train': 'MadryEtAl_y_multigpu', 'sync_step': 1 }) HParams = namedtuple('HParams', flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.compat.v1.set_random_seed(42) with tf.compat.v1.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_m = AccuracyReport() report_m.train_adv_train_clean_eval = report_dict['train'] report_m.adv_train_clean_eval = report_dict['test'] report_m.adv_train_adv_eval = report_dict['MadryEtAl_y'] flags.update({'ngpu': 1, 'attack_type_train': 'MadryEtAl_y'}) hparams = HParams(**flags) np.random.seed(42) tf.compat.v1.set_random_seed(42) with tf.compat.v1.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_s = AccuracyReport() report_s.train_adv_train_clean_eval = report_dict['train'] report_s.adv_train_clean_eval = report_dict['test'] report_s.adv_train_adv_eval = report_dict['MadryEtAl_y'] self.assertClose(report_s.train_adv_train_clean_eval, report_m.train_adv_train_clean_eval, atol=5e-2) self.assertClose(report_s.adv_train_clean_eval, report_m.adv_train_clean_eval, atol=2e-2) self.assertClose(report_s.adv_train_adv_eval, report_m.adv_train_adv_eval, atol=5e-2)
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=True, testing=False, backprop_through_attack=False, nb_filters=NB_FILTERS, num_threads=None, attack_string=None): """ MNIST cleverhans tutorial :param train_start: index of first training set example. :param train_end: index of last training set example. :param test_start: index of first test set example. :param test_end: index of last test set example. :param nb_epochs: number of epochs to train model. :param batch_size: size of training batches. :param learning_rate: learning rate for training. :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate. :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param nb_filters: number of filters in the CNN used for training. :param num_threads: number of threads used for running the process. :param attack_string: attack name for crafting adversarial attacks and adversarial training, in string format. :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) X_train, Y_train = mnist.get_set('train') X_test, Y_test = mnist.get_set('test') # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } # Initialize the attack object attack_class = attack_selection(attack_string) attack_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2018, 6, 18]) if clean_train: model = ModelBasicCNNTFE(nb_filters=nb_filters) def evaluate_clean(): """ Evaluate the accuracy of the MNIST model on legitimate test examples """ eval_params = {'batch_size': batch_size} acc = model_eval(model, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) train(model, X_train, Y_train, evaluate=evaluate_clean, args=train_params, rng=rng, var_list=model.get_params()) if testing: # Calculate training error eval_params = {'batch_size': batch_size} acc = model_eval(model, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} attack = attack_class(model) acc = model_eval( model, X_test, Y_test, args=eval_par, attack=attack, attack_args=attack_params) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculate training error if testing: eval_par = {'batch_size': batch_size} acc = model_eval( model, X_train, Y_train, args=eval_par, attack=attack, attack_args=attack_params) print('Train accuracy on adversarial examples: %0.4f\n' % acc) report.train_clean_train_adv_eval = acc attack = None print("Repeating the process, using adversarial training") model_adv_train = ModelBasicCNNTFE(nb_filters=nb_filters) attack = attack_class(model_adv_train) def evaluate_adv(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval( model_adv_train, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval( model_adv_train, X_test, Y_test, args=eval_params, attack=attack, attack_args=attack_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training train(model_adv_train, X_train, Y_train, evaluate=evaluate_adv, args=train_params, rng=rng, var_list=model_adv_train.get_params(), attack=attack, attack_args=attack_params) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval( model_adv_train, X_train, Y_train, args=eval_params, attack=None, attack_args=None) report.train_adv_train_clean_eval = accuracy accuracy = model_eval( model_adv_train, X_train, Y_train, args=eval_params, attack=attack, attack_args=attack_params) report.train_adv_train_adv_eval = accuracy return report
def test_run_single_gpu_fgsm(self): """ Test the basic single GPU performance by comparing to the FGSM tutorial. """ from cleverhans_tutorials import mnist_tutorial_tf # Run the MNIST tutorial on a dataset of reduced size flags = { 'train_start': 0, 'train_end': 5000, 'test_start': 0, 'test_end': 333, 'nb_epochs': 5, 'testing': True } report = mnist_tutorial_tf.mnist_tutorial(**flags) # Run the multi-gpu trainer for clean training flags.update({ 'batch_size': 128, 'adam_lrn': 0.001, 'dataset': 'mnist', 'only_adv_train': False, 'eval_iters': 1, 'ngpu': 1, 'fast_tests': False, 'attack_type_train': '', 'save_dir': None, 'save_steps': 10000, 'attack_nb_iter_train': None, 'save': False, 'model_type': 'basic', 'attack_type_test': 'FGSM' }) flags.update({'adv_train': False}) HParams = namedtuple('HParams', flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.compat.v1.set_random_seed(42) with tf.compat.v1.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_2 = AccuracyReport() report_2.train_clean_train_clean_eval = report_dict['train'] report_2.clean_train_clean_eval = report_dict['test'] report_2.clean_train_adv_eval = report_dict['FGSM'] # Run the multi-gpu trainer for adversarial training flags.update({'adv_train': True, 'attack_type_train': 'FGSM'}) HParams = namedtuple('HParams', flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.compat.v1.set_random_seed(42) with tf.compat.v1.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_2.train_adv_train_clean_eval = report_dict['train'] report_2.adv_train_clean_eval = report_dict['test'] report_2.adv_train_adv_eval = report_dict['FGSM'] self.assertClose(report.train_clean_train_clean_eval, report_2.train_clean_train_clean_eval, atol=5e-2) self.assertClose(report.clean_train_clean_eval, report_2.clean_train_clean_eval, atol=2e-2) self.assertClose(report.clean_train_adv_eval, report_2.clean_train_adv_eval, atol=5e-2) self.assertClose(report.train_adv_train_clean_eval, report_2.train_adv_train_clean_eval, atol=1e-1) self.assertClose(report.adv_train_clean_eval, report_2.adv_train_clean_eval, atol=2e-2) self.assertClose(report.adv_train_adv_eval, report_2.adv_train_adv_eval, atol=1e-1)
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, epsilon=0.3, learning_rate=0.001, train_dir="/tmp", filename="mnist.ckpt", load_model=False, testing=False): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :return: an AccuracyReport object """ keras.layers.core.K.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) # Use label smoothing assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) # Define TF model graph model = cnn_model_BIM() preds = model(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } ckpt = tf.train.get_checkpoint_state(train_dir) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path rng = np.random.RandomState([2017, 8, 30]) if load_model and ckpt_path: saver = tf.train.Saver() saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, save=False, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph wrap = KerasModelWrapper(model) print("FastGradientMethod") fgsm1 = FastGradientMethod(wrap, sess=sess) for epsilon in [0.005, 0.01, 0.05, 0.1, 0.5, 1.0]: print("Epsilon =", epsilon), fgsm_params = {'eps': epsilon, 'clip_min': None, 'clip_max': None} adv_x = fgsm1.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc print("BasicIterativeMethod") bim = BasicIterativeMethod(wrap, sess=sess) for epsilon, order in zip( [0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 0.5, 1.0], [np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, 2, 2]): print("Epsilon =", epsilon), fgsm_params = { 'eps': epsilon, 'clip_min': 0., 'clip_max': 1., 'ord': order } adv_x = bim.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, X_train, Y_train, args=eval_par) report.train_clean_train_adv_eval = acc return print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = cnn_model() preds_2 = model_2(x) wrap_2 = KerasModelWrapper(model_2) #fgsm2 = FastGradientMethod(wrap_2, sess=sess) bim2 = BasicIterativeMethod(wrap_2, sess=sess) preds_2_adv = model_2(bim2.generate(x, **fgsm_params)) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, save=False, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def helper_run_multi_gpu_madryetal(self, extra_flags=None): """ Compare the single GPU performance to multiGPU performance. """ # Run the trainers on a dataset of reduced size flags = {'train_start': 0, 'train_end': 5000, 'test_start': 0, 'test_end': 333, 'nb_epochs': 5, 'testing': True} # Run the multi-gpu trainer for adversarial training flags.update({'batch_size': 128, 'adam_lrn': 0.001, 'dataset': 'mnist', 'only_adv_train': False, 'eval_iters': 1, 'fast_tests': True, 'save_dir': None, 'save_steps': 10000, 'attack_nb_iter_train': 10, 'sync_step': None, 'adv_train': True, 'save': False, 'model_type': 'basic', 'attack_type_test': 'MadryEtAl_y', }) if extra_flags is not None: flags.update(extra_flags) # Run the multi-gpu trainer for adversarial training using 2 gpus # trainer_multigpu by default sets `allow_soft_placement=True` flags.update({'ngpu': 2, 'attack_type_train': 'MadryEtAl_y_multigpu', 'sync_step': 1}) HParams = namedtuple('HParams', flags.keys()) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_m = AccuracyReport() report_m.train_adv_train_clean_eval = report_dict['train'] report_m.adv_train_clean_eval = report_dict['test'] report_m.adv_train_adv_eval = report_dict['MadryEtAl_y'] flags.update({'ngpu': 1, 'attack_type_train': 'MadryEtAl_y', }) hparams = HParams(**flags) np.random.seed(42) tf.set_random_seed(42) with tf.variable_scope(None, 'runner'): report_dict = run_trainer(hparams) report_s = AccuracyReport() report_s.train_adv_train_clean_eval = report_dict['train'] report_s.adv_train_clean_eval = report_dict['test'] report_s.adv_train_adv_eval = report_dict['MadryEtAl_y'] self.assertClose(report_s.train_adv_train_clean_eval, report_m.train_adv_train_clean_eval, atol=5e-2) self.assertClose(report_s.adv_train_clean_eval, report_m.adv_train_clean_eval, atol=2e-2) self.assertClose(report_s.adv_train_adv_eval, report_m.adv_train_adv_eval, atol=5e-2)