def main(_):
    """Generate ENM adversarial images for every batch in ``FLAGS.input_dir``.

    Builds the Inception classifier graph and the attack graph, restores the
    classifier weights from ``FLAGS.checkpoint_path``, runs the attack batch
    by batch and hands the results to ``save_images`` together with
    ``FLAGS.output_dir``.

    :param _: unused positional argument (presumably argv supplied by the
              app runner -- confirm against the call site).
    """
    batch_size = FLAGS.batch_size
    batch_shape = [batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)

        with tf.Session() as sess:
            # Give the attack the live session explicitly rather than
            # passing sess=None and depending on a fallback inside ENM.
            enm = ENM(model, back='tf', sess=sess)
            enm_params = {
                'beta': 0,
                'batch_size': batch_size,
                'learning_rate': 0.1,
                'max_iterations': 1000,
                'binary_search_steps': 9,
            }
            # The parameters live in enm_params; the previous code unpacked
            # an undefined name here and failed with NameError.
            x_adv = enm.generate(x_input, **enm_params)

            saver = tf.train.Saver(slim.get_model_variables())

            # Initialize first, restore second: running the global
            # initializer after the restore would overwrite the checkpoint
            # weights with fresh initial values.
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, FLAGS.checkpoint_path)

            processed = 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                # Single pre-formatted argument so the output is the same
                # under Python 2 and Python 3.
                print('input images:  %s' % (images.shape,))
                # Progress counter: advance by the configured batch size
                # instead of a hard-coded 16.
                processed += batch_size
                print(processed)
                save_images(adv_images, filenames, FLAGS.output_dir)
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8, batch_size=100,
                        nb_classes=10, nb_filters=64, learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)

    Phase 1 trains a model on clean data and logs its test accuracy on
    clean inputs and under JSMA, FGSM, BIM, Elastic Net, DeepFool and VAT
    to ``Clean_jsma_elastic_against5.log``.

    Phase 2 defines a second model, builds a training set made of every
    clean training sample plus one targeted JSMA example per other class
    (saved to ``jsma_training_data.npz``), then trains the second model with
    the Elastic Net predictions as the adversarial term. The
    ``evaluate_against_all`` callback given to ``model_train`` appends
    accuracies to ``Adversarial_jsma_elastic_against5.log``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_filters: forwarded to ``make_basic_cnn`` for both models
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object (no field is filled in by this
             function)
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Label smoothing
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn(nb_filters=nb_filters)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    eval_params = {'batch_size': batch_size}

    def test_accuracy(predictions):
        """Return the accuracy of `predictions` on (X_test, Y_test)."""
        return model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_params)

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # do not log
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    # The context manager closes the log even if an evaluation raises.
    with open("Clean_jsma_elastic_against5.log", "w") as f_out_clean:

        def log_clean(attack_name, acc, console_trailer=''):
            """Print and log the clean model's accuracy under one attack."""
            print('Clean test accuracy on %s adversarial examples: %0.4f%s'
                  % (attack_name, acc, console_trailer))
            f_out_clean.write('Clean test accuracy on ' + attack_name +
                              ' adversarial examples: ' + str(acc) + '\n')

        # Accuracy on legitimate test examples
        accuracy = test_accuracy(preds)
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate test examples: {0}'
              .format(accuracy))
        f_out_clean.write('Test accuracy on legitimate test examples: ' +
                          str(accuracy) + '\n')

        # Clean model against JSMA
        jsma_params = {'theta': 1., 'gamma': 0.1,
                       'clip_min': 0., 'clip_max': 1.,
                       'y_target': None}
        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x_jsma = jsma.generate(x, **jsma_params)
        preds_adv_jsma = model.get_probs(adv_x_jsma)
        log_clean('JSMA', test_accuracy(preds_adv_jsma))

        # Clean model against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x_fgsm = fgsm.generate(x, **fgsm_params)
        preds_adv_fgsm = model.get_probs(adv_x_fgsm)
        log_clean('FGSM', test_accuracy(preds_adv_fgsm))

        # Clean model against BIM
        bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                      'clip_min': 0., 'clip_max': 1.}
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x_bim = bim.generate(x, **bim_params)
        preds_adv_bim = model.get_probs(adv_x_bim)
        log_clean('BIM', test_accuracy(preds_adv_bim))

        # Clean model against Elastic Net
        en_params = {'binary_search_steps': 1,
                     'max_iterations': 100,
                     'learning_rate': 0.1,
                     'batch_size': batch_size,
                     'initial_const': 10}
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x_en = en.generate(x, **en_params)
        preds_adv_en = model.get_probs(adv_x_en)
        log_clean('EN', test_accuracy(preds_adv_en))

        # Clean model against DeepFool
        deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02,
                           'max_iter': 50,
                           'clip_min': 0., 'clip_max': 1.}
        deepfool = DeepFool(model, sess=sess)
        adv_x_df = deepfool.generate(x, **deepfool_params)
        preds_adv_df = model.get_probs(adv_x_df)
        log_clean('DF', test_accuracy(preds_adv_df))

        # Clean model against VAT (console line ends with a blank line)
        vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                      'clip_min': 0., 'clip_max': 1.}
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x_vat = vat.generate(x, **vat_params)
        preds_adv_vat = model.get_probs(adv_x_vat)
        log_clean('VAT', test_accuracy(preds_adv_vat), '\n')

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    model_2 = make_basic_cnn(nb_filters=nb_filters)
    # Predictions of the second model. These were previously built from
    # `model`, so the adversarial training below optimised the clean loss of
    # the wrong network.
    preds_2 = model_2(x)

    # Initialise the new variables before generate_np is used below.
    # NOTE(review): this re-initialises every global variable and model_2 is
    # not trained before the JSMA samples are crafted -- confirm intended.
    sess.run(tf.global_variables_initializer())

    # 1. JSMA
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)

    # 3. Elastic Net
    en_params = {'binary_search_steps': 5,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. DeepFool
    deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0., 'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Basic Iterative
    bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                  'clip_min': 0., 'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. VAT
    vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                  'clip_min': 0., 'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x_vat)

    # Build the augmented training set: every clean sample followed by one
    # targeted JSMA example per other class, all carrying the sample's
    # original label.
    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]  # slice keeps the batch axis
        y_val = Y_train[index]

        # add the normal sample
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # All classes that differ from the label given in the dataset
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)

        for target in target_classes:
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_sample = jsma.generate_np(x_val, **jsma_params)

            X_train_adv_set.append(adv_sample)
            Y_train_adv_set.append(y_val)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # save the crafted set so later runs do not need to regenerate it
    np.savez("jsma_training_data.npz", x_train=X_train_data,
             y_train=Y_train_data)

    # (console label, log-file prefix, predictions) in reporting order. The
    # prefixes reproduce the existing log format exactly.
    checks = [
        ('Legitimate', 'Legitimate accuracy: ', preds_2),
        ('JSMA', 'JSMA accuracy:', preds_adv_random),
        ('FGSM', 'FGSM accuracy:', preds_adv_fgsm),
        ('Base Iterative', 'Base Iterative accuracy:', preds_adv_base_iter),
        ('Elastic Net', 'Elastic Net accuracy:', preds_adv_elastic_net),
        ('DeepFool', 'DeepFool accuracy:', preds_adv_deepfool),
        ('C & W', 'C & W accuracy:', preds_adv_cw),
        ('VAT', 'VAT accuracy:', preds_adv_vat),
    ]

    # The log stays open for the whole training run because the callback
    # writes to it; the context manager guarantees it is flushed and closed.
    with open("Adversarial_jsma_elastic_against5.log", "w") as f_out:

        def evaluate_against_all():
            """Log model_2's accuracy on clean data and under every attack."""
            for label, file_prefix, predictions in checks:
                accuracy = test_accuracy(predictions)
                print('%s accuracy: %0.4f' % (label, accuracy))
                f_out.write(file_prefix + str(accuracy) + "\n")
            # One terminator per evaluation pass, after the last metric.
            f_out.write("*******End of Epoch***********\n\n")
            print("*******End of Epoch***********\n\n")

        print("Now Adversarial Training with Elastic Net + modified X_train and Y_train")
        # NOTE(review): 'train_dir' is a machine-specific absolute path;
        # whether model_train uses it depends on its save option -- confirm.
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
            'filename': 'trained_model.out'
        }
        model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                    predictions_adv=preds_adv_elastic_net,
                    evaluate=evaluate_against_all, verbose=False,
                    args=train_params, rng=rng)

    # Close TF session
    sess.close()

    return report
def JSMA_FGSM_BIM(train_start=0, train_end=60000, test_start=0,
                  test_end=10000, nb_epochs=6, batch_size=128,
                  learning_rate=0.001, clean_train=True, testing=False,
                  backprop_through_attack=False, nb_filters=64):
    """
    MNIST cleverhans tutorial: clean training, then adversarial training
    with JSMA + FGSM + BIM predictions.

    When ``clean_train`` is true, a first model is trained on clean data and
    its test accuracy is printed under JSMA, FGSM, BIM, Elastic Net,
    DeepFool and VAT. A second model is then defined, the same six attacks
    are built against it, and it is trained with the JSMA, FGSM and BIM
    predictions passed as ``predictions_adv``; ``evaluate_2`` prints its
    accuracy under all six attacks.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches (also used as the Elastic
                       Net attack batch size)
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: accepted for signature compatibility; not read here
    :param backprop_through_attack: accepted for signature compatibility;
                                    not read here
    :param nb_filters: forwarded to ``make_basic_cnn`` for both models
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size

    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    # Attack hyper-parameters that are identical in both phases.
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                  'clip_min': 0., 'clip_max': 1.}
    vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                  'clip_min': 0., 'clip_max': 1.}

    def elastic_net_params(binary_search_steps):
        """Elastic Net settings; only the search depth differs per phase."""
        return {'binary_search_steps': binary_search_steps,
                'max_iterations': 100,
                'learning_rate': 0.1,
                'batch_size': source_samples,
                'initial_const': 10}

    def deepfool_params(max_iter):
        """DeepFool settings; only the iteration cap differs per phase."""
        return {'nb_candidate': 10, 'overshoot': 0.02,
                'max_iter': max_iter,
                'clip_min': 0., 'clip_max': 1.}

    def attack_specs(en_steps, df_max_iter):
        """(name, attack class, constructor kwargs, generate kwargs)."""
        return [
            ('JSMA', SaliencyMapMethod, {'back': 'tf'}, jsma_params),
            ('FGSM', FastGradientMethod, {}, fgsm_params),
            ('BIM', BasicIterativeMethod, {}, bim_params),
            ('EN', ElasticNetMethod, {'back': 'tf'},
             elastic_net_params(en_steps)),
            ('DF', DeepFool, {}, deepfool_params(df_max_iter)),
            ('VAT', VirtualAdversarialMethod, {}, vat_params),
        ]

    def adversarial_probs(target_model, attack_cls, attack_kwargs, params):
        """Probabilities of `target_model` on `attack_cls` examples of x."""
        attack = attack_cls(target_model, sess=sess, **attack_kwargs)
        return target_model.get_probs(attack.generate(x, **params))

    def test_accuracy(predictions):
        """Return the accuracy of `predictions` on (X_test, Y_test)."""
        return model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_par)

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Accuracy of the clean model on legitimate test examples
            acc = test_accuracy(preds)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        print("#####Starting attacks on clean model#####")
        # Build and evaluate one attack at a time, in the listed order.
        for name, attack_cls, attack_kwargs, params in attack_specs(1, 50):
            acc = test_accuracy(
                adversarial_probs(model, attack_cls, attack_kwargs, params))
            # The VAT line closes the section with an extra blank line.
            trailer = '\n' if name == 'VAT' else ''
            print('Clean test accuracy on %s adversarial examples: %0.4f%s'
                  % (name, acc, trailer))

        print("Repeating the process, using adversarial training\n")

    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    # Attack the model that is about to be trained. These attacks used to be
    # built on `model`, so the adversarial loss and evaluate_2 measured the
    # clean model, and clean_train=False failed with NameError.
    adv_preds = [
        (name, adversarial_probs(model_2, attack_cls, attack_kwargs, params))
        for name, attack_cls, attack_kwargs, params in attack_specs(5, 200)
    ]

    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Accuracy of model_2 under each attack
        for name, predictions in adv_preds:
            acc = test_accuracy(predictions)
            trailer = '\n' if name == 'VAT' else ''
            print('Test accuracy on %s adversarial examples: %0.4f%s'
                  % (name, acc, trailer))

    # Only JSMA, FGSM and BIM feed the adversarial loss; EN, DF and VAT are
    # evaluated but not trained on.
    preds_2_adv = [predictions for name, predictions in adv_preds
                   if name in ('JSMA', 'FGSM', 'BIM')]

    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    return report
class TestElasticNetMethod(CleverHansTest):
    """Tests for the ElasticNetMethod attack using SimpleModel/TrivialModel."""

    def setUp(self):
        super(TestElasticNetMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def tearDown(self):
        # Release the session opened in setUp so sessions do not pile up
        # across test methods.
        self.sess.close()
        super(TestElasticNetMethod, self).tearDown()

    def _predicted_labels(self, x_val):
        """Return the argmax class of self.model for each row of x_val."""
        return np.argmax(self.sess.run(self.model(x_val)), axis=1)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        """Untargeted attack must change the label of over 90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = self._predicted_labels(x_val)
        new_labs = self._predicted_labels(x_adv)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        """Targeted attack must reach the target for over 90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((100, 2))
        # randint's upper bound is exclusive: (0, 2) draws from both
        # classes, whereas (0, 1) always produced target class 0.
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1
        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = self._predicted_labels(x_adv)

        self.assertGreater(
            np.mean(np.argmax(feed_labs, axis=1) == new_labs), 0.9)

    def test_generate_gives_adversarial_example(self):
        """Symbolic generate() with true labels fed through `y`."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = self._predicted_labels(x_val)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1
        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        self.assertEqual(x_val.shape, x_adv_p.shape)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = self._predicted_labels(x_adv)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial examples must stay inside [clip_min, clip_max]."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        # Small slack on both bounds for floating-point error.
        self.assertLess(-0.201, np.min(x_adv))
        self.assertLess(np.max(x_adv), .301)

    def test_generate_np_high_confidence_targeted_examples(self):
        """Targeted logit margin should sit at the requested confidence."""
        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            # NOTE(review): this exercises CarliniWagnerL2 although the class
            # under test is ElasticNetMethod -- looks like a copy-paste
            # leftover; confirm before switching the attack.
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            target_idx = np.argmax(feed_labs, axis=1)
            good_labs = new_labs[np.arange(10), target_idx]
            bad_labs = new_labs[np.arange(10), 1 - target_idx]

            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + CONFIDENCE)),
                           atol=1e-1))
            self.assertGreater(
                np.mean(np.argmax(new_labs, axis=1) == target_idx), .9)

    def test_generate_np_high_confidence_untargeted_examples(self):
        """Untargeted logit margin should sit at the requested confidence."""
        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            orig_labs = np.argmax(
                self.sess.run(trivial_model.get_logits(x_val)), axis=1)
            # NOTE(review): CarliniWagnerL2 again rather than
            # ElasticNetMethod -- confirm which attack is intended.
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            # No input may keep its original label.
            self.assertEqual(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs), 0)
            self.assertTrue(
                np.isclose(0, np.min(good_labs - (bad_labs + CONFIDENCE)),
                           atol=1e-1))
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, clean_train=True, testing=False,
                   backprop_through_attack=False, nb_filters=64):
    """
    MNIST cleverhans tutorial with FGSM and Elastic Net attacks.

    When ``clean_train`` is true, a first model is trained on clean data and
    evaluated on clean, FGSM and Elastic Net test examples. A second model
    is then defined and trained with the Elastic Net predictions passed as
    ``predictions_adv``; ``evaluate_2`` prints its accuracy on clean, FGSM
    and Elastic Net test examples.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches (also used as the Elastic
                       Net attack batch size)
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: forwarded to ``make_basic_cnn`` for both models
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data (label smoothing is intentionally not applied here)
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    en_eval_params = {'batch_size': source_samples}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    en_params = {'binary_search_steps': 1,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': source_samples,
                 'initial_const': 10}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)
        print("evaluate 1")

        def evaluate():
            # Accuracy of the clean model on legitimate test examples
            acc = model_eval(sess, x, y, preds, X_test, Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            acc = model_eval(sess, x, y, preds, X_train, Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Accuracy of the clean model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test,
                         args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f\n' % acc)

        # Calculate training error. This block used to be repeated verbatim
        # after the Elastic Net evaluation; it is computed once now.
        if testing:
            acc = model_eval(sess, x, y, preds_adv, X_train, Y_train,
                             args=eval_params)
            report.train_clean_train_adv_eval = acc

        # Init the Elastic Net attack object and graph
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x_2 = en.generate(x, **en_params)
        preds_adv_2 = model.get_probs(adv_x_2)

        # Accuracy of the clean model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv_2, X_test, Y_test,
                         args=en_eval_params)
        print('Test accuracy on EN adversarial examples: %0.4f\n' % acc)

        print("Repeating the process, using adversarial training")

    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    fgsm2 = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm2.generate(x, **fgsm_params)

    en2 = ElasticNetMethod(model_2, back='tf', sess=sess)
    adv_x_en = en2.generate(x, **en_params)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        # The stopped tensors must be the ones fed to model_2 below; the
        # result used to be stored under a name that was never read.
        adv_x_fgsm = tf.stop_gradient(adv_x_fgsm)
        adv_x_en = tf.stop_gradient(adv_x_en)

    preds_2_adv_fgsm = model_2(adv_x_fgsm)
    preds_2_adv_en = model_2(adv_x_en)

    print("evaluate 2")

    def evaluate_2():
        # Accuracy of the adversarially trained model on legitimate examples
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)

        # ... on FGSM adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv_fgsm, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)

        # ... on Elastic Net adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv_en, X_test, Y_test,
                              args=en_eval_params)
        print('Test accuracy on EN adversarial examples: %0.4f' % accuracy)

    # Perform and evaluate adversarial training. Only the Elastic Net
    # predictions feed the adversarial loss; the FGSM predictions are
    # evaluated in evaluate_2 but not trained on.
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=[preds_2_adv_en], evaluate=evaluate_2,
                args=train_params, rng=rng)

    return report
class TestElasticNetMethod(CleverHansTest):
    """Tests for the ElasticNetMethod attack on tiny constant-weight models."""

    def setUp(self):
        """Create a session, a two-layer toy model, and the attack under test."""
        super(TestElasticNetMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            """Return the output of a sigmoid hidden layer followed by a
            linear layer, both with fixed constant weights."""
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        """Untargeted generate_np must change the label of >90% of inputs."""
        x_val = np.random.rand(100, 2).astype(np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        # Fewer than 10% of the predictions may remain unchanged.
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        """Targeted generate_np must hit the requested class for >90% of
        inputs."""
        x_val = np.random.rand(100, 2).astype(np.float32)

        # One-hot targets drawn from both classes. randint's upper bound is
        # exclusive, so (0, 2) is required; (0, 1) would always pick class 0.
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertGreater(
            np.mean(np.argmax(feed_labs, axis=1) == new_labs), 0.9)

    def test_generate_gives_adversarial_example(self):
        """The symbolic generate() path, fed the original labels through `y`,
        must change the label of >90% of inputs."""
        import tensorflow as tf

        x_val = np.random.rand(100, 2).astype(np.float32)

        # One-hot encoding of the model's own predictions on the clean inputs.
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1

        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial examples must stay inside [clip_min, clip_max]."""
        x_val = np.random.rand(100, 2).astype(np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        # A margin of 0.001 is allowed on each side of the clip range.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)

    def test_generate_np_high_confidence_targeted_examples(self):
        """Targeted examples must reach the target with a logit margin close
        to the requested confidence."""
        import tensorflow as tf

        def trivial_model(x):
            """Return logits x * [[1, -1]] for a single-feature input."""
            W1 = tf.constant([[1, -1]], dtype=tf.float32)
            res = tf.matmul(x, W1)
            return res

        for CONFIDENCE in [0, 2.3]:
            x_val = (np.random.rand(10, 1) - .5).astype(np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1

            # NOTE(review): this drives CarliniWagnerL2, not ElasticNetMethod,
            # even though it lives in the ElasticNetMethod test case --
            # confirm whether that is intended.
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model(x_adv))

            # Logit of the target class and of the other class, per example.
            good_labs = new_labs[np.arange(10), np.argmax(feed_labs, axis=1)]
            bad_labs = new_labs[np.arange(10),
                                1 - np.argmax(feed_labs, axis=1)]

            # The smallest margin beyond CONFIDENCE must be within 0.1 of 0.
            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))

            self.assertGreater(
                np.mean(np.argmax(new_labs, axis=1) ==
                        np.argmax(feed_labs, axis=1)),
                .9)

    def test_generate_np_high_confidence_untargeted_examples(self):
        """Untargeted examples must all change label, with a logit margin
        close to the requested confidence."""
        import tensorflow as tf

        def trivial_model(x):
            """Return logits x * [[1, -1]] for a single-feature input."""
            W1 = tf.constant([[1, -1]], dtype=tf.float32)
            res = tf.matmul(x, W1)
            return res

        for CONFIDENCE in [0, 2.3]:
            x_val = (np.random.rand(10, 1) - .5).astype(np.float32)

            orig_labs = np.argmax(
                self.sess.run(trivial_model(x_val)), axis=1)

            # NOTE(review): this drives CarliniWagnerL2, not ElasticNetMethod,
            # even though it lives in the ElasticNetMethod test case --
            # confirm whether that is intended.
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model(x_adv))

            # Logit of the class that was NOT originally predicted, and of
            # the originally predicted class, per example.
            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            # No example may keep its original label.
            self.assertEqual(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs), 0)
            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))
def evaluate_ch(model, config, sess, norm='l1', bound=None, verbose=True):
    """Attack `model` on the evaluation set and report accuracy and distortion.

    :param model: object passed to the attack constructor; must also provide
                  `get_predicted_class(x)`.
    :param config: dict read for the keys 'data', 'num_eval_examples' and
                   'eval_batch_size', plus 'data_path' when 'data' is not
                   "mnist".
    :param sess: TF session used to build and run the attack.
    :param norm: distortion norm; only 'l1' is currently accepted (asserted).
    :param bound: optional distortion bound. When given, accuracy is printed
                  for bound, bound/2, bound/4 and bound/8. When None, that
                  report is skipped.
    :param verbose: if True, set the "cleverhans" logger to DEBUG.
    :return: tuple (all_corr_nat, all_corr_adv, lps): boolean arrays marking
             correct predictions on clean and adversarial inputs, and the
             per-example distortion.
    """
    dataset = config['data']
    num_eval_examples = config['num_eval_examples']
    eval_batch_size = config['eval_batch_size']

    if dataset == "mnist":
        from tensorflow.examples.tutorials.mnist import input_data
        mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
        X = mnist.test.images[0:num_eval_examples, :].reshape(-1, 28, 28, 1)
        Y = mnist.test.labels[0:num_eval_examples]
        x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    else:
        import cifar10_input
        data_path = config["data_path"]
        cifar = cifar10_input.CIFAR10Data(data_path)
        # Scaled by 1/255 here; undone below before measuring distortion.
        X = cifar.eval_data.xs[0:num_eval_examples, :].astype(
            np.float32) / 255.0
        Y = cifar.eval_data.ys[0:num_eval_examples]
        x_image = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])

    # Only the L1 path is enabled; the 'l2' branches below are unreachable
    # while this assertion is active.
    assert norm == 'l1'

    if norm == 'l2':
        attack = CarliniWagnerL2(model, sess)
        params = {'batch_size': eval_batch_size, 'binary_search_steps': 9}
    else:
        attack = ElasticNetMethod(model, sess, clip_min=0.0, clip_max=1.0)
        params = {'beta': 1e-2,
                  'decision_rule': 'L1',
                  'batch_size': eval_batch_size,
                  'learning_rate': 1e-2,
                  'max_iterations': 1000}

    if verbose:
        set_log_level(logging.DEBUG, name="cleverhans")

    y = tf.placeholder(tf.int64, shape=[None, 10])
    params['y'] = y
    adv_x = attack.generate(x_image, **params)
    preds_adv = model.get_predicted_class(adv_x)
    preds_nat = model.get_predicted_class(x_image)

    all_preds, all_preds_adv, all_adv_x = batch_eval(
        sess, [x_image, y], [preds_nat, preds_adv, adv_x],
        [X, one_hot(Y, 10)], batch_size=eval_batch_size)

    print('acc nat', np.mean(all_preds == Y))
    print('acc adv', np.mean(all_preds_adv == Y))

    if dataset == "cifar10":
        # Multiply back by 255 so the distortion is measured on that scale.
        X *= 255.0
        all_adv_x *= 255.0

    if norm == 'l2':
        lps = np.sqrt(np.sum(np.square(all_adv_x - X), axis=(1, 2, 3)))
    else:
        lps = np.sum(np.abs(all_adv_x - X), axis=(1, 2, 3))
    print('mean lp: ', np.mean(lps))

    # `bound` defaults to None; dividing None would raise a TypeError after
    # the whole (expensive) attack has already run, so skip the report then.
    if bound is not None:
        for b in [bound, bound / 2.0, bound / 4.0, bound / 8.0]:
            # An example counts as robust at bound b when its adversarial
            # prediction is still correct or its distortion exceeds b.
            print('lp={}, acc={}'.format(
                b, np.mean((all_preds_adv == Y) | (lps > b))))

    all_corr_adv = (all_preds_adv == Y)
    all_corr_nat = (all_preds == Y)
    return all_corr_nat, all_corr_adv, lps
class TestElasticNetMethod(CleverHansTest):
    """Tests for the ElasticNetMethod attack using SimpleModel/TrivialModel."""

    def setUp(self):
        """Create a session, the model, and the attack under test."""
        super(TestElasticNetMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = ElasticNetMethod(self.model, sess=self.sess)

    def test_generate_np_untargeted_gives_adversarial_example(self):
        """Untargeted generate_np must change the label of >90% of inputs."""
        x_val = np.random.rand(100, 2).astype(np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        # Fewer than 10% of the predictions may remain unchanged.
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_targeted_gives_adversarial_example(self):
        """Targeted generate_np must hit the requested class for >90% of
        inputs."""
        x_val = np.random.rand(100, 2).astype(np.float32)

        # One-hot targets drawn from both classes. randint's upper bound is
        # exclusive, so (0, 2) is required; (0, 1) would always pick class 0.
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 2, 100)] = 1

        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertGreater(
            np.mean(np.argmax(feed_labs, axis=1) == new_labs), 0.9)

    def test_generate_gives_adversarial_example(self):
        """The symbolic generate() path, fed the original labels through `y`,
        must change the label of >90% of inputs."""
        x_val = np.random.rand(100, 2).astype(np.float32)

        # One-hot encoding of the model's own predictions on the clean inputs.
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), orig_labs] = 1

        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y=y)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial examples must stay inside [clip_min, clip_max]."""
        x_val = np.random.rand(100, 2).astype(np.float32)

        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        learning_rate=1e-3,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100)

        # A margin of 0.001 is allowed on each side of the clip range.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)

    def test_generate_np_high_confidence_targeted_examples(self):
        """Targeted examples must reach the target with a logit margin close
        to the requested confidence."""
        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = (np.random.rand(10, 1) - .5).astype(np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1

            # NOTE(review): this drives CarliniWagnerL2, not ElasticNetMethod,
            # even though it lives in the ElasticNetMethod test case --
            # confirm whether that is intended.
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            # Logit of the target class and of the other class, per example.
            good_labs = new_labs[np.arange(10), np.argmax(feed_labs, axis=1)]
            bad_labs = new_labs[np.arange(10),
                                1 - np.argmax(feed_labs, axis=1)]

            # The smallest margin beyond CONFIDENCE must be within 0.1 of 0.
            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))

            self.assertGreater(
                np.mean(np.argmax(new_labs, axis=1) ==
                        np.argmax(feed_labs, axis=1)),
                .9)

    def test_generate_np_high_confidence_untargeted_examples(self):
        """Untargeted examples must all change label, with a logit margin
        close to the requested confidence."""
        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = (np.random.rand(10, 1) - .5).astype(np.float32)

            orig_labs = np.argmax(
                self.sess.run(trivial_model.get_logits(x_val)), axis=1)

            # NOTE(review): this drives CarliniWagnerL2, not ElasticNetMethod,
            # even though it lives in the ElasticNetMethod test case --
            # confirm whether that is intended.
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10, clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            # Logit of the class that was NOT originally predicted, and of
            # the originally predicted class, per example.
            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            # No example may keep its original label.
            self.assertEqual(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs), 0)
            self.assertTrue(np.isclose(
                0, np.min(good_labs - (bad_labs + CONFIDENCE)), atol=1e-1))