def __init__(self, model, step_size_iter=0.05, max_perturbation=0.3,
             n_iterations=10, targeted=False, norm_order=np.inf,
             rand_init=None, rand_minmax=0.3, clip_min=None, clip_max=None,
             sanity_checks=True):
    """Record the BIM settings and build the wrapped cleverhans attack.

    :param model: model handed to the parent initializer.
    :param step_size_iter: forwarded to BasicIterativeMethod as ``eps_iter``.
    :param max_perturbation: forwarded to BasicIterativeMethod as ``eps``.
    :param n_iterations: forwarded to BasicIterativeMethod as ``nb_iter``.
    :param targeted: stored on the instance as ``_targeted``.
    :param norm_order: forwarded to BasicIterativeMethod as ``ord``.
    :param rand_init: forwarded to BasicIterativeMethod as ``rand_init``.
    :param rand_minmax: stored on the instance as ``_rand_minmax``.
    :param clip_min: handed to the parent initializer.
    :param clip_max: handed to the parent initializer.
    :param sanity_checks: forwarded to BasicIterativeMethod.
    """
    super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)

    # Keep every attack-specific setting on the instance.
    self._sanity_checks = sanity_checks
    self._rand_minmax = rand_minmax
    self._rand_init = rand_init
    self._norm_order = norm_order
    self._n_iterations = n_iterations
    self._max_perturbation = max_perturbation
    self._step_size_iter = step_size_iter
    self._targeted = targeted

    # self.graph, self.session, self._model, self._clip_min and
    # self._clip_max are not assigned here; presumably the parent
    # initializer provides them -- TODO confirm.
    with self.graph.as_default():
        method_settings = dict(
            sess=self.session,
            eps=self._max_perturbation,
            eps_iter=self._step_size_iter,
            nb_iter=self._n_iterations,
            ord=self._norm_order,
            clip_min=self._clip_min,
            clip_max=self._clip_max,
            rand_init=self._rand_init,
            sanity_checks=self._sanity_checks,
        )
        self._method = BasicIterativeMethod(self._model, **method_settings)
def main(_):
    """Craft BIM adversarial images for every batch found in the input dir.

    Reads its configuration from ``FLAGS`` (max_epsilon, batch_size,
    image_height, image_width, checkpoint_path, master, input_dir,
    output_dir) and writes the adversarial images with ``save_images``.
    """
    # The Inception classifier takes images normalized to [-1, 1], so a
    # pixel difference lies in [0, 2]; rescale epsilon from [0, 255] to [0, 2].
    max_perturbation = 2.0 * FLAGS.max_epsilon / 255.0
    input_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    class_count = 1001

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=input_shape)
        attack = BasicIterativeMethod(InceptionModel(class_count))
        x_adv = attack.generate(
            x_input, eps=max_perturbation, clip_min=-1., clip_max=1.)

        # Run computation
        scaffold = tf.train.Scaffold(
            saver=tf.train.Saver(slim.get_model_variables()))
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir, input_shape):
                perturbed = sess.run(x_adv, feed_dict={x_input: images})
                save_images(perturbed, filenames, FLAGS.output_dir)
def backtracking(sess, x, y, model, x_test, y_test, params, batch_size=128):
    """Generate BIM adversarial examples for ``x_test`` in mini-batches.

    :param sess: TensorFlow session used to build and run the attack.
    :param x: input placeholder fed with slices of ``x_test``.
    :param y: label placeholder fed with slices of ``y_test``.
    :param model: model passed to ``BasicIterativeMethod``.
    :param x_test: array of inputs; the first axis indexes samples.
    :param y_test: array of labels aligned with ``x_test``.
    :param params: keyword arguments forwarded to ``generate``.
    :param batch_size: number of samples fed per ``sess.run`` call.
    :return: adversarial examples for every row of ``x_test``, concatenated
        along axis 0.
    """
    tf.set_random_seed(1822)
    set_log_level(logging.DEBUG)
    from cleverhans.attacks import BasicIterativeMethod
    method = BasicIterativeMethod(model, sess=sess)
    adv_x = method.generate(x, **params)

    num_samples = x_test.shape[0]
    adv_imgs = []
    # Step through the data by offset so a trailing partial batch is kept.
    # The previous floor-division batch count dropped the remainder and
    # produced no batch at all for fewer than batch_size samples.
    for start in range(0, num_samples, batch_size):
        stop = start + batch_size
        adv_imgs.append(
            sess.run(adv_x, feed_dict={
                x: x_test[start:stop],
                y: y_test[start:stop]
            }))

    if not adv_imgs:
        # np.concatenate rejects an empty list; return an empty result.
        return np.asarray(x_test[:0])
    return np.concatenate(adv_imgs, axis=0)
def fgsm_combo():
    """Print test accuracy on clean, FGSM and BIM inputs, in that order.

    Relies on names from the enclosing scope: ``sess``, ``x``, ``y``,
    ``preds``, ``model``, ``X_test``, ``Y_test``, ``eval_par`` and
    ``fgsm_params``.
    """
    def accuracy_of(predictions):
        """Evaluate ``predictions`` against the test labels."""
        return model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_par)

    clean_acc = accuracy_of(preds)
    print('Test accuracy on legitimate examples: %0.4f\n' % clean_acc)

    # Single-step attack, configured through fgsm_params.
    fgsm_examples = FastGradientMethod(model, sess=sess).generate(
        x, **fgsm_params)
    fgsm_acc = accuracy_of(model.get_probs(fgsm_examples))
    print(
        'Test accuracy on adversarial examples generated by fgsm: %0.4f\n' %
        fgsm_acc)

    # Iterative attack, run with the library's default parameters.
    bim_examples = BasicIterativeMethod(model, sess=sess).generate(x)
    bim_acc = accuracy_of(model.get_probs(bim_examples))
    print(
        'Test accuracy on adversarial examples generated by IterativeMethod: %0.4f\n'
        % bim_acc)
class TestBasicIterativeMethod(TestFastGradientMethod):
    """Tests for BasicIterativeMethod on a tiny constant-weight model."""

    def setUp(self):
        """Create the session, the toy model and the attack under test."""
        super(TestBasicIterativeMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            # NOTE(review): h1 is computed but never used; the output is
            # built from x directly. Confirm whether tf.matmul(h1, W2) was
            # intended before changing it, since the assertion thresholds
            # below depend on the current model.
            res = tf.nn.softmax(tf.matmul(x, W2))
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        """A different nb_iter must rebuild the graph (call tf.gradients)."""
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=10)
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

        # Record whether tf.gradients is invoked during the second call.
        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)

        tf.gradients = fn
        try:
            x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                            clip_min=-5.0, clip_max=5.0,
                                            nb_iter=11)
            orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
            self.assertLess(np.mean(orig_labs == new_labs), 0.1)
        finally:
            # Always undo the monkeypatch, even when the attack or the
            # assertion fails, so later tests see the real tf.gradients.
            tf.gradients = old_grads

        self.assertTrue(ok[0])
class TestBasicIterativeMethod(TestFastGradientMethod):
    """Tests for BasicIterativeMethod built on ``SimpleModel``."""

    def setUp(self):
        """Create the session, the model and the attack under test."""
        super(TestBasicIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not passing clip_min and
        clip_max to fgm), this attack fails by
        np.mean(orig_labels == new_labels) == .39.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        """A different nb_iter must rebuild the graph (call tf.gradients)."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=10)
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

        # Record whether tf.gradients is invoked during the second call.
        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)

        tf.gradients = fn
        try:
            x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                            clip_min=-5.0, clip_max=5.0,
                                            nb_iter=11)
            orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
            self.assertLess(np.mean(orig_labs == new_labs), 0.1)
        finally:
            # Always undo the monkeypatch, even when the attack or the
            # assertion fails, so later tests see the real tf.gradients.
            tf.gradients = old_grads

        self.assertTrue(ok[0])
def main(argv):
    """Restore the newest checkpoint and report accuracy under attack.

    The attack is selected by ``FLAGS.attack_type`` ('fgsm' or 'bim'); any
    other value raises ``ValueError``. ``argv`` is not used.

    :raises ValueError: if no checkpoint is found in
        ``FLAGS.checkpoint_dir`` or the attack type is not recognised.
    """
    checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if checkpoint is None:
        raise ValueError("Couldn't find latest checkpoint in " +
                         FLAGS.checkpoint_dir)

    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                  train_end=60000,
                                                  test_start=0,
                                                  test_end=10000)
    assert Y_train.shape[1] == 10

    # NOTE: for compatibility with Madry Lab downloadable checkpoints,
    # we cannot enclose this in a scope or do anything else that would
    # change the automatic naming of the variables.
    model = MadryMNIST()

    x_input = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10])

    attack_type = FLAGS.attack_type
    if attack_type == 'fgsm':
        attack = FastGradientMethod(model)
        adv_x = attack.generate(x_image, eps=0.3, clip_min=0., clip_max=1.)
    elif attack_type == 'bim':
        attack = BasicIterativeMethod(model)
        adv_x = attack.generate(x_image, eps=0.3, clip_min=0., clip_max=1.,
                                nb_iter=50, eps_iter=.01)
    else:
        raise ValueError(attack_type)
    preds_adv = model.get_probs(adv_x)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore the checkpoint
        saver.restore(sess, checkpoint)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        started = time.time()
        acc = model_eval(sess, x_image, y, preds_adv, X_test, Y_test,
                         args={'batch_size': FLAGS.batch_size})
        finished = time.time()
        print("Took", finished - started, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
def main(argv):
    """Load the latest checkpoint and score the model on adversarial data.

    ``FLAGS.attack_type`` picks the attack ('fgsm' or 'bim'); ``argv`` is
    not used.

    :raises ValueError: when no checkpoint can be located or the requested
        attack type is unknown.
    """
    checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if checkpoint is None:
        raise ValueError("Couldn't load checkpoint")

    # Index ranges of the MNIST train / test splits to load.
    split = dict(train_start=0, train_end=60000, test_start=0, test_end=10000)
    X_train, Y_train, X_test, Y_test = data_mnist(**split)

    assert Y_train.shape[1] == 10

    # NOTE: for compatibility with Madry Lab downloadable checkpoints,
    # we cannot enclose this in a scope or do anything else that would
    # change the automatic naming of the variables.
    model = MadryMNIST()

    x_input = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10])

    if FLAGS.attack_type == 'fgsm':
        crafter = FastGradientMethod(model)
        settings = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    elif FLAGS.attack_type == 'bim':
        crafter = BasicIterativeMethod(model)
        settings = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                    'nb_iter': 50, 'eps_iter': .01}
    else:
        raise ValueError(FLAGS.attack_type)
    adv_x = crafter.generate(x_image, **settings)
    preds_adv = model.get_probs(adv_x)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore the checkpoint
        saver.restore(sess, checkpoint)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_args = {'batch_size': FLAGS.batch_size}
        tic = time.time()
        acc = model_eval(
            sess, x_image, y, preds_adv, X_test, Y_test, args=eval_args)
        toc = time.time()
        print("Took", toc - tic, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
def bim_attack(train_data, model, sess):
    """Return BIM adversarial examples for ``train_data``.

    :param train_data: array of inputs passed to ``generate_np``.
    :param model: Keras model, wrapped in ``KerasModelWrapper``.
    :param sess: TensorFlow session handed to the attack.
    :return: whatever ``generate_np`` returns for ``train_data``.
    """
    attack = BasicIterativeMethod(KerasModelWrapper(model), sess=sess)
    return attack.generate_np(
        train_data,
        eps_iter=0.01,
        nb_iter=10,
        clip_min=0.,
        clip_max=1.,
    )
def generate_bim_examples(sess, model, x, y, X, Y, attack_params, verbose,
                          attack_log_fpath):
    """
    Untargeted attack. Y is not needed.

    Default BIM settings are merged with ``attack_params`` through
    ``override_params`` and then applied to ``X``. ``x``, ``Y``, ``verbose``
    and ``attack_log_fpath`` are accepted but not used here.
    """
    attack = BasicIterativeMethod(model, back='tf', sess=sess)
    defaults = dict(
        eps=0.1,
        eps_iter=0.05,
        nb_iter=10,
        y=y,  # y is y for untargeted, y is Y for targeted
        ord=np.inf,
        clip_min=0,
        clip_max=1,
    )
    settings = override_params(defaults, attack_params)
    return attack.generate_np(X, **settings)
def bim_attack(train_data, model, sess):
    """Return BIM adversarial examples for every row of ``train_data``.

    The data is attacked in chunks of 100 samples. A trailing partial chunk
    is now included; it used to be dropped, which left the result shorter
    than the input whenever the sample count was not a multiple of 100.

    :param train_data: numpy array of inputs; axis 0 indexes samples.
    :param model: Keras model, wrapped in ``KerasModelWrapper``.
    :param sess: TensorFlow session handed to the attack.
    :return: numpy array of adversarial examples with as many rows as
        ``train_data`` (an empty array when ``train_data`` is empty).
    """
    chunk_size = 100
    num_samples = train_data.shape[0]
    if num_samples == 0:
        # Nothing to attack; skip building the attack graph entirely.
        return np.empty((0,) + tuple(train_data.shape[1:]),
                        dtype=train_data.dtype)

    wrap = KerasModelWrapper(model)
    bim = BasicIterativeMethod(wrap, sess=sess)
    bim_params = {'eps_iter': 0.01,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.}

    # Collect the chunks and concatenate once, not after every chunk.
    chunks = [
        bim.generate_np(train_data[start:start + chunk_size], **bim_params)
        for start in range(0, num_samples, chunk_size)
    ]
    return np.concatenate(chunks)
def setUp(self):
    """Build a two-layer constant-weight model and the attack under test."""
    super(TestBasicIterativeMethod, self).setUp()
    import tensorflow as tf

    # The world's simplest neural network
    def my_model(x):
        hidden_weights = tf.constant([[1.5, .3], [-2, 0.3]],
                                     dtype=tf.float32)
        hidden = tf.nn.sigmoid(tf.matmul(x, hidden_weights))
        output_weights = tf.constant([[-2.4, 1.2], [0.5, -2.3]],
                                     dtype=tf.float32)
        return tf.nn.softmax(tf.matmul(hidden, output_weights))

    self.sess = tf.Session()
    self.model = my_model
    self.attack = BasicIterativeMethod(self.model, sess=self.sess)
class BIMAttack(AdversarialAttack):
    """Adversarial attack that delegates to cleverhans' BasicIterativeMethod."""

    def __init__(self, model, step_size_iter=0.05, max_perturbation=0.3,
                 n_iterations=10, targeted=False, norm_order=np.inf,
                 rand_init=None, rand_minmax=0.3, clip_min=None,
                 clip_max=None, sanity_checks=True):
        """Store the configuration and construct the wrapped attack.

        :param model: model handed to the parent initializer.
        :param step_size_iter: forwarded as ``eps_iter``.
        :param max_perturbation: forwarded as ``eps``.
        :param n_iterations: forwarded as ``nb_iter``.
        :param targeted: whether labels passed to ``attack_method`` are
            treated as targets (``y_target``) instead of true labels (``y``).
        :param norm_order: forwarded as ``ord``.
        :param rand_init: forwarded as ``rand_init``.
        :param rand_minmax: passed to every ``generate`` call.
        :param clip_min: handed to the parent initializer.
        :param clip_max: handed to the parent initializer.
        :param sanity_checks: forwarded as ``sanity_checks``.
        """
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)

        self._sanity_checks = sanity_checks
        self._rand_minmax = rand_minmax
        self._rand_init = rand_init
        self._norm_order = norm_order
        self._n_iterations = n_iterations
        self._max_perturbation = max_perturbation
        self._step_size_iter = step_size_iter
        self._targeted = targeted

        # self.graph, self.session, self._model, self._clip_min and
        # self._clip_max are not assigned in this class; presumably the
        # parent initializer provides them -- TODO confirm.
        with self.graph.as_default():
            method_settings = dict(
                sess=self.session,
                eps=self._max_perturbation,
                eps_iter=self._step_size_iter,
                nb_iter=self._n_iterations,
                ord=self._norm_order,
                clip_min=self._clip_min,
                clip_max=self._clip_max,
                rand_init=self._rand_init,
                sanity_checks=self._sanity_checks,
            )
            self._method = BasicIterativeMethod(self._model,
                                                **method_settings)

    def attack_method(self, labels):
        """Return the adversarial output of the wrapped attack.

        :param labels: ``None`` to call ``generate`` without labels;
            otherwise passed as ``y_target`` when the attack is targeted
            and as ``y`` when it is not.
        :return: the result of ``BasicIterativeMethod.generate``.
        """
        call_args = {'x': self._x_clean, 'rand_minmax': self._rand_minmax}
        if labels is not None:
            label_key = 'y_target' if self._targeted else 'y'
            call_args[label_key] = labels
        return self._method.generate(**call_args)
def get_BIM_samples(loaded_model, samples, nb_iter):
    """Craft BIM adversarial examples for ``samples``.

    :param loaded_model: Keras model, wrapped in ``KerasModelWrapper``.
    :param samples: array of inputs passed to ``generate_np``.
    :param nb_iter: number of attack iterations.
    :return: whatever ``generate_np`` returns for ``samples``.
    """
    # Run the attack inside the session reported by the Keras backend.
    attack = BasicIterativeMethod(KerasModelWrapper(loaded_model),
                                  sess=backend.get_session())
    return attack.generate_np(
        samples,
        eps_iter=0.05,
        nb_iter=nb_iter,
        clip_min=0.,
        clip_max=1.,
    )
def generate_bim_examples(sess, model, x, y, X, Y, attack_params, verbose,
                          attack_log_fpath):
    """
    Untargeted attack. Y is not needed.

    Only ``sess``, ``model`` and ``X`` are used; the attack settings are
    fixed in the body and ``attack_params`` is ignored.
    """
    wrapped_model = KerasModelWrapper(model)
    attack = BasicIterativeMethod(wrapped_model, back='tf', sess=sess)
    return attack.generate_np(
        X,
        eps=0.1,
        eps_iter=0.05,
        nb_iter=10,
        clip_min=0,
        clip_max=1,
    )
def backtracking(sess, x, model, x_test, params, batch_size=128):
    """Generate BIM adversarial examples for ``x_test`` in mini-batches.

    The final batch absorbs any remainder, so it may hold up to
    ``2 * batch_size - 1`` samples. Inputs with fewer than ``batch_size``
    samples are processed as one batch; they used to produce no batch and
    crash in ``np.concatenate``.

    :param sess: TensorFlow session used to build and run the attack.
    :param x: input placeholder fed with slices of ``x_test``.
    :param model: model passed to ``BasicIterativeMethod``.
    :param x_test: numpy array of inputs; axis 0 indexes samples.
    :param params: keyword arguments forwarded to ``generate``.
    :param batch_size: nominal number of samples per ``sess.run`` call.
    :return: adversarial examples for every row of ``x_test``.
    """
    num_samples = x_test.shape[0]
    if num_samples == 0:
        # Nothing to attack; skip building the attack graph entirely.
        return np.asarray(x_test[:0])

    from cleverhans.attacks import BasicIterativeMethod
    method = BasicIterativeMethod(model, sess=sess)
    adv_x = method.generate(x, **params)

    # Always run at least one batch so short inputs are still attacked.
    num_batch = max(1, num_samples // batch_size)
    adv_imgs = []
    for i in range(num_batch):
        if i + 1 == num_batch:
            # Last batch: take everything that is left.
            x_feed = x_test[i * batch_size:]
        else:
            x_feed = x_test[i * batch_size:(i + 1) * batch_size]
        adv_imgs.append(sess.run(adv_x, feed_dict={x: x_feed}))
    return np.concatenate(adv_imgs, axis=0)
def evaluate_checkpoint(filename):
    """Restore ``filename`` and print accuracy on adversarial test examples.

    The attack is chosen by the enclosing-scope name ``attack_method``
    ('BIM', 'FGM' or 'PGD'). Also relies on ``model``, ``x_image``, ``y``,
    ``X_test``, ``Y_test`` and ``batch_size`` from the enclosing scope.

    :param filename: checkpoint path passed to ``saver.restore``.
    :raises ValueError: if ``attack_method`` is not a supported name. This
        used to fall through and fail later with a NameError on ``adv_x``.
    """
    if attack_method == 'BIM':
        bim = BasicIterativeMethod(model)
        bim_params = {
            'eps': 0.3,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 50,
            'eps_iter': .01
        }
        adv_x = bim.generate(x_image, **bim_params)
    elif attack_method == 'FGM':
        FGM_attack = FastGradientMethod(model)
        FGM_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        adv_x = FGM_attack.generate(x_image, **FGM_params)
    elif attack_method == 'PGD':
        pgd = ProjectedGradientDescent(model)
        pgd_params = {
            'eps': 0.09,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 40,
            'eps_iter': .01
        }
        adv_x = pgd.generate(x_image, **pgd_params)
    else:
        raise ValueError(
            "Unsupported attack_method: %r (expected 'BIM', 'FGM' or 'PGD')"
            % (attack_method,))
    preds_adv = model.get_probs(adv_x)

    with tf.Session() as sess:
        # Restore the checkpoint
        saver = tf.train.Saver(var_list=model.all_variables)
        saver.restore(sess, filename)

        eval_par = {'batch_size': batch_size}
        t1 = time.time()
        acc = model_eval(sess, x_image, y, preds_adv, X_test, Y_test,
                         args=eval_par)
        t2 = time.time()
        print("Took", t2 - t1, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
def adversarial_training(epsilon=0.3, eps_iter=0.05, nb_iter=10, order=np.inf):
    """Adversarially train ``model_2`` against BIM examples and evaluate it.

    Relies on names from the enclosing scope (``sess``, ``x``, ``y``,
    ``wrap_2``, ``model_2``, ``preds_2``, ``fgsm_params``, ``report``,
    ``train_params``, ``rng``, ``batch_size`` and the data arrays).

    NOTE(review): none of the four parameters is read in the body; the
    attack is configured solely by ``fgsm_params`` -- confirm intent.
    """
    attack = BasicIterativeMethod(wrap_2, sess=sess)
    adv_inputs = attack.generate(x, **fgsm_params)
    preds_2_adv = model_2(adv_inputs)

    def evaluate_2():
        eval_params = {'batch_size': batch_size}
        # Clean test inputs first, then adversarial ones.
        checks = (
            (preds_2, 'Test accuracy on legitimate examples: %0.4f',
             'adv_train_clean_eval'),
            (preds_2_adv, 'Test accuracy on adversarial examples: %0.4f',
             'adv_train_adv_eval'),
        )
        for predictions, message, report_field in checks:
            accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                                  args=eval_params)
            print(message % accuracy)
            setattr(report, report_field, accuracy)

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, save=False, rng=rng)
def _BIM(self):
    """Sweep BIM perturbation budgets and save the resulting images.

    For the 'MNIST' dataset the budget grows in five steps of 0.1; for
    'CIFAR10' it grows in ten steps of 1 and is divided by 255 before use.
    Each step runs ten iterations with a per-iteration step of a tenth of
    the budget, logs the epsilon label and calls ``self.save_images``.
    """
    attack = BasicIterativeMethod(self.wrapped_model, sess=self.sess)

    def craft_and_save(eps_label, budget):
        """Build the attack for one budget, log it and save the images."""
        settings = {
            'eps': budget,
            'eps_iter': budget / 10,
            'nb_iter': 10,
            'y': self.y,
            'clip_min': 0.,
            'clip_max': 1.
        }
        adv_x = tf.stop_gradient(attack.generate(self.x, **settings))
        print(f'Epsilon: {eps_label}')
        self.out_file.write(f'Epsilon: {eps_label}\n')
        self.save_images(adv_x, self.save_loc + f'_e{eps_label}')

    eps = 0
    if self.dataset == 'MNIST':
        for _ in range(5):
            eps = eps + 0.1
            craft_and_save(eps, eps)

    if self.dataset == 'CIFAR10':
        for _ in range(10):
            eps = eps + 1
            # The label stays on the 0-255 scale; the attack budget is
            # rescaled by 255.
            craft_and_save(eps, eps / 255)
def get_adversarial_attack_and_params(attack_name, wrap, sess):
    """Build the attack object selected by ``attack_name``.

    :param attack_name: one of "fgsm", "deepfool", "lbfgs", "saliency"
        or "bim".
    :param wrap: wrapped model passed to the attack constructor.
    :param sess: TensorFlow session passed to the attack constructor.
    :return: tuple ``(attack, params, stop_gradient)``. ``params`` is a dict
        of generate arguments for "fgsm" and ``None`` otherwise;
        ``stop_gradient`` is ``True`` only for "fgsm".
    :raises ValueError: if ``attack_name`` is not a supported name. This
        used to surface as an UnboundLocalError on ``attack``.
    """
    params = None
    stop_gradient = False

    if attack_name == "fgsm":
        attack = FastGradientMethod(wrap, sess=sess)
        params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        stop_gradient = True
    elif attack_name == "deepfool":
        attack = DeepFool(wrap, sess=sess)
    elif attack_name == "lbfgs":
        attack = LBFGS(wrap, sess=sess)
    elif attack_name == "saliency":
        attack = SaliencyMapMethod(wrap, sess=sess)
    elif attack_name == "bim":
        attack = BasicIterativeMethod(wrap, sess=sess)
    else:
        raise ValueError("Unknown attack name: %r" % (attack_name,))

    return attack, params, stop_gradient
def next_images():
    """Yield adversarial image batches produced by FGSM or iterative FGSM.

    Relies on names from the enclosing scope: ``mode`` ('fgsm' or 'ifgsm'),
    ``eps``, ``batch_shape``, ``importer`` and ``is_return_orig_images``.

    Yields ``(filenames, adversarial_images, images)`` when
    ``is_return_orig_images`` is truthy, else
    ``(filenames, adversarial_images)``.

    :raises Exception: if ``mode`` is neither 'fgsm' nor 'ifgsm'.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    graph_fgsm = tf.Graph()
    print("{} generator graph is ready!".format(mode))

    with graph_fgsm.as_default():
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModelProb(importer.num_classes, x_input)

        params = {'eps': eps}
        if mode == 'fgsm':
            graph = FastGradientMethod(model)
        elif mode == 'ifgsm':
            params['nb_iter'] = 10
            graph = BasicIterativeMethod(model)
        else:
            raise Exception("Not supported mode")

        print('graph params: {}'.format(params))
        x_adv = graph.generate(x_input, **params)

        scaffold = tf.train.Scaffold(
            saver=tf.train.Saver(slim.get_model_variables()))
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=importer.checkpoint_path,
            master=importer.tensorflow_master)
        image_iterator = importer.load_images_generator(batch_shape)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in image_iterator:
                # A None filename entry is treated as end of stream.
                if filenames is None:
                    break
                adversarial_images = sess.run(
                    x_adv, feed_dict={x_input: images})
                if is_return_orig_images:
                    yield filenames, adversarial_images, images
                else:
                    yield filenames, adversarial_images
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128, epsilon=0.3,
                   learning_rate=0.001, train_dir="/tmp",
                   filename="mnist.ckpt", load_model=False, testing=False):
    """
    MNIST CleverHans tutorial: train (or load) a Keras model, then measure
    its accuracy under FGSM and BIM attacks for a range of epsilons.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param epsilon: overwritten by the epsilon loops below before it is
                    ever read, so the passed value has no effect
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object (but see the NOTE(review) on the bare
             ``return`` further down)
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model_BIM()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    ckpt = tf.train.get_checkpoint_state(train_dir)
    # False (not None) marks "no checkpoint" so the truth test below works.
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    rng = np.random.RandomState([2017, 8, 30])
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, save=False, rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)
    print("FastGradientMethod")
    fgsm1 = FastGradientMethod(wrap, sess=sess)
    # Sweep FGSM epsilons; this rebinds the ``epsilon`` parameter.
    for epsilon in [0.005, 0.01, 0.05, 0.1, 0.5, 1.0]:
        # NOTE(review): the trailing comma looks like a Python 2 leftover;
        # with the print function it only builds a discarded tuple.
        print("Epsilon =", epsilon),
        fgsm_params = {'eps': epsilon, 'clip_min': None, 'clip_max': None}
        adv_x = fgsm1.generate(x, **fgsm_params)
        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        # Overwritten on every iteration; only the last value survives.
        report.clean_train_adv_eval = acc

    print("BasicIterativeMethod")
    bim = BasicIterativeMethod(wrap, sess=sess)
    # Sweep (epsilon, norm order) pairs: six L-inf budgets, then two L2.
    for epsilon, order in zip(
            [0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 0.5, 1.0],
            [np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, 2, 2]):
        print("Epsilon =", epsilon),
        fgsm_params = {
            'eps': epsilon,
            'clip_min': 0.,
            'clip_max': 1.,
            'ord': order
        }
        adv_x = bim.generate(x, **fgsm_params)
        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        # Overwritten on every iteration; only the last value survives.
        report.clean_train_adv_eval = acc

    # Calculating train error (uses preds_adv from the last BIM iteration)
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_train, Y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    # NOTE(review): this bare return yields None, not ``report``, and makes
    # the adversarial-training code below unreachable. The source formatting
    # did not preserve whether it sat inside the ``if testing:`` block --
    # confirm the intended scope and return value.
    return

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    #fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    bim2 = BasicIterativeMethod(wrap_2, sess=sess)
    # Reuses fgsm_params as left by the last BIM loop iteration above.
    preds_2_adv = model_2(bim2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, save=False, rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
def JSMA_FGSM_BIM(train_start=0, train_end=60000, test_start=0,
                  test_end=10000, nb_epochs=6, batch_size=128,
                  learning_rate=0.001, clean_train=True, testing=False,
                  backprop_through_attack=False, nb_filters=64):
    """
    MNIST cleverhans tutorial: train a clean model, attack it with JSMA,
    FGSM, BIM, EN, DeepFool and VAT, then train a second model using the
    JSMA / FGSM / BIM adversarial predictions.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
                    (not read anywhere in this function body)
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
                                    (not read anywhere in this function body)
    :param nb_filters: forwarded to ``make_basic_cnn``
    :return: NOTE(review): documented upstream as an AccuracyReport object,
             but no return statement is present, so the function returns
             None -- confirm.
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Used below as the batch_size of the ElasticNet attack.
    source_samples = batch_size

    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Not read again in this function.
    model_path = "models/mnist"

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    # NOTE(review): the source formatting did not preserve where this
    # ``if`` body ends; the extent below follows the upstream cleverhans
    # tutorial layout -- confirm. ``model`` is only bound inside this
    # branch but is also used by the adversarial-training section.
    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(
                sess, x, y, preds, X_test, Y_test, args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        print("#####Starting attacks on clean model#####")
        ################################################################
        # Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        # Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        # Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }

        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        # Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            # 'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }

        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        # Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }

        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        # Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }

        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")

    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    # NOTE(review): every attack below is built on, and scored with,
    # ``model`` (the clean model), not ``model_2`` -- confirm this is the
    # intended setup for adversarial training of ``model_2``.
    ################################################################
    # Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x)
    ################################################################
    # Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x)
    ################################################################
    # Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }

    bim = BasicIterativeMethod(model, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x)
    ################################################################
    # Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        # 'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }

    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x)
    ################################################################
    # Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }

    deepfool = DeepFool(model, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x)
    ################################################################
    # Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }

    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    # Only the JSMA, FGSM and BIM predictions are passed to training; the
    # EN and DF entries are commented out.
    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm, preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):
    """Craft adversarial examples for ``x_test`` with a CleverHans attack.

    The attack graph is built once on the placeholder ``x`` and then run over
    ``x_test`` in full batches of ``batch_size``.  A trailing partial batch is
    dropped, so the result may hold fewer rows than ``x_test``.

    :param sess: TensorFlow session used to build and run the attack.
    :param x: input placeholder that slices of ``x_test`` are fed through.
    :param model: model object handed to the CleverHans attack constructor.
    :param x_test: array of inputs, sliced along its first axis.
    :param attack_method: one of "fgsm", "basic_iterative",
        "momentum_iterative", "saliency", "virtual", "cw", "elastic_net",
        "deepfool", "lbfgs", "madry" or "SPSA".
    :param target: optional class index (0-9) for a targeted attack.  Must be
        None for "saliency", "virtual" and "deepfool"; required for "lbfgs".
    :param batch_size: number of inputs fed per ``sess.run`` call; also the
        number of rows of the one-hot ``y_target`` tensor.
    :return: the per-batch attack outputs concatenated along axis 0.
    :raises ValueError: if ``attack_method`` is not recognised, or ``x_test``
        does not contain a single full batch.
    :raises AssertionError: if ``target`` is incompatible with the attack.
    """

    def one_hot_targets():
        # One-hot row of the target class (10 classes), repeated so that the
        # constant has exactly `batch_size` rows.
        return tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8. / 255,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = one_hot_targets()
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf}
        if target is not None:
            params["y_target"] = one_hot_targets()
        method = BasicIterativeMethod(model, sess=sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = one_hot_targets()
        method = MomentumIterativeMethod(model, sess=sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta': 8. / 255,
                  'gamma': 0.1,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None
        method = SaliencyMapMethod(model, sess=sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps': 8. / 255,
                  'num_iterations': 10,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None
        method = VirtualAdversarialMethod(model, sess=sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence": 0,
            # Must match the size of the batches fed below (was fixed at 128).
            "batch_size": batch_size,
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
        if target is not None:
            params["y_target"] = one_hot_targets()
        method = CarliniWagnerL2(model, sess=sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule": "EN",
            "confidence": 0,
            # Must match the size of the batches fed below (was fixed at 128).
            "batch_size": batch_size,
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
        if target is not None:
            params["y_target"] = one_hot_targets()
        method = ElasticNetMethod(model, sess=sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate": 10,
            "overshoot": 1e-3,
            "max_iter": 100,
            "nb_classes": 10,
            "clip_min": 0,
            "clip_max": 1
        }
        assert target is None
        method = DeepFool(model, sess=sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            # Must match the size of the batches fed below (was fixed at 128).
            'batch_size': batch_size,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "initial_const": 1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        # L-BFGS is only used here as a targeted attack.
        assert target is not None
        params["y_target"] = one_hot_targets()
        method = LBFGS(model, sess=sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter': 10,
                  'ord': np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = one_hot_targets()
        method = MadryEtAl(model, sess=sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon': 1. / 255,
            'num_steps': 10,
            'is_targeted': False,
            'early_stop_loss_threshold': None,
            'learning_rate': 0.01,
            'delta': 0.01,
            # NOTE(review): presumably SPSA's own sampling size rather than the
            # image batch size, so it is left at the original value - confirm.
            'batch_size': 128,
            'spsa_iters': 1,
            'is_debug': False
        }
        if target is not None:
            params["y_target"] = one_hot_targets()
            params["is_targeted"] = True
        method = SPSA(model, sess=sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)

    # Only full batches are attacked; the remainder of x_test is ignored.
    num_batch = x_test.shape[0] // batch_size
    if num_batch == 0:
        raise ValueError(
            "x_test holds fewer than batch_size (%d) samples; nothing to attack"
            % batch_size)

    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i*batch_size:(i+1)*batch_size]
        adv_imgs.append(sess.run(adv_x, feed_dict={x: x_feed}))
    return np.concatenate(adv_imgs, axis=0)
def setUp(self):
    """Prepare the fixtures: a TF session, a SimpleModel and the attack under test."""
    super(TestBasicIterativeMethod, self).setUp()

    # Attributes are assigned in the same order as before: session, model, attack.
    self.sess = session = tf.Session()
    self.model = simple_model = SimpleModel()
    self.attack = BasicIterativeMethod(simple_model, sess=session)
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2, eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile): logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" % (fgsm_eps, learning_rate, alpha, total_eps)) """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) eps_benign = 1 / (1 + eps2_ratio) * (epsilon2) eps_adv = eps2_ratio / (1 + eps2_ratio) * (epsilon2) # Parameters Declarification #with tf.variable_scope('conv1') as scope: kernel1 = _variable_with_weight_decay( 'kernel1', shape=[4, 4, 3, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[AECODER_VARIABLES]) biases1 = _bias_on_cpu('biases1', [128], tf.constant_initializer(0.0), collect=[AECODER_VARIABLES]) shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2 * Delta2 / (L * sensitivity ) #2*3*(14*14 + 2)*16/(L*sensitivity) #with tf.variable_scope('conv2') as scope: kernel2 = _variable_with_weight_decay( 'kernel2', shape=[5, 5, 128, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases2 = _bias_on_cpu('biases2', [128], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #with tf.variable_scope('conv3') as scope: kernel3 = _variable_with_weight_decay( 'kernel3', shape=[5, 5, 256, 256], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases3 = _bias_on_cpu('biases3', [256], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #with tf.variable_scope('local4') as scope: kernel4 = _variable_with_weight_decay( 'kernel4', shape=[int(image_size / 4)**2 * 256, hk], stddev=0.04, wd=0.004, collect=[CONV_VARIABLES]) biases4 = _bias_on_cpu('biases4', [hk], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #with tf.variable_scope('local5') as 
scope: kernel5 = _variable_with_weight_decay( 'kernel5', [hk, 10], stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases5 = _bias_on_cpu('biases5', [10], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) #scale2 = tf.Variable(tf.ones([hk])) #beta2 = tf.Variable(tf.zeros([hk])) params = [ kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4, kernel5, biases5 ] ######## # Build a Graph that computes the logits predictions from the # inference model. FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128]) noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) # Auto-Encoder # Enc_Layer2 = EncLayer(inpt=adv_x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(adv_x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h) Enc_Layer3 = EncLayer(inpt=x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2( xShape=tf.shape(x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h) cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost) / 2.0) ### x_image = x + noise y_conv = inference(x_image, FM_h, params) softmax_y_conv = tf.nn.softmax(y_conv) y_ = tf.placeholder(tf.float32, [None, 10]) adv_x += adv_noise y_adv_conv = inference(adv_x, FM_h, params) adv_y_ = tf.placeholder(tf.float32, [None, 10]) # Calculate loss. 
Apply Taylor Expansion for the output layer perturbW = perturbFM * params[8] loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha, perturbW) # Build a Graph that trains the model with one batch of examples and # updates the model parameters. #pretrain_step = tf.train.AdamOptimizer(1e-4).minimize(pretrain_adv, global_step=global_step, var_list=[kernel1, biases1]); pretrain_var_list = tf.get_collection(AECODER_VARIABLES) train_var_list = tf.get_collection(CONV_VARIABLES) #print(pretrain_var_list) #print(train_var_list) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize( pretrain_adv + pretrain_benign, global_step=global_step, var_list=pretrain_var_list) train_op = cifar10.train(loss, global_step, learning_rate, _var_list=train_var_list) sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) sess.run(kernel1.initializer) dp_epsilon = 1.0 _gamma = sess.run(gamma) _gamma_x = Delta2 / L epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x) print(epsilon2_update / _gamma + epsilon2_update / _gamma_x) print(epsilon2_update) delta_r = fgsm_eps * (image_size**2) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW * (14**2) #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon) dp_mult = (Delta2 / (L * epsilon2_update)) / (delta_r / dp_epsilon) + ( 2 * Delta2 / (L * epsilon2_update)) / (delta_h / dp_epsilon) dynamic_eps = tf.placeholder(tf.float32) """y_test = inference(x, FM_h, params) softmax_y = tf.nn.softmax(y_test); c_x_adv = fgsm(x, softmax_y, eps=dynamic_eps/3, clip_min=-1.0, clip_max=1.0) x_adv = tf.reshape(c_x_adv, [L, image_size, image_size, 3])""" attack_switch = { 'fgsm': True, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 
'stm': False } ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_input_probs, output_layer='probs', params=params, image_size=image_size, adv_noise=adv_noise) # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]) attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = 
MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry #====================== attack ========================= #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Build an initialization operation to run below. init = tf.initialize_all_variables() sess.run(init) # Start the queue runners. #tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint, sess.graph) # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int( ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') T = int(int(math.ceil(D / L)) * epochs + 1) # number of steps step_for_epoch = int(math.ceil(D / L)) #number of steps for one epoch perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128) perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128]) #W_conv1Noise = np.random.laplace(0.0, Delta2/(L*epsilon2), 32 * 32 * 3).astype(np.float32) #W_conv1Noise = np.reshape(_W_conv1Noise, [32, 32, 3]) perturbFM_h = np.random.laplace(0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128) perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128]) #_W_adv = np.random.laplace(0.0, 0, 32 * 32 * 3).astype(np.float32) #_W_adv = np.reshape(_W_adv, [32, 32, 3]) #_perturbFM_h_adv = np.random.laplace(0.0, 0, 10*10*128) #_perturbFM_h_adv = np.reshape(_perturbFM_h_adv, [10, 10, 128]); test_size = len(cifar10_data.test.images) #beta = redistributeNoise(os.getcwd() + '/LRP_0_25_v12.txt') 
#BenignLNoise = generateIdLMNoise(image_size, Delta2, eps_benign, L) #generateNoise(image_size, Delta2, eps_benign, L, beta); #AdvLnoise = generateIdLMNoise(image_size, Delta2, eps_adv, L) Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L) #generateNoise(image_size, Delta2, eps_adv, L, beta); Noise_test = generateIdLMNoise( image_size, 0, epsilon2_update, L) #generateNoise(image_size, 0, 2*epsilon2, test_size, beta); emsemble_L = int(L / 3) preT_epochs = 100 pre_T = int(int(math.ceil(D / L)) * preT_epochs + 1) """logfile.write("pretrain: \n") for step in range(_global_step, _global_step + pre_T): d_eps = random.random()*0.5; batch = cifar10_data.train.next_batch(L); #Get a random batch. adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test}) for iter in range(0, 2): adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test}) #sess.run(pretrain_step, feed_dict = {x: batch[0], noise: AdvLnoise, FM_h: perturbFM_h}); batch = cifar10_data.train.next_batch(L); sess.run(pretrain_step, feed_dict = {x: np.append(batch[0], adv_images, axis = 0), noise: Noise, FM_h: perturbFM_h}); if step % int(25*step_for_epoch) == 0: cost_value = sess.run(cost, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})/(test_size*128) logfile.write("step \t %d \t %g \n"%(step, cost_value)) print(cost_value) print('pre_train finished')""" _global_step = 0 for step in xrange(_global_step, _global_step + T): start_time = time.time() d_eps = random.random() * 0.5 batch = cifar10_data.train.next_batch(emsemble_L) #Get a random batch. 
y_adv_batch = batch[1] """adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test}) for iter in range(0, 2): adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})""" adv_images_ifgsm = sess.run(attack_tensor_dict['ifgsm'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) batch = cifar10_data.train.next_batch(emsemble_L) y_adv_batch = np.append(y_adv_batch, batch[1], axis=0) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) batch = cifar10_data.train.next_batch(emsemble_L) y_adv_batch = np.append(y_adv_batch, batch[1], axis=0) adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict={ x: batch[0], adv_noise: Noise, mu_alpha: [d_eps] }) adv_images = np.append(np.append(adv_images_ifgsm, adv_images_mim, axis=0), adv_images_madry, axis=0) batch = cifar10_data.train.next_batch(L) #Get a random batch. sess.run(pretrain_step, feed_dict={ x: batch[0], adv_x: adv_images, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h }) _, loss_value = sess.run( [train_op, loss], feed_dict={ x: batch[0], y_: batch[1], adv_x: adv_images, adv_y_: y_adv_batch, noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h }) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' # report the result periodically if step % (50 * step_for_epoch) == 0 and step >= (300 * step_for_epoch): '''predictions_form_argmax = np.zeros([test_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test}) argmax_predictions = np.argmax(softmax_predictions, axis=1) """for n_draws in range(0, 2000): _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2, L) _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*128) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]);""" for j in 
range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 2000; """softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: _BenignLNoise, FM_h: _perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1)""" final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(cifar10_data.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) acc = np.sum(is_correct)*1.0/test_size robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_utility = np.sum(is_robust)*1.0/test_size log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(step, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)''' #===================adv samples===================== log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format( step, total_eps) """adv_images_dict = {} for atk in attack_switch.keys(): if attack_switch[atk]: adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict ={x:cifar10_data.test.images}) print("Done with the generating of Adversarial samples")""" #===================adv samples===================== adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} test_bach_size = 5000 for atk in attack_switch.keys(): print(atk) if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: test_bach = cifar10_data.test.next_batch( test_bach_size) adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict={ x: test_bach[0], adv_noise: Noise_test, mu_alpha: [fgsm_eps] }) print("Done adversarial examples") ### 
PixelDP Robustness ### predictions_form_argmax = np.zeros( [test_bach_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={ x: adv_images_dict, noise: Noise, FM_h: perturbFM_h }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1000): _BenignLNoise = generateIdLMNoise( image_size, Delta2, epsilon2_update, L) _perturbFM_h = np.random.laplace( 0.0, 2 * Delta2 / (epsilon2_update * L), 14 * 14 * 128) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]) if n_draws == 500: print("n_draws = 500") for j in range(test_bach_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: (_BenignLNoise / 10 + Noise), FM_h: perturbFM_h }) * sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h / 10 + perturbFM_h) }) #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j in range(test_bach_size): is_correct.append( np.argmax(test_bach[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult is_robust.append( robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum( is_correct) * 1.0 / test_bach_size robust_adv_acc_dict[atk] = np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum( is_robust) * 1.0 / test_bach_size ############################## for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str 
+= " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n') # Save the model checkpoint periodically. if step % (10 * step_for_epoch) == 0 and (step > _global_step): num_examples_per_step = L examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) """if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):
def _mnist_tutorial_attack(attack_name, wrap, sess):
    """Return ``(attack, params)`` for 'fgsm', 'bim' or 'mifgsm'; exit on any other name."""
    if attack_name == 'fgsm':
        att_method = FastGradientMethod(wrap, sess=sess)
        att_method_params = {'eps': 0.2,
                             'clip_min': 0.,
                             'clip_max': 1.}
    elif attack_name == 'bim':
        att_method = BasicIterativeMethod(wrap, sess=sess)
        att_method_params = {'eps': 0.2,
                             'eps_iter': 0.06,
                             'nb_iter': 10,
                             'clip_min': 0.,
                             'clip_max': 1.}
    elif attack_name == 'mifgsm':
        att_method = MomentumIterativeMethod(wrap, sess=sess)
        att_method_params = {'eps': 0.2,
                             'eps_iter': 0.08,
                             'nb_iter': 10,
                             'decay_factor': 0.4,
                             'clip_min': 0.,
                             'clip_max': 1.}
    else:
        exit("the attack method must be fgsm,bim,mifgsm")
    return att_method, att_method_params


def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL, testing=False,
                   label_smoothing=0.1,
                   adversarial_training=ADVERSARIAL_TRAINING,
                   attacking=ATTACKING, origin_method=ORIGIN_METHOD,
                   save_model=SAVE_MODEL, model_type=MODEL_TYPE):
    """
    MNIST CleverHans tutorial: adversarially train (or reload) a Keras model
    and evaluate it on clean and adversarial test examples.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model; also used as the
                      global step of the saved checkpoint
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model; the model type and
                      attack name are appended as sub-directories
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: not read by this function
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param adversarial_training: if true, the model is finally evaluated
                                 against fgsm, bim and mifgsm in turn
    :param attacking: not read by this function (kept for compatibility)
    :param origin_method: attack used during training and in the periodic
                          evaluation: 'fgsm', 'bim' or 'mifgsm'
    :param save_model: if true, save a checkpoint and a Keras .h5 file after
                       training
    :param model_type: 'a', 'b' or 'c', selecting modelA / modelB / modelC
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only use No.0 GPU
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    the_model = modelA
    if model_type == 'a':
        the_model = modelA
    elif model_type == 'b':
        the_model = modelB
    elif model_type == 'c':
        the_model = modelC
    else:
        exit('the model type must be a or b or c.')
    model = the_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    wrap = KerasModelWrapper(model)
    preds = model(x)

    # Initialize the attack object used for adversarial training
    att_method, att_method_params = _mnist_tutorial_attack(
        origin_method, wrap, sess)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    print(att_method_params)
    adv_x = att_method.generate(x, **att_method_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    def attack(x):
        return att_method.generate(x, **att_method_params)

    def evaluate2():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        print('AT Test accuracy on legitimate examples: %0.4f' % acc)

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_adv, x_test, y_test,
                              args=eval_params)
        print('AT Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Train an MNIST model
    # NOTE(review): train_params captures train_dir before the model/attack
    # suffix is appended below - confirm this is intended.
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    train_dir = train_dir + '/' + model_type + '/' + origin_method
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate2()
    else:
        print("Model was not loaded, training from scratch.")
        loss2 = CrossEntropy(wrap, smoothing=label_smoothing, attack=attack)
        train(sess, loss2, x_train, y_train, evaluate=evaluate2,
              args=train_params, rng=rng)

        # NOTE(review): saving is assumed to belong to the training branch
        # (the original indentation is not available) - confirm.
        if save_model:
            saver = tf.train.Saver(max_to_keep=1)
            # Tag the checkpoint with the epochs actually requested instead of
            # the module-level NB_EPOCHS default.
            saver.save(sess, '{}/{}.ckpt'.format(train_dir, origin_method),
                       global_step=nb_epochs)
            keras.models.save_model(
                model, '{}/{}_mnist.h5'.format(train_dir, origin_method))
            print("model has been saved")

    # >>> other method >>>
    if adversarial_training:
        for attack_name in ['fgsm', 'bim', 'mifgsm']:
            eval_attack, eval_attack_params = _mnist_tutorial_attack(
                attack_name, wrap, sess)

            # Evaluate the accuracy of the MNIST model on adversarial examples
            print(eval_attack_params)
            eval_adv_x = eval_attack.generate(x, **eval_attack_params)
            # Consider the attack to be constant
            eval_adv_x = tf.stop_gradient(eval_adv_x)
            eval_preds_adv = model(eval_adv_x)

            eval_par = {'batch_size': batch_size}
            start_time = time.time()
            acc = model_eval(sess, x, y, eval_preds_adv, x_test, y_test,
                             args=eval_par)
            print('Test accuracy on adversarial examples: %0.4f' % acc)
            end_time = time.time()
            print("{} attack time is {}\n".format(attack_name,
                                                  end_time-start_time))
            # Overwritten on every pass: the report keeps the last attack only.
            report.clean_train_adv_eval = acc
            gc.collect()

    return report
def test():
    """Build the RDPCNN graph, restore its checkpoint and run the test reports.

    Adversarial-example tensors for FGSM, iterative FGSM, momentum iterative
    and Madry attacks are attached to the graph dictionary under "x_advs"
    before `test_info` and `robust_info` are called.
    """
    tf.reset_default_graph()
    graph = tf.get_default_graph()

    with graph.as_default():
        # Placeholder nodes.
        images_ph = tf.placeholder(
            tf.float32,
            [None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
        labels_ph = tf.placeholder(tf.float32, [None, FLAGS.NUM_CLASSES])
        training_ph = tf.placeholder(tf.bool, ())

        # model
        model = model_cifar100.RDPCNN(images_ph, labels_ph, FLAGS.INPUT_SIGMA,
                                      training_ph)
        # Evaluated as in the original although the results are not used here.
        model_loss = model.loss()
        model_acc = model.cnn_accuracy

        def inference(x):
            out_logits, _ = model.cnn.prediction(x)
            return out_logits

        def inference_prob(x):
            _, out_probs = model.cnn.prediction(x)
            return out_probs

        graph_dict = {
            "images_holder": images_ph,
            "label_holder": labels_ph,
            "is_training": training_ph,
        }

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config, graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            # load model
            model.tf_load(sess, name=FLAGS.CNN_CKPT_RESTORE_NAME)

            # adv test
            ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                                   output_layer='logits')
            ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                                  output_layer='probs')

            # Keyword arguments shared by the attacks; the iterative ones add
            # a step size of ATTACK_SIZE / 10 over 10 iterations.
            single_step = dict(eps=FLAGS.ATTACK_SIZE,
                               clip_min=0.0, clip_max=1.0)
            multi_step = dict(single_step,
                              eps_iter=FLAGS.ATTACK_SIZE / 10, nb_iter=10)

            # (key, attack class, generate kwargs) in construction order.
            attack_table = [
                ("fgsm", FastGradientMethod, single_step),
                ("ifgsm", BasicIterativeMethod, multi_step),
                ("mim", MomentumIterativeMethod,
                 dict(multi_step, decay_factor=1.0)),
                ("madry", MadryEtAl, multi_step),
            ]

            x_advs = {}
            for key, attack_cls, generate_kwargs in attack_table:
                attack_obj = attack_cls(model=ch_model_probs, sess=sess)
                x_advs[key] = attack_obj.generate(x=images_ph,
                                                  **generate_kwargs)
            graph_dict["x_advs"] = x_advs

            print("\nTest")
            # A local run is limited to two batches; otherwise no limit is set.
            total_test_batch = 2 if FLAGS.local else None

            # NOTE: allow_pickle=True unpickles the file; load trusted data only.
            dp_info = np.load(FLAGS.DP_INFO_NPY, allow_pickle=True).item()
            test_info(sess, model, True, graph_dict, dp_info,
                      FLAGS.TEST_LOG_FILENAME, total_batch=total_test_batch)
            robust_info(sess, model, graph_dict, FLAGS.ROBUST_LOG_FILENAME)
def b_dcgan(dataset, args):
    """Train a Bayesian DCGAN, optionally with adversarial training/testing.

    :param dataset: data source; this function reads ``x_dim``,
        ``dataset_size`` and ``num_classes`` and calls ``next_batch``
        (and ``supervised_batches`` when the dataset provides it).
    :param args: parsed command-line options (``z_dim``, ``batch_size``,
        ``lr``, ``train_iter``, ``semi_supervised``, ``adv_train``,
        ``adv_test``, ``n_save``, ``save_chkpt``, ``out_dir``, ...).

    Every ``args.n_save`` iterations the current losses (and, in the
    semi-supervised case, the test accuracies) are written to
    ``results_<iter>.json`` in ``args.out_dir``; every ``args.save_chkpt``
    iterations a checkpoint ``model_<iter>`` is saved there as well.
    """
    z_dim = args.z_dim
    x_dim = dataset.x_dim
    batch_size = args.batch_size
    dataset_size = dataset.dataset_size

    session = get_session()

    test_x = tf.placeholder(tf.float32, shape=(batch_size, 28, 28, 1))
    x = tf.placeholder(tf.float32, shape=(batch_size, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(batch_size, 10))
    unlabeled_batch_ph = tf.placeholder(tf.float32,
                                        shape=(batch_size, 28, 28, 1))
    labeled_image_ph = tf.placeholder(tf.float32,
                                      shape=(batch_size, 28, 28, 1))

    if args.random_seed is not None:
        tf.set_random_seed(args.random_seed)

    # All model functions take a fixed batch_size at all times.
    dcgan = BDCGAN(
        x_dim,
        z_dim,
        dataset_size,
        batch_size=batch_size,
        J=args.J,
        M=args.M,
        lr=args.lr,
        optimizer=args.optimizer,
        gen_observed=args.gen_observed,
        adv_train=args.adv_train,
        num_classes=dataset.num_classes if args.semi_supervised else 1)

    if args.adv_test and args.semi_supervised:
        if args.basic_iterative:
            fgsm = BasicIterativeMethod(dcgan, sess=session)
            dcgan.adv_constructor = fgsm
            fgsm_params = {
                'eps': args.eps,
                'eps_iter': float(args.eps / 4),
                'nb_iter': 4,
                'ord': np.inf,
                'clip_min': 0.,
                'clip_max': 1.
            }
        else:
            fgsm = FastGradientMethod(dcgan, sess=session)
            dcgan.adv_constructor = fgsm
            fgsm_params = {'eps': args.eps, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(x, **fgsm_params)
        # The next two tensors are not read below; they are still built so
        # the graph contains the same ops as before.
        adv_test_x = fgsm.generate(test_x, **fgsm_params)
        preds = dcgan.get_probs(adv_x)

    if args.adv_train:
        # Targeted attack towards class 0.
        unlabeled_targets = np.zeros([batch_size, dcgan.K + 1])
        unlabeled_targets[:, 0] = 1
        fgsm_targeted_params = {
            'eps': args.eps,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': unlabeled_targets
        }

    saver = tf.train.Saver()

    print("Starting session")
    session.run(tf.global_variables_initializer())

    prev_iters = 0
    if args.load_chkpt:
        saver.restore(session, args.chkpt)
        # Assume checkpoint is of the form "model_300"
        prev_iters = int(args.chkpt.split('/')[-1].split('_')[1])
        print("Model restored from iteration:", prev_iters)

    print("Starting training loop")
    num_train_iter = args.train_iter

    if hasattr(dataset, "supervised_batches"):
        # The dataset implements its own feeder (data may not fit in memory).
        supervised_batches = dataset.supervised_batches(args.N, batch_size)
    else:
        supervised_batches = get_supervised_batches(
            dataset, args.N, batch_size, list(range(dataset.num_classes)))

    if args.semi_supervised:
        test_image_batches, test_label_batches = get_test_batches(
            dataset, batch_size)
        optimizer_dict = {
            "semi_d": dcgan.d_optim_semi_adam,
            "sup_d": dcgan.s_optim_adam,
            "adv_d": dcgan.d_optim_adam,
            "gen": dcgan.g_optims_adam
        }
    else:
        optimizer_dict = {
            "adv_d": dcgan.d_optim_adam,
            "gen": dcgan.g_optims_adam
        }

    # For now the same learning rate is used for Ds and Gs.
    base_learning_rate = args.lr
    lr_decay_rate = args.lr_decay

    # Adversarial-training tensors.  They are created once, on first use:
    # calling ``generate`` on every iteration would add a fresh copy of the
    # attack to the graph each time and make it grow without bound.
    adv_labeled_op = None
    adv_unlabeled_op = None

    for train_iter in range(1 + prev_iters, 1 + num_train_iter):

        if train_iter == 5000:
            print("Switching to user-specified optimizer")
            if args.semi_supervised:
                optimizer_dict = {
                    "semi_d": dcgan.d_optim_semi,
                    "sup_d": dcgan.s_optim,
                    "adv_d": dcgan.d_optim,
                    "gen": dcgan.g_optims
                }
            else:
                optimizer_dict = {
                    "adv_d": dcgan.d_optim,
                    "gen": dcgan.g_optims
                }

        learning_rate = base_learning_rate * np.exp(-lr_decay_rate * min(
            1.0, (train_iter * batch_size) / float(dataset_size)))

        batch_z = np.random.uniform(-1, 1, [batch_size, z_dim])
        image_batch, _ = dataset.next_batch(batch_size, class_id=None)

        if args.semi_supervised:
            labeled_image_batch, labels = next(supervised_batches)

            d_feed = {
                dcgan.labeled_inputs: labeled_image_batch,
                dcgan.labels: get_gan_labels(labels),
                dcgan.inputs: image_batch,
                dcgan.z: batch_z,
                dcgan.d_semi_learning_rate: learning_rate
            }
            if args.adv_train:
                if adv_labeled_op is None:
                    adv_labeled_op = fgsm.generate(labeled_image_ph,
                                                   **fgsm_targeted_params)
                    adv_unlabeled_op = fgsm.generate(unlabeled_batch_ph,
                                                     **fgsm_params)
                adv_labeled = session.run(
                    adv_labeled_op,
                    feed_dict={labeled_image_ph: labeled_image_batch})
                adv_unlabeled = session.run(
                    adv_unlabeled_op,
                    feed_dict={unlabeled_batch_ph: image_batch})
                d_feed[dcgan.adv_unlab] = adv_unlabeled
                d_feed[dcgan.adv_labeled] = adv_labeled

            _, d_loss = session.run(
                [optimizer_dict["semi_d"], dcgan.d_loss_semi],
                feed_dict=d_feed)

            _, s_loss = session.run(
                [optimizer_dict["sup_d"], dcgan.s_loss],
                feed_dict={
                    dcgan.inputs: labeled_image_batch,
                    dcgan.lbls: labels
                })
        else:
            # regular GAN
            _, d_loss = session.run(
                [optimizer_dict["adv_d"], dcgan.d_loss],
                feed_dict={
                    dcgan.inputs: image_batch,
                    dcgan.z: batch_z,
                    dcgan.d_learning_rate: learning_rate
                })

        if args.wasserstein:
            session.run(dcgan.clip_d, feed_dict={})

        g_losses = []
        for gi in range(dcgan.num_gen):
            # compute g_sample loss
            batch_z = np.random.uniform(-1, 1, [batch_size, z_dim])
            for m in range(dcgan.num_mcmc):
                _, g_loss = session.run(
                    [
                        optimizer_dict["gen"][gi * dcgan.num_mcmc + m],
                        dcgan.generation["g_losses"][gi * dcgan.num_mcmc + m]
                    ],
                    feed_dict={
                        dcgan.z: batch_z,
                        dcgan.g_learning_rate: learning_rate
                    })
                g_losses.append(g_loss)

        if train_iter > 0 and train_iter % args.n_save == 0:
            print("Iter %i" % train_iter)

            # collect samples
            if args.save_samples:
                all_sampled_imgs = []
                for gi in range(dcgan.num_gen):
                    _imgs, _ps = [], []
                    for _ in range(10):
                        sample_z = np.random.uniform(-1, 1,
                                                     size=(batch_size, z_dim))
                        sampled_imgs, sampled_probs = session.run(
                            [
                                dcgan.generation["gen_samplers"][
                                    gi * dcgan.num_mcmc],
                                dcgan.generation["d_probs"][
                                    gi * dcgan.num_mcmc]
                            ],
                            feed_dict={dcgan.z: sample_z})
                        _imgs.append(sampled_imgs)
                        _ps.append(sampled_probs)
                    sampled_imgs = np.concatenate(_imgs)
                    sampled_probs = np.concatenate(_ps)
                    all_sampled_imgs.append(
                        [sampled_imgs, sampled_probs[:, 1:].sum(1)])

            print("Disc loss = %.2f, Gen loss = %s" %
                  (d_loss, ", ".join(["%.2f" % gl for gl in g_losses])))

            if args.semi_supervised:
                # Test set performance on real labels only, for both the
                # GAN-based classifier and the standard one.
                s_acc, ss_acc, non_adv_acc, ex_prob = get_test_accuracy(
                    session, dcgan, test_image_batches, test_label_batches)

                if args.adv_test:
                    adv_set = [
                        session.run(adv_x, feed_dict={x: test_images})
                        for test_images in test_image_batches
                    ]
                    (adv_sup_acc, adv_ss_acc, correct_uncertainty,
                     incorrect_uncertainty, adv_acc,
                     adv_ex_prob) = get_adv_test_accuracy(
                         session, dcgan, adv_set, test_label_batches)

                    print("Adversarial semi-sup accuracy with filter: %.2f" %
                          adv_sup_acc)
                    print("Adverarial semi-sup accuracy: %.2f" % adv_ss_acc)
                    print("Uncertainty for correct predictions: %.2f" %
                          correct_uncertainty)
                    print("Uncertainty for incorrect predictions: %.2f" %
                          incorrect_uncertainty)
                    print("non_adversarial_classification_accuracy: %.2f" %
                          non_adv_acc)
                    print("adversarial_classification_accuracy: %.2f" %
                          adv_acc)

                    if args.save_samples:
                        print("saving adversarial test images and test images")
                        # Save the first 5 image pairs of the last batch.
                        # The loop variables must not be called x / y: that
                        # would overwrite the placeholders fed above and
                        # break the next evaluation round.
                        image_pairs = zip(adv_set[-1][:5],
                                          test_image_batches[-1][:5])
                        for i, (adv_img, clean_img) in enumerate(image_pairs):
                            np.save(
                                args.out_dir + '/adv_test' + str(train_iter) +
                                '_' + str(i), adv_img)
                            np.save(
                                args.out_dir + '/test' + str(train_iter) +
                                '_' + str(i), clean_img)

                print("Supervised acc: %.2f" % (s_acc))
                print("Semi-sup acc: %.2f" % (ss_acc))

            print("saving results and samples")

            results = {
                "disc_loss": float(d_loss),
                "gen_losses": [float(gl) for gl in g_losses]
            }
            if args.semi_supervised:
                results["non_adversarial_classification_accuracy"] = float(
                    non_adv_acc)
                if args.adv_test:
                    # These values only exist when the adversarial test ran.
                    results["adversarial_classification_accuracy"] = float(
                        adv_acc)
                    results["adversarial_uncertainty_correct"] = float(
                        correct_uncertainty)
                    results["adversarial_uncertainty_incorrect"] = float(
                        incorrect_uncertainty)
                    results['adversarial_filtered_semi_supervised_acc'] = float(
                        adv_sup_acc)
                    results["adversarial_unfilted_semi_supervised_acc"] = float(
                        adv_ss_acc)
                results["supervised_acc"] = float(s_acc)
                results["semi_supervised_acc"] = float(ss_acc)
                results["timestamp"] = time.time()
                results["previous_chkpt"] = args.chkpt

            with open(
                    os.path.join(args.out_dir,
                                 'results_%i.json' % train_iter), 'w') as fp:
                json.dump(results, fp)

            if args.save_samples:
                for gi in range(dcgan.num_gen):
                    print_images(all_sampled_imgs[gi],
                                 "B_DCGAN_%i_%.2f" %
                                 (gi, g_losses[gi * dcgan.num_mcmc]),
                                 train_iter,
                                 directory=args.out_dir)
                print_images(image_batch,
                             "RAW",
                             train_iter,
                             directory=args.out_dir)

            if args.save_weights:
                var_dict = {
                    var.name: session.run(var.name)
                    for var in tf.trainable_variables()
                }
                np.savez_compressed(
                    os.path.join(args.out_dir, "weights_%i.npz" % train_iter),
                    **var_dict)
                print("Done saving weights")

        if train_iter > 0 and train_iter % args.save_chkpt == 0:
            save_path = saver.save(
                session, os.path.join(args.out_dir, "model_%i" % train_iter))
            print("Model checkpointed in file: %s" % save_path)

    session.close()
]) model.compile(optimizer=keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy']) model.fit(train_images, train_labels, epochs=5) model.save(filename) # Save your model in case of retraining. model.summary() # evaluate accuracy test_loss, test_acc = model.evaluate(test_images2, test_labels) print('Test accuracy:', test_acc) # - Step 4 - Implement untargeted attack using the Basic Iterative Method wrap = KerasModelWrapper(model) bim = BasicIterativeMethod(wrap, sess=sess) bim_params = {'eps': 0.1, 'eps_iter': 0.01, 'clip_min': 0., 'clip_max': 1.} # - Step 5 - From each category in the Fashion-MNIST test set, # select 10 images to generate adversarial examples using 5 and 10 iterations, respectively. orig_images = np.empty([0, 28, 28, 1]) orig_labels = np.empty([0]) for clz in range(nb_classes): idxs = np.where(test_labels == clz)[0][:10] orig_images = np.append(orig_images, test_images2[idxs], axis=0) orig_labels = np.append(orig_labels, test_labels[idxs], axis=0) bim_params["nb_iter"] = 5 adv_images5 = bim.generate_np(orig_images, **bim_params) bim_params["nb_iter"] = 10 adv_images10 = bim.generate_np(orig_images, **bim_params)
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8, batch_size=100,
                        nb_classes=10, nb_filters=64, learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA).

    Trains a baseline CNN, logs its accuracy under several attacks to
    ``Clean_jsma_elastic_against5.log``, crafts targeted JSMA examples for
    every training sample, stores the augmented set in
    ``jsma_training_data.npz`` and finally runs adversarial training, logging
    per-epoch accuracies to ``Adversarial_jsma_elastic_against5.log``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_filters: accepted for interface compatibility; not read here
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    eval_params = {'batch_size': batch_size}

    def _eval_and_log(log_file, predictions, print_fmt, log_prefix):
        """Evaluate `predictions` on the test set; print and log the result."""
        acc = model_eval(sess, x, y, predictions, X_test, Y_test,
                         args=eval_params)
        print(print_fmt % acc)
        log_file.write(log_prefix + str(acc) + '\n')
        return acc

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # do not log
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    # The context manager closes the log even if an evaluation fails.
    with open("Clean_jsma_elastic_against5.log", "w") as f_out_clean:
        # Accuracy of the baseline model on legitimate test examples
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params)
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
        f_out_clean.write('Test accuracy on legitimate test examples: ' +
                          str(accuracy) + '\n')

        # Baseline model against JSMA
        jsma_params = {'theta': 1., 'gamma': 0.1,
                       'clip_min': 0., 'clip_max': 1.,
                       'y_target': None}
        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x_jsma = jsma.generate(x, **jsma_params)
        preds_adv_jsma = model.get_probs(adv_x_jsma)
        _eval_and_log(
            f_out_clean, preds_adv_jsma,
            'Clean test accuracy on JSMA adversarial examples: %0.4f',
            'Clean test accuracy on JSMA adversarial examples: ')

        # Baseline model against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x_fgsm = fgsm.generate(x, **fgsm_params)
        preds_adv_fgsm = model.get_probs(adv_x_fgsm)
        _eval_and_log(
            f_out_clean, preds_adv_fgsm,
            'Clean test accuracy on FGSM adversarial examples: %0.4f',
            'Clean test accuracy on FGSM adversarial examples: ')

        # Baseline model against BIM
        bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                      'clip_min': 0., 'clip_max': 1.}
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x_bim = bim.generate(x, **bim_params)
        preds_adv_bim = model.get_probs(adv_x_bim)
        _eval_and_log(
            f_out_clean, preds_adv_bim,
            'Clean test accuracy on BIM adversarial examples: %0.4f',
            'Clean test accuracy on BIM adversarial examples: ')

        # Baseline model against Elastic Net
        en_params = {'binary_search_steps': 1,
                     'max_iterations': 100,
                     'learning_rate': 0.1,
                     'batch_size': batch_size,
                     'initial_const': 10}
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x_en = en.generate(x, **en_params)
        preds_adv_en = model.get_probs(adv_x_en)
        _eval_and_log(
            f_out_clean, preds_adv_en,
            'Clean test accuracy on EN adversarial examples: %0.4f',
            'Clean test accuracy on EN adversarial examples: ')

        # Baseline model against DeepFool
        deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02,
                           'max_iter': 50,
                           'clip_min': 0., 'clip_max': 1.}
        deepfool = DeepFool(model, sess=sess)
        adv_x_df = deepfool.generate(x, **deepfool_params)
        preds_adv_df = model.get_probs(adv_x_df)
        _eval_and_log(
            f_out_clean, preds_adv_df,
            'Clean test accuracy on DF adversarial examples: %0.4f',
            'Clean test accuracy on DF adversarial examples: ')

        # Baseline model against VAT
        vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                      'clip_min': 0., 'clip_max': 1.}
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x_vat = vat.generate(x, **vat_params)
        preds_adv_vat = model.get_probs(adv_x_vat)
        _eval_and_log(
            f_out_clean, preds_adv_vat,
            'Clean test accuracy on VAT adversarial examples: %0.4f\n',
            'Clean test accuracy on VAT adversarial examples: ')

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    model_2 = make_basic_cnn()
    # NOTE(review): this calls `model`, not `model_2`, although every attack
    # below targets `model_2` -- confirm whether `model_2(x)` was intended.
    preds_2 = model(x)

    # Initialise the variables of the newly created model (this re-runs the
    # initialiser for all global variables).
    sess.run(tf.global_variables_initializer())

    # 1. JSMA attack
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. FGSM attack
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)

    # 3. Elastic net attack
    en_params = {'binary_search_steps': 5,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. DeepFool
    deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0., 'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Basic Iterative
    bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                  'clip_min': 0., 'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. Virtual adversarial
    vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                  'clip_min': 0., 'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)

    # Build the augmented training set: every original sample followed by one
    # targeted JSMA example per other class, all carrying the original label.
    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]

        # Keep the unperturbed sample as well.
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # One adversarial example for each class that differs from the label
        # given in the dataset.
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)

        for target in target_classes:
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # adv_x has shape (1, 28, 28, 1)
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # Save the crafted data so a later run does not have to regenerate it.
    np.savez("jsma_training_data.npz", x_train=X_train_data,
             y_train=Y_train_data)

    # The log stays open for the whole adversarial training run and is closed
    # afterwards (previously it was never closed).
    with open("Adversarial_jsma_elastic_against5.log", "w") as f_out:

        def evaluate_against_all():
            """Log accuracy on clean data and under each attack."""
            # 1 Clean data
            _eval_and_log(f_out, preds,
                          'Legitimate accuracy: %0.4f',
                          'Legitimate accuracy: ')
            # 2 JSMA
            _eval_and_log(f_out, preds_adv_random,
                          'JSMA accuracy: %0.4f', 'JSMA accuracy:')
            # 3 FGSM
            _eval_and_log(f_out, preds_adv_fgsm,
                          'FGSM accuracy: %0.4f', 'FGSM accuracy:')
            # 4 Base Iterative
            _eval_and_log(f_out, preds_adv_base_iter,
                          'Base Iterative accuracy: %0.4f',
                          'Base Iterative accuracy:')
            # 5 Elastic Net
            _eval_and_log(f_out, preds_adv_elastic_net,
                          'Elastic Net accuracy: %0.4f',
                          'Elastic Net accuracy:')
            # 6 DeepFool
            _eval_and_log(f_out, preds_adv_deepfool,
                          'DeepFool accuracy: %0.4f', 'DeepFool accuracy:')
            # 7 C & W
            _eval_and_log(f_out, preds_adv_cw,
                          'C & W accuracy: %0.4f', 'C & W accuracy:')
            # 8 Virtual Adversarial
            _eval_and_log(f_out, preds_adv_vat,
                          'VAT accuracy: %0.4f', 'VAT accuracy:')

            # Exactly one end-of-epoch marker, after the last result.
            f_out.write("*******End of Epoch***********\n\n")
            print("*******End of Epoch***********\n\n")

        print("Now Adversarial Training with Elastic Net + modified X_train and Y_train")
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
            'filename': 'trained_model.out'
        }
        model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                    predictions_adv=preds_adv_elastic_net,
                    evaluate=evaluate_against_all, verbose=False,
                    args=train_params, rng=rng)

    # Close TF session
    sess.close()

    return report
class TestBasicIterativeMethod(TestFastGradientMethod):
    """Tests for BasicIterativeMethod, reusing the FGSM test helpers."""

    def setUp(self):
        super(TestBasicIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def _label_agreement(self, x_val, x_adv):
        """Return the fraction of inputs whose predicted label is unchanged."""
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        return np.mean(orig_labs == new_labs)

    def test_generate_np_gives_adversarial_example_linfinity(self):
        self.help_generate_np_gives_adversarial_example(ord=np.inf, eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l1(self):
        self.help_generate_np_gives_adversarial_example(ord=1, eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l2(self):
        self.help_generate_np_gives_adversarial_example(ord=2, eps=.5,
                                                        nb_iter=20)

    def test_do_not_reach_lp_boundary(self):
        """
        Make sure that the iterative attack does not reach the boundary of
        the Lp neighbourhood if nb_iter * eps_iter is relatively small
        compared to epsilon.
        """
        # `norm_ord` rather than `ord`, so the builtin is not shadowed.
        for norm_ord in [1, 2, np.inf]:
            _, _, delta = self.generate_adversarial_examples_np(
                ord=norm_ord, eps=.5, nb_iter=10, eps_iter=.01)
            self.assertTrue(np.max(0.5 - delta) > 0.25)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not passing clip_min and
        clip_max to fgm), this attack fails by
        np.mean(orig_labels == new_labels) == .39.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        self.assertTrue(self._label_agreement(x_val, x_adv) < 0.1)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        """A different nb_iter must build a new graph (tf.gradients is hit)."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=10)
        self.assertTrue(self._label_agreement(x_val, x_adv) < 0.1)

        called = [False]
        real_gradients = tf.gradients

        def spy(*args, **kwargs):
            called[0] = True
            return real_gradients(*args, **kwargs)

        tf.gradients = spy
        try:
            x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                            clip_min=-5.0, clip_max=5.0,
                                            nb_iter=11)
            self.assertTrue(self._label_agreement(x_val, x_adv) < 0.1)
        finally:
            # Always undo the monkeypatch, otherwise a failure here would
            # leave tf.gradients patched for every test that runs afterwards.
            tf.gradients = real_gradients

        self.assertTrue(called[0])
def train_zero_knowledge_gandef_model(train_start=0, train_end=60000,
                                      test_start=0, test_end=10000,
                                      smoke_test=True, save=False,
                                      testing=False,
                                      backprop_through_attack=False,
                                      num_threads=None):
    """
    Train a zero-knowledge GanDef model on Fashion-MNIST and evaluate it on
    clean, FGSM, BIM and PGD examples.

    :param train_start: index of first training set example (not read here)
    :param train_end: index of last training set example (not read here)
    :param test_start: index of first test set example (not read here)
    :param test_end: index of last test set example (not read here)
    :param smoke_test: if true, only the first 256 training and test
                       examples are used
    :param save: if true, the final model will be saved
    :param testing: accepted for interface compatibility; not read here
    :param backprop_through_attack: accepted for interface compatibility;
                                    not read here
    :param num_threads: if truthy, the session is limited to a single
                        intra-op thread (the value itself is not used)
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get Fashion-MNIST data
    X_train, Y_train, X_test, Y_test = data_fashion_mnist()
    if smoke_test:
        X_train, Y_train = X_train[:256], Y_train[:256]
        X_test, Y_test = X_test[:256], Y_test[:256]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y_soft = tf.placeholder(tf.float32, shape=(None, 10))

    # Prepare optimizers (classifier and discriminator)
    learning_rate = 1e-4
    clf_opt = tf.train.AdamOptimizer(learning_rate)
    dic_opt = tf.train.AdamOptimizer(learning_rate * 10)

    train_params = {
        'nb_epochs': 80,
        'batch_size': 128,
        'trade_off': 2,
        'inner_epochs': 1
    }
    rng = np.random.RandomState([2017, 8, 30])

    # Adversarial training
    print("Start adversarial training")
    zero_knowledge_gandef_model = make_zero_knowledge_gandef_model(
        name="model_zero_knowledge_gandef")
    aug_x = gaussian_augment(x, std=1)
    preds_clean = zero_knowledge_gandef_model(x)
    preds_aug = zero_knowledge_gandef_model(aug_x)

    def _logits_of(preds):
        """Return the input of `preds` if it is a Softmax op, else `preds`."""
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
            return logits
        return preds

    def cross_entropy(truth, preds, mean=True):
        """Softmax cross entropy between `truth` and the logits of `preds`."""
        out = tf.nn.softmax_cross_entropy_with_logits(
            logits=_logits_of(preds), labels=truth)
        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    def sigmoid_entropy(truth, preds, mean=True):
        """Sigmoid cross entropy between `truth` and the logits of `preds`."""
        out = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=_logits_of(preds), labels=truth)
        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    def _eval_params(is_clean):
        """Fresh evaluation settings for one confident_model_eval call."""
        return {
            'batch_size': 128,
            'use_dic': False,
            'is_clean': is_clean,
            'reject_threshold': 0.5
        }

    # Perform adversarial training
    gan_train_v2(sess, x, y_soft, preds_clean, X_train, Y_train,
                 loss_func=[cross_entropy, sigmoid_entropy],
                 optimizer=[clf_opt, dic_opt],
                 predictions_adv=preds_aug, evaluate=None,
                 args=train_params, rng=rng,
                 var_list=zero_knowledge_gandef_model.get_gan_params())

    # Evaluate the accuracy of the model on clean examples
    preds_clean = zero_knowledge_gandef_model(x)
    clean_acc = confident_model_eval(sess, x, y_soft, preds_clean,
                                     X_test, Y_test,
                                     args=_eval_params(is_clean=True))
    print('Test accuracy on Clean test examples: %0.4f\n' % clean_acc)
    report.adv_train_clean_eval = clean_acc

    # Evaluate the accuracy of the model on FGSM examples
    fgsm_params = {'eps': 0.6, 'clip_min': -1., 'clip_max': 1.}
    fgsm_att = FastGradientMethod(zero_knowledge_gandef_model, sess=sess)
    fgsm_adv = fgsm_att.generate(x, **fgsm_params)
    preds_fgsm_adv = zero_knowledge_gandef_model(fgsm_adv)
    fgsm_acc = confident_model_eval(sess, x, y_soft, preds_fgsm_adv,
                                    X_test, Y_test,
                                    args=_eval_params(is_clean=False))
    print('Test accuracy on FGSM test examples: %0.4f\n' % fgsm_acc)
    report.adv_train_adv_eval = fgsm_acc

    # Evaluate the accuracy of the model on BIM examples
    bim_params = {'eps': 0.6, 'eps_iter': 0.1,
                  'clip_min': -1., 'clip_max': 1.}
    bim_att = BasicIterativeMethod(zero_knowledge_gandef_model, sess=sess)
    bim_adv = bim_att.generate(x, **bim_params)
    preds_bim_adv = zero_knowledge_gandef_model(bim_adv)
    bim_acc = confident_model_eval(sess, x, y_soft, preds_bim_adv,
                                   X_test, Y_test,
                                   args=_eval_params(is_clean=False))
    print('Test accuracy on BIM test examples: %0.4f\n' % bim_acc)
    report.adv_train_adv_eval = bim_acc

    # Evaluate the accuracy of the model on PGD examples
    pgd_params = {
        'eps': 0.6,
        'eps_iter': 0.02,
        'nb_iter': 40,
        'clip_min': -1.,
        'clip_max': 1.,
        'rand_init': True
    }
    pgd_att = MadryEtAl(zero_knowledge_gandef_model, sess=sess)
    # Use the PGD settings defined above; previously the BIM parameters were
    # passed here and `pgd_params` was never used.
    pgd_adv = pgd_att.generate(x, **pgd_params)
    preds_pgd_adv = zero_knowledge_gandef_model(pgd_adv)
    pgd_acc = confident_model_eval(sess, x, y_soft, preds_pgd_adv,
                                   X_test, Y_test,
                                   args=_eval_params(is_clean=False))
    print('Test accuracy on PGD test examples: %0.4f\n' % pgd_acc)
    # Each adversarial evaluation overwrites this field; the PGD result is
    # the one that remains in the report.
    report.adv_train_adv_eval = pgd_acc

    # Save model
    if save:
        model_path = "models/zero_knowledge_gandef"
        vars_to_save = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope='model_zero_knowledge_gandef*')
        assert len(vars_to_save) > 0
        saver = tf.train.Saver(var_list=vars_to_save)
        saver.save(sess, model_path)
        print('Model saved\n')
    else:
        print('Model not saved\n')

    return report