def deep_fool_attack():
    """Craft DeepFool adversarial images batch by batch and save them to disk.

    Batches are pulled from ``importer.load_images_generator``. For each batch
    an ``Inception_V3_Model`` and a ``DeepFool`` attack (``max_iter=5``) are
    built, the checkpoint at ``importer.checkpoint_path`` is restored, and the
    adversarial images are written with ``utils.image_saver`` into
    ``<config.ADVERSARIAL_FOLDER>/deep_full_base``.

    The loop ends when the image generator is exhausted or after 999 batches,
    whichever comes first.
    """
    counter = 0
    image_iterator = importer.load_images_generator(importer.batch_shape)
    tf.reset_default_graph()
    x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
    folder_path = os.path.join(config.ADVERSARIAL_FOLDER, "deep_full_base")
    os.makedirs(folder_path, exist_ok=True)

    while True:
        filenames, images = next(image_iterator, (None, None))
        if filenames is None:
            # The generator ran dry before the batch limit was reached.
            print("Attack is finished")
            break

        # One session per batch; the context manager closes it again so no
        # session outlives its iteration.
        with tf.Session() as sess:
            # NOTE(review): the model and the attack graph are rebuilt for
            # every batch inside the same default graph -- confirm this is
            # intended, since the graph is only reset once above.
            model = Inception_V3_Model(np.float32(images))
            attack = DeepFool(model=model, sess=sess)
            params = {'max_iter': 5}

            variables = tf.get_collection(tf.GraphKeys.VARIABLES)
            saver = tf.train.Saver(variables)
            saver.restore(sess, importer.checkpoint_path)

            x_adv = attack.generate(x_input, **params)
            adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
            utils.image_saver(adversarial_images, filenames, folder_path)

        print("adversarial_images counter:{}".format(counter))
        counter += 1
        if counter == 999:
            print("Attack is finished")
            break
class TestDeepFool(CleverHansTest):
    """Checks that DeepFool on ``SimpleModel`` flips labels and honours clipping."""

    def setUp(self):
        """Create a session, the model under attack and the attack object."""
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        # Close the session after each test so sessions do not pile up
        # across the test run.
        self.addCleanup(self.sess.close)
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must change the predicted label for >90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        # NOTE(review): the keyword is spelled ``over_shoot`` here; confirm it
        # matches the name the installed DeepFool implementation expects.
        x_adv = self.attack.generate_np(x_val, over_shoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        # assertLess reports both operands on failure.
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path must also flip >90% of labels."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)
        x_adv_p = self.attack.generate(x, over_shoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5, clip_max=5)
        # The symbolic output must keep the input's static shape.
        self.assertEqual(x_val.shape, x_adv_p.shape)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial values must stay inside ``[clip_min, clip_max]``."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, over_shoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        # A tolerance of 0.001 is allowed on either bound.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)
class TestDeepFool(CleverHansTest):
    """Checks that DeepFool on a tiny fixed-weight network flips labels and clips."""

    def setUp(self):
        """Create a session, a two-layer constant-weight model and the attack."""
        super(TestDeepFool, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        # Close the session after each test so sessions do not pile up
        # across the test run.
        self.addCleanup(self.sess.close)
        self.model = my_model
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must change the predicted label for >90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        # NOTE(review): the keyword is spelled ``over_shoot`` here; confirm it
        # matches the name the installed DeepFool implementation expects.
        x_adv = self.attack.generate_np(x_val, over_shoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        # assertLess reports both operands on failure.
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path must also flip >90% of labels."""
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)
        x_adv_p = self.attack.generate(x, over_shoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5, clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial values must stay inside ``[clip_min, clip_max]``."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, over_shoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        # A tolerance of 0.001 is allowed on either bound.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)
def _DeepFool(self):
    """Build the DeepFool adversarial tensor for ``self.x`` and save it.

    The attack wraps ``self.wrapped_model`` in ``self.sess`` and is configured
    with 10 candidate classes, at most 100 iterations, clipping to [0, 1] and
    ``verbose=False``. The result is handed to ``self.save_images`` together
    with ``self.save_loc``.
    """
    attack = DeepFool(self.wrapped_model, sess=self.sess)
    adversarial = attack.generate(
        self.x,
        nb_candidate=10,
        max_iter=100,
        clip_min=0.,
        clip_max=1.,
        verbose=False,
    )
    self.save_images(adversarial, self.save_loc)
class DeepFoolAttack(AdversarialAttack):
    """DeepFool attack exposed through the ``AdversarialAttack`` interface."""

    def __init__(self, model, n_candidates=10, overshoot=0.02,
                 max_iterations=50, clip_min=-1., clip_max=1.):
        """Store the attack settings and create the underlying attack object.

        :param model: model to attack, forwarded to ``AdversarialAttack``
        :param n_candidates: forwarded to DeepFool as ``nb_candidate``
        :param overshoot: forwarded to DeepFool as ``overshoot``
        :param max_iterations: forwarded to DeepFool as ``max_iter``
        :param clip_min: lower clipping bound, also given to the base class
        :param clip_max: upper clipping bound, also given to the base class
        """
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._n_candidate = n_candidates
        self._overshoot = overshoot
        self._max_iterations = max_iterations

        with self.graph.as_default():
            self._method = DeepFool(self._model, sess=self.session,
                                    nb_candidate=self._n_candidate,
                                    overshoot=self._overshoot,
                                    max_iter=self._max_iterations,
                                    nb_classes=self.n_classes,
                                    clip_min=self._clip_min,
                                    clip_max=self._clip_max)

    def attack_method(self, labels):
        """Return the adversarial tensor for ``self._x_clean``.

        :param labels: accepted for interface compatibility; not used here
        :return: the result of ``DeepFool.generate`` on ``self._x_clean``
        """
        # The settings are passed to generate() as well: CleverHans attacks
        # parse their hyper-parameters there, so constructor-only keywords
        # would leave the attack running on its defaults.
        return self._method.generate(x=self._x_clean,
                                     nb_candidate=self._n_candidate,
                                     overshoot=self._overshoot,
                                     max_iter=self._max_iterations,
                                     clip_min=self._clip_min,
                                     clip_max=self._clip_max)
def df(X, which, prob, magn):
    """Replace a random subset of ``X`` with DeepFool adversarial versions.

    Each sample is selected when its uniform draw is below ``prob``. Selected
    samples are attacked in chunks of ``CHILD_BATCH_SIZE`` against
    ``which.model``, clipped to ``[0, magn * 0.3 + 0.3]``, and written back
    into a copy of ``X``, which is returned. The caller's array is untouched.
    """
    attack = DeepFool(KerasModelWrapper(which.model), sess=session)
    X = X.copy()

    draws = np.random.uniform(size=len(X))
    chosen = np.where(draws < prob)[0]

    chunk_starts = range(0, len(chosen), CHILD_BATCH_SIZE)
    for start in tqdm(chunk_starts, desc='batch: ', leave=False):
        rows = chosen[start:start + CHILD_BATCH_SIZE]
        clean_batch = tf.convert_to_tensor(X[rows])

        # NOTE(review): this runs the global variable initializer on every
        # chunk -- confirm it does not reset weights that should be kept.
        initializer = tf.global_variables_initializer()
        session.run(initializer)

        adversarial = attack.generate(clean_batch, clip_min=0.,
                                      clip_max=magn * 0.3 + 0.3)
        X[rows] = session.run(adversarial)

    return X
class TestDeepFool(CleverHansTest):
    """Checks that DeepFool on ``SimpleModel`` flips labels and honours clipping."""

    def setUp(self):
        """Create a session, the model under attack and the attack object."""
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        # Close the session after each test so sessions do not pile up
        # across the test run.
        self.addCleanup(self.sess.close)
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must change the predicted label for >90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        # NOTE(review): the keyword is spelled ``over_shoot`` here; confirm it
        # matches the name the installed DeepFool implementation expects.
        x_adv = self.attack.generate_np(x_val, over_shoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        # assertLess reports both operands on failure.
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path must also flip >90% of labels."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)
        x_adv_p = self.attack.generate(x, over_shoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5, clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Adversarial values must stay inside ``[clip_min, clip_max]``."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, over_shoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        # A tolerance of 0.001 is allowed on either bound.
        self.assertGreater(np.min(x_adv), -0.201)
        self.assertLess(np.max(x_adv), .301)
def next_images():
    """Yield batches of DeepFool adversarial images.

    ``mode``, ``batch_shape`` and ``is_return_orig_images`` are read from the
    enclosing scope. Only ``mode == 'deep_fool'`` is supported; any other
    value raises.

    The model and attack graph are built once, the checkpoint at
    ``importer.checkpoint_path`` is restored, and each batch from
    ``importer.load_images_generator`` is then run through the same graph.

    :return: generator of ``(filenames, adversarial_images)``, or
             ``(filenames, adversarial_images, images)`` when
             ``is_return_orig_images`` is truthy
    :raises ValueError: if ``mode`` is not ``'deep_fool'``
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    print("{} generator graph is ready!".format(mode))
    tf.reset_default_graph()
    sess = tf.Session()
    try:
        x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
        params = {}
        model = InceptionModelLogits(importer.num_classes, x_input)
        if mode == 'deep_fool':
            graph = DeepFool(model, sess=sess)
            params['max_iter'] = 5
        else:
            raise ValueError("Not supported mode")
        print('graph params: {}'.format(params))

        variables = tf.get_collection(tf.GraphKeys.VARIABLES)
        saver = tf.train.Saver(variables)
        saver.restore(sess, importer.checkpoint_path)

        # Build the attack graph once. The parameters never change between
        # batches, and rebuilding it per batch would keep adding nodes to
        # the default graph.
        x_adv = graph.generate(x_input, **params)

        image_iterator = importer.load_images_generator(batch_shape)
        for filenames, images in image_iterator:
            if filenames is None:
                break
            adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
            print("Image:{}, diff:{}".format(
                filenames[0],
                np.sum(np.abs(images[0] - adversarial_images[0]))))
            if is_return_orig_images:
                yield filenames, adversarial_images, images
            else:
                yield filenames, adversarial_images
    finally:
        # Runs when the generator is exhausted, closed or garbage-collected.
        sess.close()
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8, batch_size=100,
                        nb_classes=10, nb_filters=64, learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)

    Steps performed:

    1. Train a basic CNN on clean MNIST data.
    2. Log its accuracy on clean data and under six attacks (JSMA, FGSM,
       BIM, elastic net, DeepFool, VAT) to
       ``Clean_jsma_elastic_against5.log``.
    3. Build a second CNN, craft a targeted JSMA example for every training
       sample and every other class, and save the augmented training set to
       ``jsma_training_data.npz``.
    4. Run adversarial training with the elastic-net predictions as the
       adversarial term, logging per-evaluation accuracies to
       ``Adversarial_jsma_elastic_against5.log``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_filters: accepted but not used by this function
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object (no field is filled in here)
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Label smoothing on the training labels
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # Train without per-batch logging
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    eval_params = {'batch_size': batch_size}

    # (label, attack class, extra constructor kwargs, generate() kwargs),
    # evaluated against the clean model in this order.
    clean_attacks = [
        ('JSMA', SaliencyMapMethod, {'back': 'tf'},
         {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1.,
          'y_target': None}),
        ('FGSM', FastGradientMethod, {},
         {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
        ('BIM', BasicIterativeMethod, {},
         {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
          'clip_min': 0., 'clip_max': 1.}),
        ('EN', ElasticNetMethod, {'back': 'tf'},
         {'binary_search_steps': 1, 'max_iterations': 100,
          'learning_rate': 0.1, 'batch_size': batch_size,
          'initial_const': 10}),
        ('DF', DeepFool, {},
         {'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 50,
          'clip_min': 0., 'clip_max': 1.}),
        ('VAT', VirtualAdversarialMethod, {},
         {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
          'clip_min': 0., 'clip_max': 1.}),
    ]

    # The context manager closes the log even if an evaluation fails.
    with open("Clean_jsma_elastic_against5.log", "w") as f_out_clean:
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params)
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
        f_out_clean.write('Test accuracy on legitimate test examples: '
                          + str(accuracy) + '\n')

        for name, attack_cls, ctor_kwargs, attack_params in clean_attacks:
            attack = attack_cls(model, sess=sess, **ctor_kwargs)
            adv_x = attack.generate(x, **attack_params)
            preds_adv = model.get_probs(adv_x)

            # Accuracy of the clean model on this attack's examples
            acc = model_eval(sess, x, y, preds_adv, X_test, Y_test,
                             args=eval_params)
            print('Clean test accuracy on %s adversarial examples: %0.4f'
                  % (name, acc))
            f_out_clean.write('Clean test accuracy on %s adversarial '
                              'examples: ' % name + str(acc) + '\n')
        # Blank line on stdout after the last (VAT) result
        print('')

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    model_2 = make_basic_cnn()
    # NOTE(review): this is built from `model`, not `model_2`, although the
    # attacks below all target `model_2` -- confirm which one is intended.
    preds_2 = model(x)

    # NOTE(review): this initializes all global variables again, presumably
    # including the weights of the model trained above -- confirm.
    sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)

    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. DeepFool
    deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02,
                       'max_iter': 50, 'clip_min': 0., 'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Basic Iterative
    bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                  'clip_min': 0., 'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. Virtual adversarial
    vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                  'clip_min': 0., 'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)

    # Build the augmented training set: every clean sample followed by one
    # targeted JSMA example per other class, all keeping the clean label.
    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]

        # Keep the clean sample in the training set as well
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # All classes that differ from the label given in the dataset
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)

        for target in target_classes:
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # Save the generated data so it does not have to be re-crafted
    np.savez("jsma_training_data.npz", x_train=X_train_data,
             y_train=Y_train_data)

    # The log stays open for the whole adversarial training run because
    # evaluate_against_all writes to it on each evaluation.
    with open("Adversarial_jsma_elastic_against5.log", "w") as f_out:

        def evaluate_against_all():
            """Log accuracy on clean data and under all seven attacks."""
            eval_params = {'batch_size': batch_size}
            # (console label, prediction tensor, log-file prefix)
            targets = [
                ('Legitimate', preds, 'Legitimate accuracy: '),
                ('JSMA', preds_adv_random, 'JSMA accuracy:'),
                ('FGSM', preds_adv_fgsm, 'FGSM accuracy:'),
                ('Base Iterative', preds_adv_base_iter,
                 'Base Iterative accuracy:'),
                ('Elastic Net', preds_adv_elastic_net,
                 'Elastic Net accuracy:'),
                ('DeepFool', preds_adv_deepfool, 'DeepFool accuracy:'),
                ('C & W', preds_adv_cw, 'C & W accuracy:'),
                ('VAT', preds_adv_vat, 'VAT accuracy:'),
            ]
            for name, tensor, log_prefix in targets:
                accuracy = model_eval(sess, x, y, tensor, X_test, Y_test,
                                      args=eval_params)
                print('%s accuracy: %0.4f' % (name, accuracy))
                f_out.write(log_prefix + str(accuracy) + "\n")

            # Written once, after the last entry of the evaluation
            f_out.write("*******End of Epoch***********\n\n")
            print("*******End of Epoch***********\n\n")

        print("Now Adversarial Training with Elastic Net + modified X_train and Y_train")
        # NOTE(review): train_dir is a hard-coded absolute path -- confirm it
        # exists on the machine this runs on.
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
            'filename': 'trained_model.out'
        }
        model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                    predictions_adv=preds_adv_elastic_net,
                    evaluate=evaluate_against_all, verbose=False,
                    args=train_params, rng=rng)

    # Close TF session
    sess.close()

    return report
def JSMA_FGSM_BIM(train_start=0, train_end=60000, test_start=0,
                  test_end=10000, nb_epochs=6, batch_size=128,
                  learning_rate=0.001, clean_train=True, testing=False,
                  backprop_through_attack=False, nb_filters=64):
    """
    MNIST cleverhans tutorial

    Optionally trains a clean CNN and reports its accuracy under six attacks
    (JSMA, FGSM, BIM, elastic net, DeepFool, VAT), then builds a second CNN
    and trains it adversarially with the JSMA, FGSM and BIM predictions,
    evaluating it against all six attacks.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, train on clean examples and attack that
                        model before performing adversarial training
    :param testing: accepted but not used by this function
    :param backprop_through_attack: accepted but not used by this function
    :param nb_filters: number of filters passed to ``make_basic_cnn``
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            """Record and print clean-test accuracy of the clean model."""
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds, X_test, Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        print("#####Starting attacks on clean model#####")
        # (label, attack class, extra constructor kwargs, generate() kwargs),
        # evaluated against the clean model in this order.
        clean_attacks = [
            ('JSMA', SaliencyMapMethod, {'back': 'tf'},
             {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1.,
              'y_target': None}),
            ('FGSM', FastGradientMethod, {},
             {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
            ('BIM', BasicIterativeMethod, {},
             {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
              'clip_min': 0., 'clip_max': 1.}),
            ('EN', ElasticNetMethod, {'back': 'tf'},
             {'binary_search_steps': 1, 'max_iterations': 100,
              'learning_rate': 0.1, 'batch_size': source_samples,
              'initial_const': 10}),
            ('DF', DeepFool, {},
             {'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 50,
              'clip_min': 0., 'clip_max': 1.}),
            ('VAT', VirtualAdversarialMethod, {},
             {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
              'clip_min': 0., 'clip_max': 1.}),
        ]
        for name, attack_cls, ctor_kwargs, attack_params in clean_attacks:
            attack = attack_cls(model, sess=sess, **ctor_kwargs)
            adv_x = attack.generate(x, **attack_params)
            preds_adv = model.get_probs(adv_x)

            # Accuracy of the clean model on this attack's examples
            acc = model_eval(sess, x, y, preds_adv, X_test, Y_test,
                             args=eval_par)
            print('Clean test accuracy on %s adversarial examples: %0.4f'
                  % (name, acc))
        # Blank line on stdout after the last (VAT) result
        print('')

        print("Repeating the process, using adversarial training\n")

    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    # All attacks below are built on model_2, the model being adversarially
    # trained. Building them on the clean `model` would fail with a NameError
    # when clean_train is False, and would never evaluate or train model_2
    # on adversarial inputs.

    # Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model_2.get_probs(adv_x)

    # Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x)

    # Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model_2, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model_2.get_probs(adv_x)

    # Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model_2, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model_2.get_probs(adv_x)

    # Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model_2, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model_2.get_probs(adv_x)

    # Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)

    print("#####Evaluate trained model#####")

    def evaluate_2():
        """Print the adversarially trained model's accuracy per attack."""
        adversarial_preds = [
            ('JSMA', preds_adv_jsma),
            ('FGSM', preds_adv_fgsm),
            ('BIM', preds_adv_bim),
            ('EN', preds_adv_en),
            ('DF', preds_adv_df),
            ('VAT', preds_adv_vat),
        ]
        for name, preds_adv in adversarial_preds:
            acc = model_eval(sess, x, y, preds_adv, X_test, Y_test,
                             args=eval_par)
            print('Test accuracy on %s adversarial examples: %0.4f'
                  % (name, acc))
        # Blank line on stdout after the last (VAT) result
        print('')

    # Only JSMA, FGSM and BIM predictions take part in adversarial training.
    # NOTE(review): predictions_adv is given a list here -- confirm that the
    # model_train in use accepts a list rather than a single tensor.
    preds_2_adv = [
        preds_adv_jsma,
        preds_adv_fgsm,
        preds_adv_bim
    ]

    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    # Close TF session
    sess.close()

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL, testing=True,
                   label_smoothing=0.1):
    """
    Train (or restore) an MNIST Keras CNN and dump DeepFool adversarial sets.

    The function builds the model, restores it from ``train_dir`` when
    ``load_model`` is set and a checkpoint exists (otherwise trains it),
    crafts DeepFool adversarial examples for the whole test and train sets,
    evaluates the model on the adversarial test set, and pickles
    ``[x_adv_train, x_adv_test]`` to ``./df.pkl``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, training-set accuracy is also recorded
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    :raises NotImplementedError: if Keras is not in ``channels_last`` format
    """
    tf.keras.backend.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != 'channels_last':
        raise NotImplementedError("this tutorial requires keras to be configured to channels_last format")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain image parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    # makedirs also creates missing parents and tolerates an existing dir.
    os.makedirs(train_dir, exist_ok=True)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng)

    # Calculate training accuracy on clean data
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train,
                         args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Build the DeepFool attack graph once and reuse it for every batch.
    df = DeepFool(wrap, sess=sess)
    adv_x = df.generate(x)
    batch = 1000

    def craft_adversarial(data):
        """Run the DeepFool graph over ``data`` in chunks of ``batch`` rows."""
        # Stepping by ``batch`` keeps the trailing partial chunk, so the
        # result always has exactly one adversarial row per input row and
        # stays aligned with the labels.
        chunks = [
            sess.run(adv_x, feed_dict={x: data[start:start + batch]})
            for start in tqdm(range(0, len(data), batch))
        ]
        if not chunks:
            return data[:0]
        # Concatenate once instead of re-copying the array every iteration.
        return np.concatenate(chunks)

    x_adv_test = craft_adversarial(x_test)
    x_adv_train = craft_adversarial(x_train)

    def evaluate_adv():
        # Evaluate the accuracy of the MNIST model on DeepFool test examples.
        # Stored separately so the clean test accuracy is not overwritten.
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_adv_test, y_test,
                         args=eval_params)
        report.clean_train_adv_eval = acc
        print('Test accuracy on DeepFool adversarial examples: %0.4f' % acc)

    evaluate_adv()

    save_list = [x_adv_train, x_adv_test]
    print(x_adv_train.shape)
    print(x_adv_test.shape)
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open("./df.pkl", 'wb') as out_file:
        pickle.dump(save_list, out_file)

    return report
# Two placeholders of identical shape; both are passed to the model, which in
# this snippet is called with two inputs.
x_input1 = tf.placeholder(tf.float32, shape=batch_shape)
x_input2 = tf.placeholder(tf.float32, shape=batch_shape)
prediction = model(x_input2, x_input1)
# prediction = sess.run(predictions, feed_dict={phase_train_placeholder: False})

# Define the DeepFool attack for the model
# NOTE(review): `steps` is not used anywhere in this excerpt - confirm it is
# still needed.
steps = 1
df_params = {
    'nb_classes': 2,
    'max_iter': 150,
    'clip_min': 0.0,
    'clip_max': 1.0,
    'nb_candidate': 2
}
DF = DeepFool(model, back='tf', sess=sess)
# NOTE(review): generate() receives two placeholders plus `faces2_batch`
# positionally; presumably a locally modified DeepFool - verify the signature.
adv_x = DF.generate(x_input1, x_input2, faces2_batch, **df_params)

# Only the trainable variables under the 'InceptionResnetV1' scope are handed
# to the saver.
inception_vars = tf.get_collection(
    tf.GraphKeys.TRAINABLE_VARIABLES, scope='InceptionResnetV1')
saver = tf.train.Saver(inception_vars, max_to_keep=3)
# sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
# Hard-coded location of the pretrained facenet model directory.
pretrained_model = '/home/fan/facenet_adversarial_faces/models/facenet/20170512-110547/'
if pretrained_model:
    print('Restoring pretrained model: %s' % pretrained_model)
    # facenet.load_model(pretrained_model)
    model_exp = os.path.expanduser(pretrained_model)
    print('Model directory: %s' % model_exp)
    # Only the checkpoint file name is kept; the first returned value is
    # discarded.
    _, ckpt_file = facenet.get_model_filenames(model_exp)
    # print('Metagraph file: %s' % meta_file)
    print('Checkpoint file: %s' % ckpt_file)
def eval(sess, model_name, X_train, Y_train, X_test, Y_test, cnn=False,
         rbf=False, fgsm=False, jsma=False, df=False, bim=False):
    """
    Evaluate a saved Keras model on clean and adversarial test samples.

    The model is loaded from ``rbfmodels/<model_name>.h5`` when ``rbf`` is
    true and from ``models/<model_name>.h5`` otherwise. Accuracy on the clean
    test set and on each requested attack is written to ``cifar_results.txt``.
    The session is closed before returning.

    :param sess: TF session used for evaluation; closed by this function
    :param model_name: base name of the saved ``.h5`` model file
    :param X_train: unused; kept for interface compatibility
    :param Y_train: unused; kept for interface compatibility
    :param X_test: test inputs
    :param Y_test: test labels
    :param cnn: if true, inputs are fed as (None, 32, 32, 3); otherwise as
                flat (None, 3072)
    :param rbf: if true, load from ``rbfmodels/`` with the RBFLayer custom
                object
    :param fgsm: if true, evaluate against FastGradientMethod
    :param jsma: if true, evaluate against SaliencyMapMethod
    :param df: if true, evaluate against DeepFool
    :param bim: currently ignored; the iterative attack was disabled because
                it hung on the first batch
    """
    results_path = "cifar_results.txt"

    # The context manager closes the results file even if loading the model
    # or running an attack raises.
    with open(results_path, "w") as text_file:
        # load saved model
        print("Load model ... ")
        if rbf:
            loaded_model = load_model("rbfmodels/{}.h5".format(model_name),
                                      custom_objects={'RBFLayer': RBFLayer})
            text_file.write(
                'Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
        else:
            loaded_model = load_model("models/{}.h5".format(model_name))
            text_file.write(
                'Evaluating on models/{}.h5\n\n'.format(model_name))

        # Set placeholders
        if cnn:
            x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        else:
            x = tf.placeholder(tf.float32, shape=(None, 3072))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        predictions = loaded_model(x)
        accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on legitimate test examples: {0}\n'.format(
                str(accuracy)))

        # Craft adversarial examples depending on the input parameters
        wrap = KerasModelWrapper(loaded_model)

        def report_attack(label, attack_cls, attack_params):
            """Build one attack graph, evaluate it and log the accuracy."""
            attack = attack_cls(wrap, sess=sess)
            # The adversarial input is treated as a constant.
            adv_x = tf.stop_gradient(attack.generate(x, **attack_params))
            preds_adv = loaded_model(adv_x)
            adv_accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test,
                                      args={"batch_size": 128})
            text_file.write(
                'Test accuracy on {0} adversarial test examples: {1}\n'.format(
                    label, str(adv_accuracy)))

        # The flag parameters stay booleans; attack objects live only inside
        # report_attack.
        if fgsm:
            report_attack('fgsm', FastGradientMethod, {'eps': 0.3})

        if jsma:
            report_attack('jsma', SaliencyMapMethod, {
                'theta': 2.,
                'gamma': 0.145,
                'clip_min': 0.,
                'clip_max': 1.,
                'y_target': None
            })

        if df:
            report_attack('df', DeepFool,
                          {'nb_candidate': 10, 'max_iter': 50})

    # Report the file that was actually written.
    print('Accuracy results outputted to {}'.format(results_path))

    # Close TF session
    sess.close()
# Define the update func loss = w * losses optimizer = tf.train.AdamOptimizer(learning_rate=args.lr) train_step = optimizer.minimize(loss) # Test acc on legit data logits = wrap.get_logits(x[0]) acc, acc_op = tf.metrics.accuracy( labels=tf.argmax(y, 1), predictions=tf.argmax(logits, 1)) # Define adv attack deepfool = DeepFool(wrap, sess=sess) deepfool_params = {'eps': args.noise_eps, 'clip_min': 0., 'clip_max': 1.} # Attack images x_deepfool = deepfool.generate(x[0], **deepfool_params) # Consider the attack to be constant x_deepfool = tf.stop_gradient(x_deepfool) # Evaluate predictions on adv attacks preds_deepfool = model(x_deepfool) acc_deepfool, acc_op_deepfool = tf.metrics.accuracy( labels=tf.argmax(y, 1), predictions=tf.argmax(preds_deepfool, 1)) # Define adv attack momentum_iterative = MomentumIterativeMethod(wrap, sess=sess) momentum_iterative_params = { 'eps': args.noise_eps, 'clip_min': 0., 'clip_max': 1. }
def baseline_deepfool(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, nb_epochs=6, batch_size=128,
                      learning_rate=0.001, clean_train=True, testing=False,
                      backprop_through_attack=False, nb_filters=64):
    """
    MNIST baseline: clean training, then DeepFool adversarial training.

    When ``clean_train`` is set, a first CNN is trained on clean data and
    evaluated on clean and DeepFool test examples. A second CNN is then
    trained with DeepFool adversarial examples and evaluated on clean, FGSM
    and DeepFool test examples.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: value forwarded to ``make_basic_cnn`` for both models
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Shared training / evaluation / attack settings
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 50,
        'clip_min': 0.,
        'clip_max': 1.
    }
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            acc = model_eval(sess, x, y, preds, X_test, Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        # This model is trained here on clean data only; adversarial
        # training below uses a fresh model (model_2).
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training accuracy on clean data
        if testing:
            acc = model_eval(sess, x, y, preds, X_train, Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        # Initialize the DeepFool attack object and graph
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test,
                         args=eval_params)
        print('Test accuracy on DeepFool adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training accuracy on adversarial examples
        if testing:
            acc = model_eval(sess, x, y, preds_adv, X_train, Y_train,
                             args=eval_params)
            report.train_clean_train_adv_eval = acc

        print("Repeating the process, using adversarial training")

    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)

    # DeepFool examples crafted against the second model
    deepfool2 = DeepFool(model_2, sess=sess)
    adv_x_2 = deepfool2.generate(x, **deepfool_params)
    if not backprop_through_attack:
        # Treat the crafted examples as constants so no gradient flows back
        # through the attack during training.
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)

    # FGSM examples crafted against the second model (evaluation only; the
    # model is not trained on them)
    fgsm = FastGradientMethod(model_2, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    if not backprop_through_attack:
        adv_x_fgsm = tf.stop_gradient(adv_x_fgsm)
    preds_2_fgsm = model_2(adv_x_fgsm)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on FGSM examples
        # NOTE(review): adv_train_adv_eval holds the FGSM figure while
        # train_adv_train_adv_eval below is computed on DeepFool examples;
        # confirm this asymmetry is intended.
        accuracy = model_eval(sess, x, y, preds_2_fgsm, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

        # Accuracy of the DeepFool adv trained model on DeepFool examples
        # (printed only, not stored in the report)
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on DeepFool adversarial examples: %0.4f'
              % accuracy)

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    # Calculate training accuracies
    if testing:
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
def train(alpha, eps2_ratio, gen_ratio, fgsm_eps, LR, logfile): logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , eps2_ratio \t %d , gen_ratio \t %d \n"%(fgsm_eps, LR, alpha, eps2_ratio, gen_ratio)) ############################# ##Hyper-parameter Setting#### ############################# hk = 256; #number of hidden units at the last layer Delta2 = (14*14+2)*25; #global sensitivity for the first hidden layer Delta3_adv = 2*hk #10*(hk + 1/4 * hk**2) #10*(hk) #global sensitivity for the output layer Delta3_benign = 2*hk #10*(hk); #global sensitivity for the output layer D = 50000; #size of the dataset L = 2499; #batch size image_size = 28; padding = 4; #numHidUnits = 14*14*32 + 7*7*64 + M + 10; #number of hidden units #gen_ratio = 1 epsilon1 = 0.0; #0.175; #epsilon for dpLRP epsilon2 = 0.1*(1 + gen_ratio); #epsilon for the first hidden layer epsilon3 = 0.1*(1); #epsilon for the last hidden layer total_eps = epsilon1 + epsilon2 + epsilon3 print(total_eps) uncert = 0.1; #uncertainty modeling at the output layer infl = 1; #inflation rate in the privacy budget redistribution R_lowerbound = 1e-5; #lower bound of the LRP c = [0, 40, 50, 200] #norm bounds epochs = 200; #number of epochs preT_epochs = 50; #number of epochs T = int(D/L*epochs + 1); #number of steps T pre_T = int(D/L*preT_epochs + 1); step_for_epoch = int(D/L); #number of steps for one epoch broken_ratio = 1 #alpha = 9.0 # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] #eps2_ratio = 10; # [1/10, 1/8, 1/6, 1/4, 1/2, 1, 2, 4, 6, 8, 10] #eps_benign = 1/(1+eps2_ratio)*(2*epsilon2) #eps_adv = eps2_ratio/(1+eps2_ratio)*(2*epsilon2) #fgsm_eps = 0.1 rand_alpha = 0.05 ##Robustness## robustness_T = (fgsm_eps*18*18*L*epsilon2)/Delta2; #### LRPfile = os.getcwd() + '/Relevance_R_0_075.txt'; ############################# mnist = input_data.read_data_sets("MNIST_data/", one_hot = True); ############################# ##Construct the Model######## ############################# #Step 4: Randomly initiate the noise, Compute 1/|L| * Delta3 
for the output layer# #Compute the 1/|L| * Delta3 for the last hidden layer# """eps3_ratio = Delta3_adv/Delta3_benign; eps3_benign = 1/(1+eps3_ratio)*(epsilon3) eps3_adv = eps3_ratio/(1+eps3_ratio)*(epsilon3)""" loc, scale3_benign, scale3_adv = 0., Delta3_benign/(epsilon3*L), Delta3_adv/(epsilon3*L); ### #End Step 4# # Parameters Declarification W_conv1 = weight_variable('W_conv1', [5, 5, 1, 32], collect=[AECODER_VARIABLES]); b_conv1 = bias_variable('b_conv1', [32], collect=[AECODER_VARIABLES]); shape = W_conv1.get_shape().as_list() w_t = tf.reshape(W_conv1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2*(14*14 + 2)*25/(L*sensitivity) dp_epsilon=1.0 #0.1 delta_r = fgsm_eps*(image_size**2); #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon) W_conv2 = weight_variable('W_conv2', [5, 5, 32, 64], collect=[CONV_VARIABLES]); b_conv2 = bias_variable('b_conv2', [64], collect=[CONV_VARIABLES]); W_fc1 = weight_variable('W_fc1', [4 * 4 * 64, hk], collect=[CONV_VARIABLES]); b_fc1 = bias_variable('b_fc1', [hk], collect=[CONV_VARIABLES]); W_fc2 = weight_variable('W_fc2', [hk, 10], collect=[CONV_VARIABLES]); b_fc2 = bias_variable('b_fc2', [10], collect=[CONV_VARIABLES]); """scale2 = tf.Variable(tf.ones([hk])) beta2 = tf.Variable(tf.zeros([hk])) tf.add_to_collections([CONV_VARIABLES], scale2) tf.add_to_collections([CONV_VARIABLES], beta2)""" params = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2] ### #Step 5: Create the model# noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]); adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]); keep_prob = tf.placeholder(tf.float32); x = tf.placeholder(tf.float32, [None, image_size*image_size]); x_image = tf.reshape(x, [-1,image_size,image_size,1]); #perturbFMx = 
np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28) #perturbFMx = np.reshape(perturbFMx, [-1, 28, 28, 1]); # pretrain ### #Enc_Layer1 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) #pretrain = Enc_Layer1.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, epsilon = 2*epsilon2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise) ########### adv_x = tf.placeholder(tf.float32, [None, image_size*image_size]); adv_image = tf.reshape(adv_x, [-1,image_size,image_size,1]); #perturbFMx_adv = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28) #perturbFMx_adv = np.reshape(perturbFMx_adv, [-1, 28, 28, 1]); # pretrain adv ### #perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*32) #perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]); FM_h = tf.placeholder(tf.float32, [None, 14, 14, 32]); Enc_Layer2 = EncLayer(inpt=adv_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape = tf.shape(adv_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = adv_noise, perturbFM_h = FM_h) Enc_Layer3 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise, perturbFM_h = FM_h) ########### x_image += noise; x_image = tf.clip_by_value(x_image, -10, 10) #Clip the values of each input feature. adv_image += adv_noise; adv_image = tf.clip_by_value(adv_image, -10, 10) #Clip the values of each input feature. 
#perturbFM = np.random.laplace(0.0, scale3_benign, hk) #perturbFM = np.reshape(perturbFM, [hk]); perturbFM = np.random.laplace(0.0, scale3_benign, hk * 10) perturbFM = np.reshape(perturbFM, [hk, 10]); y_conv = inference(x_image, perturbFM, hk, FM_h, params); softmax_y_conv = tf.nn.softmax(y_conv) #robust_mask = inference_robust_mask(y_conv, Delta2, L, epsilon2, robustness_T) #perturbFM = np.random.laplace(0.0, scale3_adv, hk) #perturbFM = np.reshape(perturbFM, [hk]); y_adv_conv = inference(adv_image, perturbFM, hk, FM_h, params); #adv_robust_mask = inference_robust_mask(y_adv_conv, Delta2, L, epsilon2, robustness_T) # test model perturbFM_test = np.random.laplace(0.0, 0, hk) perturbFM_test = np.reshape(perturbFM_test, [hk]); x_test = tf.reshape(x, [-1,image_size,image_size,1]); y_test = inference(x_test, perturbFM_test, hk, FM_h, params); #test_robust_mask = inference_robust_mask(y_test, Delta2, L, epsilon2, robustness_T) #Define a place holder for the output label# y_ = tf.placeholder(tf.float32, [None, 10]); adv_y_ = tf.placeholder(tf.float32, [None, 10]); #End Step 5# ############################# ############################# ##Define loss and Optimizer## ############################# ''' Computes differentially private sigmoid cross entropy given `logits`. Measures the probability error in discrete classification tasks in which each class is independent and not mutually exclusive. For brevity, let `x = logits`, `z = labels`. 
The logistic loss is z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x))) = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x))) = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)) = (1 - z) * x + log(1 + exp(-x)) = x - x * z + log(1 + exp(-x)) For x < 0, to avoid overflow in exp(-x), we reformulate the above x - x * z + log(1 + exp(-x)) = log(exp(x)) - x * z + log(1 + exp(-x)) = - x * z + log(1 + exp(x)) Hence, to ensure stability and avoid overflow, the implementation uses this equivalent formulation max(x, 0) - x * z + log(1 + exp(-abs(x))) `logits` and `labels` must have the same type and shape. Let denote neg_abs_logits = -abs(y_conv) = -abs(h_fc1 * W_fc2). By Applying Taylor Expansion, we have: Taylor = max(y_conv, 0) - y_conv * y_ + log(1 + exp(-abs(y_conv))); = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) = F1 + F2 where: F1 = max(h_fc1 * W_fc2, 0) + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) and F2 = - (y_ * h_fc1) * W_fc2 To ensure that Taylor is differentially private, we need to perturb all the coefficients, including the term y_ * h_fc1 * W_fc2. Note that h_fc1 is differentially private, since its computation on top of the DP Affine transformation does not access the original data. Therefore, F1 should be differentially private. 
We need to preserve DP in F2, which reads the groundtruth label y_, as follows: By applying Funtional Mechanism, we perturb (y_ * h_fc1) * W_fc2 as ((y_ * h_fc1) + perturbFM) * W_fc2 = (y_ * h_fc1)*W_fc2 + (perturbFM * W_fc2): perturbFM = np.random.laplace(0.0, scale3, hk * 10) perturbFM = np.reshape(perturbFM/L, [hk, 10]); where scale3 = Delta3/(epsilon3) = 2*hk/(epsilon3); To allow computing gradients at zero, we define custom versions of max and abs functions [Tensorflow]. Source: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/nn_impl.py @ TensorFlow ''' ### Taylor for benign x zeros = array_ops.zeros_like(y_conv, dtype=y_conv.dtype) cond = (y_conv >= zeros) relu_logits = array_ops.where(cond, y_conv, zeros) neg_abs_logits = array_ops.where(cond, -y_conv, y_conv) #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits))) Taylor_benign = math_ops.add(relu_logits - y_conv * y_, math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) - tf.reduce_sum(perturbFM*W_fc2) #Taylor_benign = tf.abs(y_conv - y_) ### Taylor for adv_x zeros_adv = array_ops.zeros_like(y_adv_conv, dtype=y_conv.dtype) cond_adv = (y_adv_conv >= zeros_adv) relu_logits_adv = array_ops.where(cond_adv, y_adv_conv, zeros_adv) neg_abs_logits_adv = array_ops.where(cond_adv, -y_adv_conv, y_adv_conv) #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits))) Taylor_adv = math_ops.add(relu_logits_adv - y_adv_conv * adv_y_, math.log(2.0) + 0.5*neg_abs_logits_adv + 1.0/8.0*neg_abs_logits_adv**2) - tf.reduce_sum(perturbFM*W_fc2) #Taylor_adv = tf.abs(y_adv_conv - adv_y_) ### Adversarial training loss adv_loss = (1/(L + L*alpha))*(Taylor_benign + alpha * Taylor_adv) '''Some time, using learning rate decay can help to stablize training process. 
However, use this carefully, since it may affect the convergent speed.''' global_step = tf.Variable(0, trainable=False) pretrain_var_list = tf.get_collection(AECODER_VARIABLES) train_var_list = tf.get_collection(CONV_VARIABLES) #print(pretrain_var_list) #print(train_var_list) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): pretrain_step = tf.train.AdamOptimizer(LR).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list); train_step = tf.train.AdamOptimizer(LR).minimize(adv_loss, global_step=global_step, var_list=train_var_list); sess = tf.InteractiveSession(); # Define the correct prediction and accuracy # This needs to be changed to "Robust Prediction" correct_prediction_x = tf.equal(tf.argmax(y_test,1), tf.argmax(y_,1)); accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32)); ############# # use these to get predictions wrt to robust conditions """robust_correct_prediction_x = tf.multiply(test_robust_mask, tf.cast(correct_prediction_x, tf.float32)) accuracy_x_robust = tf.reduce_sum(robust_correct_prediction_x) / tf.reduce_sum(test_robust_mask) #certified_utility = 2/(1/accuracy_x_robust + 1/(tf.reduce_sum(test_robust_mask)/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32)))) certified_utility = (1.0*tf.reduce_sum(test_robust_mask))/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))""" ############# # craft adversarial samples from x for training dynamic_eps = tf.placeholder(tf.float32); emsemble_L = int(L/3) softmax_y = tf.nn.softmax(y_test) #c_x_adv = fgsm(x, softmax_y, eps=fgsm_eps, clip_min=0.0, clip_max=1.0) c_x_adv = fgsm(x, softmax_y, eps=(dynamic_eps)/10, clip_min=-1.0, clip_max=1.0) # for I-FGSM x_adv = tf.reshape(c_x_adv, [emsemble_L,image_size*image_size]); #====================== attack ========================= #attack_switch = {'randfgsm':True, 'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 
'stm':True} #attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True} attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False} #other possible attacks: # ElasticNetMethod # FastFeatureAdversaries # LBFGS # SaliencyMapMethod # VirtualAdversarialMethod # y_test = logits (before softmax) # softmax_y_test = preds (probs, after softmax) softmax_y_test = tf.nn.softmax(y_test) # create saver saver = tf.train.Saver(tf.all_variables()) sess.run(W_conv1.initializer) _gamma = sess.run(gamma) _gamma_x = Delta2/L epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x) print(epsilon2_update/_gamma + epsilon2_update/_gamma_x) print(epsilon2_update) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW*(14**2) dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon) ############################# iterativeStep = 100 # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(os.getcwd() + './tmp/train') if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path); saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') start_time = time.time(); # adv pretrain model (Auto encoder layer) cost = tf.reduce_sum(Enc_Layer2.cost); logfile.write("pretrain: \n") # define cleverhans abstract models for using cleverhans attacks ch_model_logits = CustomCallableModelWrapper(callable_fn=inference_test_input, output_layer='logits', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise) ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise) # rand+fgsm # if attack_switch['randfgsm']: # randfgsm_obj = 
FastGradientMethod(model=ch_model_probs, sess=sess) # x_randfgsm_t = (fgsm_eps - rand_alpha) * randfgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0) # x_rand_t = rand_alpha * tf.sign(tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=1.0)) # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]); attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # Deepfool if attack_switch['deepfool']: print('creating attack tensor of DeepFool') deepfool_obj = DeepFool(model=ch_model_logits, sess=sess) #x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['deepfool'] = x_adv_test_deepfool # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj 
= MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # SPSA # note here the epsilon is the infinity norm instead of precent of perturb # Maybe exclude this method first, since it seems to have some constrain about the data value range if attack_switch['spsa']: print('creating attack tensor of SPSA') spsa_obj = SPSA(model=ch_model_logits, sess=sess) #x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1, ord=2) x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1) attack_tensor_dict['spsa'] = x_adv_test_spsa # CarliniWagnerL2 # confidence=0 is fron their paper # it is said to be slow, maybe exclude first if attack_switch['cwl2']: print('creating attack tensor of CarliniWagnerL2') cwl2_obj = CarliniWagnerL2(model=ch_model_logits, sess=sess) #x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['cwl2'] = x_adv_test_cwl2 # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') 
madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry # SpatialTransformationMethod # the params are pretty different from on the paper # so I use default # exclude since there's bug if attack_switch['stm']: print('creating attack tensor of SpatialTransformationMethod') stm_obj = SpatialTransformationMethod(model=ch_model_probs, sess=sess) #x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6, ord=2) x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6) attack_tensor_dict['stm'] = x_adv_test_stm #====================== attack ========================= sess.run(tf.initialize_all_variables()); ##perturb h for training perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]); ##perturb h for testing perturbFM_h_test = np.random.laplace(0.0, 0, 14*14*32) perturbFM_h_test = np.reshape(perturbFM_h_test, [-1, 14, 14, 32]); '''for i in range(_global_step, _global_step + pre_T): d_eps = random.random(); batch = mnist.train.next_batch(L); #Get a random batch. 
adv_images = sess.run(x_adv, feed_dict = {x:batch[0], y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps}) for iter in range(0, 9): adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps}) """batch = mnist.train.next_batch(emsemble_L) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]}) batch = mnist.train.next_batch(emsemble_L) adv_images_madry = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]}) train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)""" batch_2 = mnist.train.next_batch(L); pretrain_step.run(feed_dict={adv_x: np.append(adv_images, batch_2[0], axis = 0), adv_noise: AdvLnoise, FM_h: perturbFM_h}); if i % int(5*step_for_epoch) == 0: cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32) logfile.write("step \t %d \t %g \n"%(i, cost_value)) print(cost_value) pre_train_finish_time = time.time() print('pre_train finished in: ' + parse_time(pre_train_finish_time - start_time))''' # train and test model with adv samples max_benign_acc = -1; max_robust_benign_acc = -1 #max_adv_acc = -1; test_size = len(mnist.test.images) AdvLnoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); AdvLnoise_test = generateIdLMNoise(image_size, 0, epsilon2_update, test_size); Lnoise_empty = generateIdLMNoise(image_size, 0, epsilon2_update, L); BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); last_eval_time = -1 accum_time = 0 accum_epoch = 0 max_adv_acc_dict = {} max_robust_adv_acc_dict = {} #max_robust_adv_utility_dict = {} for atk in attack_switch.keys(): if atk not in max_adv_acc_dict: max_adv_acc_dict[atk] = -1 max_robust_adv_acc_dict[atk] = -1 for i in range(_global_step, _global_step + T): # this batch is for generating adv samples batch = mnist.train.next_batch(emsemble_L); #Get a random 
batch. y_adv_batch = batch[1] #The number of epochs we print out the result. Print out the result every 5 epochs. if i % int(10*step_for_epoch) == 0 and i > int(10*step_for_epoch): cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32) print(cost_value) if last_eval_time < 0: last_eval_time = time.time() #===================benign samples===================== predictions_form_argmax = np.zeros([test_size, 10]) #test_bach = mnist.test.next_batch(test_size) softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: BenignLNoise, FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1): _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]); for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1; softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: (BenignLNoise + _BenignLNoise/2), FM_h: (perturbFM_h + _perturbFM_h/2)}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult) is_robust.append(robustness_from_argmax >= fgsm_eps) acc = np.sum(is_correct)*1.0/test_size robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_utility = np.sum(is_robust)*1.0/test_size max_benign_acc = max(max_benign_acc, acc) max_robust_benign_acc = max(max_robust_benign_acc, robust_acc*robust_utility) 
log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(i, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility) #===================adv samples===================== #log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(i, total_eps) """adv_images_dict = {} for atk in attack_switch.keys(): if attack_switch[atk]: adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_:mnist.test.labels}) print("Done with the generating of Adversarial samples")""" #===================adv samples===================== adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} for atk in attack_switch.keys(): if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_: mnist.test.labels, adv_noise: AdvLnoise_test, mu_alpha:[fgsm_eps]}) ### PixelDP Robustness ### predictions_form_argmax = np.zeros([test_size, 10]) softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 2000): if n_draws % 1000 == 0: print(n_draws) _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]); for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1; softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (perturbFM_h + _perturbFM_h/2)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (BenignLNoise + _BenignLNoise/2), FM_h: perturbFM_h}) #softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (_perturbFM_h)}) * 
softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult) is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_size robust_adv_acc_dict[atk] = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum(is_robust)*1.0/test_size ############################## for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk]) max_adv_acc_dict[atk] = max(max_adv_acc_dict[atk], adv_acc_dict[atk]) max_robust_adv_acc_dict[atk] = max(max_robust_adv_acc_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n') # logfile.write("step \t %d \t %g \t %g \n"%(i, benign_acc, adv_acc)) # print("step \t %d \t %g \t %g"%(i, benign_acc, adv_acc)); # estimate end time """if i > 0 and i % int(10*step_for_epoch) == 0: current_time_interval = time.time() - last_eval_time last_eval_time = time.time() print('during last eval interval, {} epoch takes {}'.format(10, parse_time(current_time_interval))) accum_time += current_time_interval accum_epoch += 10 estimate_time = ((_global_step + T - i) / step_for_epoch) * (accum_time / accum_epoch) print('estimate finish in: {}'.format(parse_time(estimate_time)))""" #print("step \t %d \t adversarial test accuracy \t %g"%(i, 
accuracy_x.eval(feed_dict={x: adv_images, y_: mnist.test.labels, noise: Lnoise_empty}))); """checkpoint_path = os.path.join(os.getcwd() + '/tmp/train', 'model.ckpt') saver.save(sess, checkpoint_path, global_step=i);""" d_eps = random.random(); y_adv = batch[1] adv_images = sess.run(attack_tensor_dict['ifgsm'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) """for iter in range(0, 9): adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})""" batch = mnist.train.next_batch(emsemble_L) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) y_adv = np.append(y_adv, batch[1], axis = 0) batch = mnist.train.next_batch(emsemble_L) adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) y_adv = np.append(y_adv, batch[1], axis = 0) train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0) batch = mnist.train.next_batch(L); #Get a random batch. # train with benign and adv samples pretrain_step.run(feed_dict={adv_x: train_images, x: batch[0], adv_noise: AdvLnoise_test, noise: BenignLNoise, FM_h: perturbFM_h}); train_step.run(feed_dict={x: batch[0], adv_x: train_images, y_: batch[1], adv_y_: y_adv, noise: BenignLNoise, adv_noise: AdvLnoise_test, FM_h: perturbFM_h}); duration = time.time() - start_time; # print(parse_time(duration)); #print running time duration# max_acc_string = "max acc: benign: \t{:.4f} {:.4f}".format(max_benign_acc, max_robust_benign_acc) for atk in attack_switch.keys(): if attack_switch[atk]: max_acc_string += " {}: \t{:.4f} {:.4f}".format(atk, max_adv_acc_dict[atk], max_robust_adv_acc_dict[atk]) logfile.write(max_acc_string + '\n') logfile.write(str(duration) + '\n')
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64,
                   num_threads=None):
    """
    Train a basic CNN on MNIST and evaluate it against the DeepFool attack.

    The function trains on clean examples, plots a histogram of the gap
    between the largest and second-largest predicted probability for every
    test example, then measures accuracy on DeepFool adversarial examples.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, train on clean examples and run the
                        evaluation; if false, nothing is trained and the
                        empty report is returned
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: accepted for interface compatibility;
                                    not read by this function
    :param nb_filters: number of filters passed to ``make_basic_cnn``
    :param num_threads: if truthy, TensorFlow intra-op parallelism is
                        limited to a single thread (the value itself is
                        not used)
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    dp_params = {'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    # Everything below needs the model built for clean training; without it
    # there is nothing to attack, so return the (empty) report instead of
    # failing later on an undefined `model`.
    if not clean_train:
        return report

    # Train an MNIST model
    model = make_basic_cnn(nb_filters=nb_filters)
    preds = model.get_probs(x)
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess.run(init)

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                args=train_params, rng=rng)

    # Per-example confidence margin: largest minus second-largest probability.
    margins = []
    for i in range(len(X_test)):
        pred = sess.run(preds, {x: X_test[i:i + 1]})
        print(pred)
        print(Y_test[i:i + 1])
        ranked = np.sort(pred)  # ascending along the class axis
        margins.append(ranked[0, -1] - ranked[0, -2])

    # Draw a histogram
    def draw_hist(myList, Title, Xlabel, Ylabel):
        # `density` replaces the `normed` keyword, which newer matplotlib
        # releases no longer accept.
        plt.hist(myList, np.arange(0, 1, 0.01), density=True, stacked=True,
                 facecolor='blue')
        plt.xlabel(Xlabel)
        plt.ylabel(Ylabel)
        plt.title(Title)
        plt.show()

    draw_hist(myList=margins,
              Title='legitimate',
              Xlabel='difference between max and second largest',
              Ylabel='Probability')

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train,
                         args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the DeepFool attack object and graph
    deepfool = DeepFool(model, back='tf', sess=sess)
    adv_x = deepfool.generate(x, **dp_params)
    preds_adv = model.get_probs(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculate training error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_train, Y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    return report
def _attack_in_batches(sess, adv_x, x, inputs, step, y=None, labels=None,
                       log_prefix=None):
    """Run the attack tensor over ``inputs`` in consecutive slices of ``step``.

    When ``y`` and ``labels`` are given, every slice is attacked towards the
    class that follows its true class (``(argmax(label) + 1) % 10``), fed
    through the placeholder ``y``. Returns all adversarial batches stacked
    along the first axis.
    """
    batches = []
    for batch_idx, start in enumerate(range(0, len(inputs), step)):
        feed = {x: inputs[start:start + step]}
        if y is not None:
            true_cls = np.argmax(labels[start:start + step], axis=1)
            feed[y] = np_utils.to_categorical((true_cls + 1) % 10, 10)
        batches.append(sess.run(adv_x, feed_dict=feed))
        if log_prefix is not None:
            print('%s image: %s' % (log_prefix, str(batch_idx)))
    return np.vstack(batches)


def adv_generate(nb_epochs=25,
                 batch_size=128,
                 learning_rate=0.001,
                 clean_train=True,
                 testing=False,
                 nb_filters=64,
                 num_threads=None,
                 data='cifar',
                 adv_attack='fgsm',
                 save_dir='data'):
    """
    Train a classifier, attack it, and save the adversarial examples as HDF5.

    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training / evaluation batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, train on clean examples and run the attack;
                        if false, nothing is trained or generated and the
                        empty report is returned
    :param testing: if true, also record accuracies on the training set in
                    the AccuracyReport
    :param nb_filters: accepted for interface compatibility; not read by
                       this function
    :param num_threads: if truthy, TensorFlow intra-op parallelism is
                        limited to a single thread (the value itself is
                        not used)
    :param data: ``'mnist'`` selects MNIST; any other value selects CIFAR-10
    :param adv_attack: attack name, matched case-insensitively against
                       FGSM, CWL2, JSMA, DeepFool and LBFGS
    :param save_dir: root output directory; files are written to
                     ``<save_dir>/<attack name>/{train,test}_adv.h5``
    :return: an AccuracyReport object
    :raises ValueError: if ``adv_attack`` is not a supported attack name
    """
    # Resolve the attack name before any expensive work so that a typo fails
    # immediately rather than after training.
    canonical_names = dict(
        (name.lower(), name)
        for name in ('FGSM', 'CWL2', 'JSMA', 'DeepFool', 'LBFGS'))
    attack_name = canonical_names.get(str(adv_attack).lower())
    if attack_name is None:
        raise ValueError(
            'Unknown adv_attack %r; expected one of %s'
            % (adv_attack, ', '.join(sorted(canonical_names.values()))))

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    config = tf.ConfigProto(**config_args)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    if data == "mnist":
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                      train_end=60000,
                                                      test_start=0,
                                                      test_end=10000)
    else:
        X_train, Y_train, X_test, Y_test = data_cifar10()

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    if data == 'mnist':
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2018, 7, 18])

    # Every later step needs the trained model; without clean training there
    # is nothing to attack, so return the (empty) report.
    if not clean_train:
        return report

    # Train the model
    if data == 'mnist':
        model = build_model(0.01, 1e-6)
    else:
        model = build_model_cifar(0.01, 1e-6)
    preds = model(x)

    def evaluate():
        # Evaluate the accuracy of the model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                args=train_params, rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train,
                         args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the attack object and graph
    if attack_name == "FGSM":
        print("FGSM ATTACK...")
        fgsm_params = {'eps': 0.1, 'clip_min': 0., 'clip_max': 1.}
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
    elif attack_name == "CWL2":
        print("CWL2 ATTACK...")
        # NOTE(review): the attack is configured for batches of 8 while the
        # generation loop below feeds slices of 10 -- confirm the installed
        # cleverhans version accepts that.
        cwl2_params = {'batch_size': 8}
        cwl2 = CarliniWagnerL2(model, sess=sess)
        adv_x = cwl2.generate(x, **cwl2_params)
    elif attack_name == "JSMA":
        print("JSMA ATTACK...")
        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.
        }
        adv_x = jsma.generate(x, **jsma_params)
    elif attack_name == "DeepFool":
        print("DeepFool ATTACK...")
        deepfool = DeepFool(model, sess=sess)
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.0,
            'clip_max': 1.0
        }
        adv_x = deepfool.generate(x, **deepfool_params)
    else:  # attack_name == "LBFGS"
        print("LBFGS ATTACK...")
        lbfgs_params = {'y_target': y, 'batch_size': 100}
        lbfgs = LBFGS(model, sess=sess)
        adv_x = lbfgs.generate(x, **lbfgs_params)
    preds_adv = model(adv_x)

    # Generate adversarial examples for the whole train and test sets, so the
    # saved images always line up with the saved labels.
    if attack_name == "LBFGS":
        # LBFGS is targeted: slices of 100 match the attack's batch_size, and
        # the test split is attacked with targets derived from its own labels.
        adv_imgs = _attack_in_batches(sess, adv_x, x, X_train, 100,
                                      y=y, labels=Y_train,
                                      log_prefix='train')
        print(adv_imgs.shape)
        adv_imgs_test = _attack_in_batches(sess, adv_x, x, X_test, 100,
                                           y=y, labels=Y_test,
                                           log_prefix='test')
    else:
        adv_imgs = _attack_in_batches(sess, adv_x, x, X_train, 10)
        print(adv_imgs.shape)
        adv_imgs_test = _attack_in_batches(sess, adv_x, x, X_test, 10)

    label_truth_train = np.argmax(Y_train, axis=1)
    label_truth_test = np.argmax(Y_test, axis=1)

    save_dir = os.path.join(save_dir, attack_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print(adv_imgs.shape, adv_imgs_test.shape)
    provider.save_h5(adv_imgs, label_truth_train,
                     os.path.join(save_dir, "train_adv.h5"))
    provider.save_h5(adv_imgs_test, label_truth_test,
                     os.path.join(save_dir, "test_adv.h5"))

    # Evaluate the accuracy of the model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculate training error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_train, Y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    return report
def _deepfool_ops(torch_model, sess, c, h, w, clip_min, clip_max):
    """Build the DeepFool graph for ``torch_model`` in the default TF graph.

    Returns ``(x_op, adv_x_op, tf_model_fn)``: the input placeholder, the
    adversarial-example tensor, and the TF-callable wrapper of the model.
    """
    x_op = tf.placeholder(tf.float32, shape=(None, c, h, w,))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn,
                                            output_layer='logits')

    # Create a DeepFool attack
    atk_op = DeepFool(cleverhans_model, sess=sess)
    atk_params = {'clip_min': clip_min, 'clip_max': clip_max}
    adv_x_op = atk_op.generate(x_op, **atk_params)
    return x_op, adv_x_op, tf_model_fn


def DF(torch_model, dataset, eps_list, opt, c, h, w, clip_min, clip_max):
    """Evaluate a PyTorch model against DeepFool, or collect adversarial examples.

    One full pass over ``dataset`` is made per entry of ``eps_list``. The
    entry's value is not passed to the attack; it only determines how many
    passes (and therefore how many result entries) there are.

    :param torch_model: PyTorch model, wrapped via
        ``convert_pytorch_model_to_tf``.
    :param dataset: iterable yielding ``(xs, ys)`` batches of torch tensors.
    :param eps_list: iterable; one result entry is produced per element.
    :param opt: ``'evaluate'`` or ``'generate'``.
    :param c: channel count of the input placeholder.
    :param h: height of the input placeholder.
    :param w: width of the input placeholder.
    :param clip_min: lower clipping bound handed to the attack.
    :param clip_max: upper clipping bound handed to the attack.
    :return: for ``'evaluate'``, a list of adversarial accuracies in [0, 1];
        for ``'generate'``, a list of lists holding one ``(1, c, h, w)``
        array per example that the model classified correctly before the
        attack and incorrectly after it; ``None`` for any other ``opt``.
    """
    if opt == 'evaluate':
        acclist = []
        for _ in eps_list:
            # The context manager releases the session after each pass.
            with tf.Session() as sess:
                x_op, adv_x_op, tf_model_fn = _deepfool_ops(
                    torch_model, sess, c, h, w, clip_min, clip_max)
                adv_preds_op = tf_model_fn(adv_x_op)

                # Run an evaluation of our model against DeepFool
                total = 0
                correct = 0
                for xs, ys in dataset:
                    # feed_dict needs host arrays; a tensor living on an
                    # accelerator cannot be fed directly.
                    inputs = xs.detach().cpu().numpy()
                    labels = ys.detach().cpu().numpy()
                    adv_preds = sess.run(adv_preds_op,
                                         feed_dict={x_op: inputs})
                    correct += int(
                        (np.argmax(adv_preds, axis=1) == labels).sum())
                    # Count the real batch length so a short final batch is
                    # not over-counted.
                    total += labels.shape[0]

            acc = float(correct) / total if total else 0.0
            print('Adv accuracy: {:.3f}'.format(acc * 100))
            acclist.append(acc)
        return acclist

    elif opt == 'generate':
        advpacklist = []
        for _ in eps_list:
            advlist = []
            with tf.Session() as sess:
                x_op, adv_x_op, _unused_fn = _deepfool_ops(
                    torch_model, sess, c, h, w, clip_min, clip_max)

                for xs, ys in dataset:
                    xs = xs.to(device)
                    labels = ys.detach().cpu().numpy().reshape(-1)
                    adv_np = sess.run(
                        adv_x_op,
                        feed_dict={x_op: xs.detach().cpu().numpy()})

                    with torch.no_grad():
                        clean_out = torch_model(xs).data.cpu().numpy()
                        # The adversarial batch must live on the same device
                        # as the model before it is classified.
                        adv_in = torch.from_numpy(adv_np).to(device)
                        adv_out = torch_model(adv_in).data.cpu().numpy()
                    clean_pred = np.argmax(clean_out, axis=1)
                    adv_pred = np.argmax(adv_out, axis=1)

                    # Keep examples that were right before the attack and
                    # wrong after it; works for any batch size.
                    fooled = (clean_pred == labels) & (adv_pred != labels)
                    for idx in np.flatnonzero(fooled):
                        advlist.append(adv_np[idx:idx + 1])

            print(len(advlist))
            advpacklist.append(advlist)
        return advpacklist

    # Unrecognised `opt`: nothing is computed.
    return None