def test_get_logits(self): import tensorflow as tf model = KerasModelWrapper(self.model) x = tf.placeholder(tf.float32, shape=(None, 100)) preds = model.get_probs(x) logits = model.get_logits(x) x_val = np.random.rand(2, 100) tf.global_variables_initializer().run(session=self.sess) p_val, logits = self.sess.run([preds, logits], feed_dict={x: x_val}) p_gt = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True) self.assertTrue(np.allclose(p_val, p_gt, atol=1e-6))
def test_get_probs(self): import tensorflow as tf model = KerasModelWrapper(self.model) x = tf.placeholder(tf.float32, shape=(None, 100)) preds = model.get_probs(x) x_val = np.random.rand(2, 100) tf.global_variables_initializer().run(session=self.sess) p_val = self.sess.run(preds, feed_dict={x: x_val}) self.assertTrue(np.allclose(np.sum(p_val, axis=1), 1, atol=1e-6)) self.assertTrue(np.all(p_val >= 0)) self.assertTrue(np.all(p_val <= 1))
def test_fprop(self): import tensorflow as tf model = KerasModelWrapper(self.model) x = tf.placeholder(tf.float32, shape=(None, 100)) out_dict = model.fprop(x) self.assertEqual(set(out_dict.keys()), set(['l1', 'l2', 'softmax'])) # Test the dimension of the hidden represetation self.assertEqual(int(out_dict['l1'].shape[1]), 20) self.assertEqual(int(out_dict['l2'].shape[1]), 10) # Test the caching x2 = tf.placeholder(tf.float32, shape=(None, 100)) out_dict2 = model.fprop(x2) self.assertEqual(set(out_dict2.keys()), set(['l1', 'l2', 'softmax'])) self.assertEqual(int(out_dict2['l1'].shape[1]), 20)
def test_get_logits_over_interval(self): import tensorflow as tf model = cnn_model() wrap = KerasModelWrapper(model) fgsm_params = {'eps': .5} img = np.ones(shape=(28, 28, 1)) num_points = 21 with tf.Session() as sess: tf.global_variables_initializer().run() logits = utils.get_logits_over_interval(sess, wrap, img, fgsm_params, min_epsilon=-10, max_epsilon=10, num_points=num_points) self.assertEqual(logits.shape[0], num_points)
def attack(img, label, model_name, evaluate=False): with tf.Session() as sess: sess.run(tf.global_variables_initializer()) input_shape = np.array(img[0]).shape model = m.modelB(input_shape) model.load(model_name) wrap = KerasModelWrapper(model) fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.1, 'clip_min': 0, 'clip_max': 1} adv = fgsm.generate_np(img, **fgsm_params) if evaluate: _, test_acc = model.evaluate(adv, label) print("accuracy: {:.2f}%\terror rate: {:.2f}%\n".format(100 * test_acc, (1.0 - test_acc) * 100)) return adv
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR, filename=FILENAME, load_model=LOAD_MODEL, testing=False, label_smoothing=0.1): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ tf.keras.backend.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if keras.backend.image_data_format() != 'channels_last': raise NotImplementedError( "this tutorial requires keras to be configured to channels_last format" ) # Create TF session and set as Keras backend session sess = tf.Session() keras.backend.set_session(sess) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Define TF model graph model = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) preds = model(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) report.clean_train_clean_eval = acc # assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } rng = np.random.RandomState([2017, 8, 30]) if not os.path.exists(train_dir): os.mkdir(train_dir) ckpt = tf.train.get_checkpoint_state(train_dir) print(train_dir, ckpt) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path wrap = KerasModelWrapper(model) if load_model and ckpt_path: saver = tf.train.Saver() print(ckpt_path) saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") loss = CrossEntropy(wrap, smoothing=label_smoothing) train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng) # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv_x = fgsm.generate(x, **fgsm_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, x_train, y_train, args=eval_par) report.train_clean_train_adv_eval = acc print("Repeating the process, using adversarial training") # Redefine TF model graph model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) wrap_2 = KerasModelWrapper(model_2) preds_2 = model_2(x) fgsm2 = FastGradientMethod(wrap_2, sess=sess) def attack(x): return fgsm2.generate(x, **fgsm_params) preds_2_adv = model_2(attack(x)) loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack) def evaluate_2(): # Accuracy of adversarially trained model on legitimate test inputs eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, x_test, y_test, args=eval_params) print('Test accuracy on legitimate examples: %0.4f' % accuracy) report.adv_train_clean_eval = accuracy # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_2_adv, x_test, y_test, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Perform and evaluate adversarial training train(sess, loss_2, x_train, y_train, evaluate=evaluate_2, args=train_params, rng=rng) # Calculate training errors if testing: eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds_2, x_train, y_train, args=eval_params) report.train_adv_train_clean_eval = accuracy accuracy = model_eval(sess, x, y, preds_2_adv, x_train, y_train, args=eval_params) report.train_adv_train_adv_eval = accuracy return report
def test_get_layer_names(self): model = KerasModelWrapper(self.model) layer_names = model.get_layer_names() self.assertEqual(layer_names, ['l1', 'l2', 'softmax'])
def test_logit_layer_name_is_logits(self): model = KerasModelWrapper(self.model) logits_name = model._get_logits_name() self.assertEqual(logits_name, 'l2')
def test_softmax_layer_name_is_softmax(self): model = KerasModelWrapper(self.model) softmax_name = model._get_softmax_name() self.assertEqual(softmax_name, 'softmax')
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, testing=False, label_smoothing=0.1): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param testing: if true, training error is calculated :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Force TensorFlow to use single thread to improve reproducibility config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) if keras.backend.image_data_format() != 'channels_last': raise NotImplementedError( "this tutorial requires keras to be configured to channels_last format" ) # Create TF session and set as Keras backend session sess = tf.Session(config=config) keras.backend.set_session(sess) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Label smoothing y_train -= label_smoothing * (y_train - 1. / nb_classes) # Define Keras model model = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) print("Defined Keras model.") # To be able to call the model in the custom loss, we need to call it once # before, see https://github.com/tensorflow/tensorflow/issues/23769 model(model.input) # Initialize the Fast Gradient Sign Method (FGSM) attack object wrap = KerasModelWrapper(model) fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv_acc_metric = get_adversarial_acc_metric(model, fgsm, fgsm_params) model.compile(optimizer=keras.optimizers.Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy', adv_acc_metric]) # Train an MNIST model model.fit(x_train, y_train, batch_size=batch_size, epochs=nb_epochs, validation_data=(x_test, y_test), verbose=2) # Evaluate the accuracy on legitimate and adversarial test examples _, acc, adv_acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0) report.clean_train_clean_eval = acc report.clean_train_adv_eval = adv_acc print('Test accuracy on legitimate examples: %0.4f' % acc) print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc) # Calculate training error if testing: _, train_acc, train_adv_acc = model.evaluate(x_train, y_train, batch_size=batch_size, verbose=0) report.train_clean_train_clean_eval = train_acc report.train_clean_train_adv_eval = train_adv_acc print("Repeating the process, using adversarial training") # Redefine Keras model model_2 = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) model_2(model_2.input) wrap_2 = KerasModelWrapper(model_2) fgsm_2 = FastGradientMethod(wrap_2, sess=sess) # Use a loss function based on legitimate and adversarial examples adv_loss_2 = get_adversarial_loss(model_2, fgsm_2, fgsm_params) adv_acc_metric_2 = get_adversarial_acc_metric(model_2, fgsm_2, fgsm_params) model_2.compile(optimizer=keras.optimizers.Adam(learning_rate), loss=adv_loss_2, metrics=['accuracy', adv_acc_metric_2]) # Train an MNIST model model_2.fit(x_train, y_train, batch_size=batch_size, epochs=nb_epochs, validation_data=(x_test, y_test), verbose=2) # Evaluate the accuracy on legitimate and adversarial test examples _, acc, adv_acc = model_2.evaluate(x_test, y_test, batch_size=batch_size, verbose=0) report.adv_train_clean_eval = acc report.adv_train_adv_eval = adv_acc print('Test accuracy on legitimate examples: %0.4f' % acc) print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc) # Calculate training error if testing: _, train_acc, train_adv_acc = model_2.evaluate(x_train, y_train, batch_size=batch_size, verbose=0) report.train_adv_train_clean_eval = train_acc report.train_adv_train_adv_eval = train_adv_acc return report