def test_xe_smoothing(self): loss = CrossEntropy(self.model, smoothing=0.1) l = loss.fprop(self.x, self.y) with tf.Session() as sess: vl1 = sess.run(l, feed_dict={self.x: self.vx, self.y: self.vy}) vl2 = sess.run(l, feed_dict={self.x: self.vx, self.y: self.vy}) self.assertClose(vl1, sum([2.10587597, 1.47194624]) / 2., atol=1e-6) self.assertClose(vl2, sum([2.10587597, 1.47194624]) / 2., atol=1e-6)
def test_xe(self): loss = CrossEntropy(self.model, smoothing=0.) l = loss.fprop(self.x, self.y) with tf.Session() as sess: vl1 = sess.run(l, feed_dict={self.x: self.vx, self.y: self.vy}) vl2 = sess.run(l, feed_dict={self.x: self.vx, self.y: self.vy}) self.assertClose(vl1, sum([2.210599660, 1.53666997]) / 2., atol=1e-6) self.assertClose(vl2, sum([2.210599660, 1.53666997]) / 2., atol=1e-6)
def train(self, nb_filters, label_smoothing): self.model = ModelBasicCNN('model1', self.nb_classes, nb_filters) self.preds = self.model.get_logits(x) self.loss = CrossEntropy(self.model, smoothing=label_smoothing) train(sess, self.loss, self.x_train, self.y_train, evaluate=self.evaluate, args=self.train_params, rng=self.range, var_list=self.model.get_params())
def train_model(model): # Load image from disk x_train, y_train, x_test, y_test = get_mnist(model.input_dir) # Train an MNIST model train_params = { 'nb_epochs': EPOCHS, 'batch_size': BATCH_SIZE, 'learning_rate': LEARNING_RATE } eval_params = {'batch_size': BATCH_SIZE} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) print('Test accuracy on train: %0.4f' % (acc)) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) def evaluate(): do_eval(preds, x_test, y_test) train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng) # Calculate training error do_eval(preds, x_train, y_train)
def prep_bbox(sess, x, y, x_train, y_train, x_test, y_test, nb_epochs, batch_size, learning_rate, rng, nb_classes=10, img_rows=28, img_cols=28, nchannels=1): """ Define and train a model that simulates the "remote" black-box oracle described in the original paper. :param sess: the TF session :param x: the input placeholder for MNIST :param y: the ouput placeholder for MNIST :param x_train: the training data for the oracle :param y_train: the training labels for the oracle :param x_test: the testing data for the oracle :param y_test: the testing labels for the oracle :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param rng: numpy.random.RandomState :return: """ # Define TF model graph (for the black-box model) nb_filters = 64 model = ModelBasicCNN('model1', nb_classes, nb_filters) loss = CrossEntropy(model, smoothing=0.1) predictions = model.get_logits(x) print("Defined TensorFlow model graph.") # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng) # Print out the accuracy on legitimate data eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, predictions, x_test, y_test, args=eval_params) print('Test accuracy of black-box on legitimate test ' 'examples: ' + str(accuracy)) return model, predictions, accuracy
def build_model(self): x = self.x_sym # Define a model. model = make_basic_picklable_cnn() preds = model.get_logits(x) self.loss = CrossEntropy(model, smoothing=0.) self.model = model self.preds = preds
def eval_model(model, input_dir): # Load image from disk _, _, x_test, y_test = get_mnist(input_dir) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) # Evaluate an MNIST model eval_params = {'batch_size': BATCH_SIZE} acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) print('Test accuracy on train: %0.4f' % (acc))
def adverse_train(self, nb_filters, label_smoothing): self.model = ModelBasicCNN('model2', self.nb_classes, nb_filters) fgsm = FastGradientMethod(self.model, sess=sess) def attack(x): return fgsm.generate(x, **self.fgsm_params) self.preds = self.model.get_logits(x) self.loss = CrossEntropy(self.model, smoothing=label_smoothing, attack=attack) adv_x = attack(x) self.preds_adv = self.model.get_logits(adv_x) train(sess, self.loss, self.x_train, self.y_train, evaluate=self.evaluate, args=self.train_params, rng=self.range, var_list=self.model.get_params())
def prepblackbox(self, nb_filters): model = ModelBasicCNN('model1', self.nb_classes, nb_filters) loss = CrossEntropy(model, smoothing=0.1) predictions = model.get_logits(x) print("Defined TensorFlow model graph.") train(sess, loss, self.x_train, self.y_train, args=self.train_params, rng=self.range) eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, predictions, self.x_test, self.y_test, args=eval_params) print('Test accuracy of black-box on legitimate test ' 'examples: ' + str(accuracy)) return model, predictions, accuracy
def __test(): # report = AccuracyReport() tf.set_random_seed(1234) sess = tf.Session() set_log_level(logging.DEBUG) # Get MNIST test data mnist = MNIST(train_start=0, train_end=60000, test_start=0, test_end=10000) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) nb_filters = 64 # Define TF model graph model = ModelBasicCNN('model1', nb_classes, nb_filters) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) print("Defined TensorFlow model graph.") # Train an MNIST model train_params = { 'nb_epochs': NB_EPOCHS, 'batch_size': BATCH_SIZE, 'learning_rate': LEARNING_RATE, 'filename': os.path.split(MODEL_PATH)[-1] } rng = np.random.RandomState([2017, 8, 30]) # check if we've trained before, and if we have, use that pre-trained model if os.path.exists(model_path + ".meta"): tf_model_load(sess, model_path) else: train(sess, loss, x_train, y_train, args=train_params, rng=rng) saver = tf.train.Saver() saver.save(sess, model_path) # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': BATCH_SIZE} accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) assert x_test.shape[0] == test_end - test_start, x_test.shape print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
def prep_bbox(sess, x, y, x_train, y_train, x_test, y_test, nb_epochs, batch_size, learning_rate, rng, nb_classes=10, img_rows=28, img_cols=28, nchannels=1): """ Define and train a model that simulates the "remote" black-box oracle described in the original paper. :param sess: the TF session :param x: the input placeholder for MNIST :param y: the ouput placeholder for MNIST :param x_train: the training data for the oracle :param y_train: the training labels for the oracle :param x_test: the testing data for the oracle :param y_test: the testing labels for the oracle :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param rng: numpy.random.RandomState :return: """ keras.layers.core.K.set_learning_phase(1) config = tf.ConfigProto(device_count = {'GPU' : 1}) # sess = tf.InteractiveSession(config=config) keras.backend.set_session(sess) model_path = MODEL_PATH try: oracle = KerasModelWrapper(load_model(model_path)) except: import errno, os raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), model_path) loss = CrossEntropy(oracle, smoothing=0.1) predictions = oracle.get_logits(x) print("Loaded well-trained Keras oracle.") # Print out the accuracy on legitimate data eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, predictions, x_test, y_test, args=eval_params) print('Test accuracy of black-box on legitimate test ' 'examples: ' + str(accuracy)) return oracle, predictions, accuracy
def train_substitute(bbox_preds, x_sub, y_sub, nb_classes, nb_epochs_s, batch_size, lr, data_aug, lmbda, aug_batch_size, rng, img_rows=28, img_cols=28, nchannels=1): model_sub = ModelSubstitute('model_s', nb_classes) preds_sub = model_sub.get_logits(x) loss_sub = CrossEntropy(model_sub, smoothing=0) print("Defined TensorFlow model graph for the substitute.") grads = jacobian_graph(preds_sub, x, nb_classes) for i in xrange(data_aug): print("Substitute training epoch #" + str(i)) train_params = { 'nb_epochs': nb_epochs_s, 'batch_size': batch_size, 'learning_rate': lr } with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"): train(sess, loss_sub, x_sub, to_categorical(y_sub, nb_classes), init_all=False, args=train_params, rng=rng, var_list=model_sub.get_params()) if i < data_aug - 1: print("Augmenting substitute training data.") lmbda_coef = 2 * int(int(i / 3) != 0) - 1 x_sub = jacobian_augmentation(sess, x, x_sub, y_sub, grads, lmbda_coef * lmbda, aug_batch_size) print("Labeling substitute training data.") y_sub = np.hstack([y_sub, y_sub]) x_sub_prev = x_sub[int(len(x_sub) / 2):] eval_params = {'batch_size': batch_size} bbox_val = batch_eval(sess, [x], [bbox_preds], [x_sub_prev], args=eval_params)[0] y_sub[int(len(x_sub) / 2):] = np.argmax(bbox_val, axis=1) show_plot(x_sub, y_sub) return model_sub, preds_sub, x_sub, y_sub
def init_from_keras_classifier(self, keras_classifier: KerasClassifier): self._graph = keras_classifier.graph self._sess = keras_classifier.session self._input_placeholder = keras_classifier.input self._label_placeholder = keras_classifier.label self._preds = keras_classifier.predictions self._model = KerasModelWrapper(keras_classifier.model) self._loss = CrossEntropy(self._model, smoothing=self._label_smoothing) with self._graph.as_default(): fgsm = FastGradientMethod(self._model, sess=self._sess) fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv_x = fgsm.generate(self._input_placeholder, **fgsm_params) # Consider the attack to be constant self._adv_x = tf.stop_gradient(adv_x) # model predictions for adversarial examples self._preds_adv = keras_classifier.model(adv_x) self._keras_classifier = keras_classifier # FIXME[hack]: we need to keep a reference to the KerasClassifier to prevent the session from being closed
def SNNL_example(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, nb_filters=NB_FILTERS, SNNL_factor=SNNL_FACTOR, output_dir=OUTPUT_DIR): """ A simple model trained to minimize Cross Entropy and Maximize Soft Nearest Neighbor Loss at each internal layer. This outputs a TSNE of the sign of the adversarial gradients of a trained model. A model with a negative SNNL_factor will show little or no class clusters, while a model with a 0 SNNL_factor will have class clusters in the adversarial gradient direction. :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param SNNL_factor: multiplier for Soft Nearest Neighbor Loss :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session sess = tf.Session() # Get MNIST data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) print('Test accuracy on legitimate examples: %0.4f' % (acc)) model = ModelBasicCNN('model', nb_classes, nb_filters) preds = model.get_logits(x) cross_entropy_loss = CrossEntropy(model) if not SNNL_factor: loss = cross_entropy_loss else: loss = SNNLCrossEntropy(model, factor=SNNL_factor, optimize_temperature=False) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval') train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval') def imscatter(points, images, ax=None, zoom=1, cmap="hot"): if ax is None: ax = plt.gca() artists = [] i = 0 if not isinstance(cmap, list): cmap = [cmap] * len(points) for x0, y0 in points: transformed = (images[i] - np.min(images[i])) / \ (np.max(images[i]) - np.min(images[i])) im = OffsetImage(transformed[:, :, 0], zoom=zoom, cmap=cmap[i]) ab = AnnotationBbox(im, (x0, y0), xycoords='data', frameon=False) artists.append(ax.add_artist(ab)) i += 1 ax.update_datalim(np.column_stack(np.transpose(points))) ax.autoscale() ax.get_xaxis().set_ticks([]) ax.get_yaxis().set_ticks([]) return artists adv_grads = tf.sign(tf.gradients(cross_entropy_loss.fprop(x, y), x)) feed_dict = {x: x_test[:batch_size], y: y_test[:batch_size]} adv_grads_val = sess.run(adv_grads, feed_dict=feed_dict) adv_grads_val = np.reshape(adv_grads_val, (batch_size, img_rows * img_cols)) X_embedded = TSNE(n_components=2, verbose=0).fit_transform(adv_grads_val) plt.figure(num=None, figsize=(50, 50), dpi=40, facecolor='w', edgecolor='k') plt.title( "TSNE of Sign of Adv Gradients, SNNLCrossEntropy Model, factor:" + str(FLAGS.SNNL_factor), fontsize=42) imscatter(X_embedded, x_test[:batch_size], zoom=2, cmap="Purples") plt.savefig(output_dir + 'adversarial_gradients_SNNL_factor_' + str(SNNL_factor) + '.png')
def train_sub(sess, x, y, bbox_preds, x_sub, y_sub, nb_classes, nb_epochs_s, batch_size, learning_rate, data_aug, lmbda, aug_batch_size, rng, img_rows=28, img_cols=28, nchannels=1): """ This function creates the substitute by alternatively augmenting the training data and training the substitute. :param sess: TF session :param x: input TF placeholder :param y: output TF placeholder :param bbox_preds: output of black-box model predictions :param x_sub: initial substitute training data :param y_sub: initial substitute training labels :param nb_classes: number of output classes :param nb_epochs_s: number of epochs to train substitute model :param batch_size: size of training batches :param learning_rate: learning rate for training :param data_aug: number of times substitute training data is augmented :param lmbda: lambda from arxiv.org/abs/1602.02697 :param rng: numpy.random.RandomState instance :return: """ # Define TF model graph (for the black-box model) model_sub = ModelSubstitute('model_s', nb_classes) preds_sub = model_sub.get_logits(x) loss_sub = CrossEntropy(model_sub, smoothing=0) print("Defined TensorFlow model graph for the substitute.") # Define the Jacobian symbolically using TensorFlow grads = jacobian_graph(preds_sub, x, nb_classes) # Train the substitute and augment dataset alternatively for rho in xrange(data_aug): print("Substitute training epoch #" + str(rho)) train_params = { 'nb_epochs': nb_epochs_s, 'batch_size': batch_size, 'learning_rate': learning_rate } with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"): train(sess, loss_sub, x, y, x_sub, to_categorical(y_sub, nb_classes), init_all=False, args=train_params, rng=rng, var_list=model_sub.get_params()) # If we are not at last substitute training iteration, augment dataset if rho < data_aug - 1: print("Augmenting substitute training data.") # Perform the Jacobian augmentation lmbda_coef = 2 * int(int(rho / 3) != 0) - 1 x_sub = jacobian_augmentation(sess, x, x_sub, y_sub, grads, lmbda_coef * lmbda, aug_batch_size) print("Labeling substitute training data.") # Label the newly generated synthetic points using the black-box y_sub = np.hstack([y_sub, y_sub]) x_sub_prev = x_sub[int(len(x_sub) / 2):] eval_params = {'batch_size': batch_size} bbox_val = batch_eval(sess, [x], [bbox_preds], [x_sub_prev], args=eval_params)[0] # Note here that we take the argmax because the adversary # only has access to the label (not the probabilities) output # by the black-box model y_sub[int(len(x_sub) / 2):] = np.argmax(bbox_val, axis=1) return model_sub, preds_sub
def zoo(viz_enabled=VIZ_ENABLED, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, source_samples=SOURCE_SAMPLES, learning_rate=LEARNING_RATE, attack_iterations=ATTACK_ITERATIONS, model_path=MODEL_PATH, targeted=TARGETED): """ :param viz_enabled: (boolean) activate plots of adversarial examples :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param nb_classes: number of output classes :param source_samples: number of test inputs to attack :param learning_rate: learning rate for training :param model_path: path to the model file :param targeted: should we run a targeted attack? or untargeted? :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Create TF session sess = tf.Session() print("Created TensorFlow session.") set_log_level(logging.DEBUG) if DATASET == 'MNIST': train_start = 0 train_end = 60000 test_start = 0 test_end = 10000 ds = dataset.MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end, center=False) elif DATASET == 'SVHN': train_start = 0 train_end = 73257 test_start = 0 test_end = 26032 ds = dataset.SVHN(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) elif DATASET == 'CIFAR10': train_start = 0 train_end = 60000 test_start = 0 test_end = 10000 ds = dataset.CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end, center=False) x_train, y_train, x_test, y_test = ds.get_set('train') + ds.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) nb_filters = 64 # Define TF model graph model = ModelBasicCNN(DATASET, nb_classes, nb_filters, (None, img_rows, img_cols, nchannels)) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) print("Defined TensorFlow model graph.") ########################################################################### # Training the model using TensorFlow ########################################################################### # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'filename': os.path.split(model_path)[-1] } rng = np.random.RandomState([2018, 10, 22]) # check if we've trained before, and if we have, use that pre-trained model if os.path.exists(model_path + ".meta"): tf_model_load(sess, model_path) else: train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng) saver = tf.train.Saver() saver.save(sess, model_path) # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) assert x_test.shape[0] == test_end - test_start, x_test.shape print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) report.clean_train_clean_eval = accuracy ########################################################################### # Craft adversarial examples using Carlini and Wagner's approach ########################################################################### nb_adv_per_sample = str(nb_classes - 1) if targeted else '1' print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample + ' adversarial examples') print("This could take some time ...") # Instantiate a Zoo attack object zoo = Zoo(model, sess=sess) if viz_enabled: assert source_samples == nb_classes idxs = [ np.where(np.argmax(y_test, axis=1) == i)[0][0] for i in range(nb_classes) ] if targeted: if viz_enabled: # Initialize our array for grid visualization grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') adv_inputs = np.array([[instance] * nb_classes for instance in x_test[idxs]], dtype=np.float32) else: adv_inputs = np.array([[instance] * nb_classes for instance in x_test[:source_samples]], dtype=np.float32) one_hot = np.zeros((nb_classes, nb_classes)) one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1 adv_inputs = adv_inputs.reshape( (source_samples * nb_classes, img_rows, img_cols, nchannels)) adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape( (source_samples * nb_classes, nb_classes)) yname = "y_target" else: if viz_enabled: # Initialize our array for grid visualization grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') adv_inputs = x_test[idxs] else: adv_inputs = x_test[:source_samples] adv_ys = None yname = "y" zoo_params = { 'binary_search_steps': BINARY_SEARCH_STEPS, yname: adv_ys, 'max_iterations': attack_iterations, 'learning_rate': ZOO_LEARNING_RATE, 'batch_size': source_samples * nb_classes if targeted else source_samples, 'initial_const': INIT_CONST, 'solver': SOLVER, 'image_shape': [img_rows, img_cols, nchannels], 'nb_classes': nb_classes } adv = zoo.generate_np(adv_inputs, **zoo_params) eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} if targeted: adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys, args=eval_params) else: if viz_enabled: adv_accuracy = 1 - model_eval( sess, x, y, preds, adv, y_test[idxs], args=eval_params) else: adv_accuracy = 1 - model_eval(sess, x, y, preds, adv, y_test[:source_samples], args=eval_params) if viz_enabled: for j in range(nb_classes): if targeted: for i in range(nb_classes): grid_viz_data[i, j] = adv[i * nb_classes + j] else: grid_viz_data[j, 0] = adv_inputs[j] grid_viz_data[j, 1] = adv[j] print(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy)) report.clean_train_adv_eval = 1. - adv_accuracy # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean( np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Close TF session sess.close() # Finally, block & display a grid of all the adversarial examples if viz_enabled: _ = grid_visual(grid_viz_data) return report
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0, test_end=10000, viz_enabled=VIZ_ENABLED, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, source_samples=SOURCE_SAMPLES, learning_rate=LEARNING_RATE, attack_iterations=ATTACK_ITERATIONS, model_path=MODEL_PATH, model_path_cls=MODEL_PATH, targeted=TARGETED): # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) rng = np.random.RandomState() # Create TF session sess = tf.Session() print("Created TensorFlow session.") set_log_level(logging.DEBUG) nb_latent_size = 100 # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) x_t = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) y_t = tf.placeholder(tf.float32, shape=(None, nb_classes)) z = tf.placeholder(tf.float32, shape=(None, nb_latent_size)) z_t = tf.placeholder(tf.float32, shape=(None, nb_latent_size)) #nb_filters = 64 nb_layers = 500 # Define TF model graph model = ModelBasicAE('model', nb_layers, nb_latent_size) cl_model = ModelCls('cl_model') #preds = model.get_logits(x) recons = model.get_layer(x, 'RECON') loss = SquaredError(model) print("Defined TensorFlow model graph.") loss_cls = CrossEntropy(cl_model) y_logits = cl_model.get_layer(z, 'LOGITS') ########################################################################### # Training the model using TensorFlow ########################################################################### # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'filename': os.path.split(model_path)[-1] } train_params_cls = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'filename': os.path.split(model_path_cls)[-1] } rng = np.random.RandomState([2017, 8, 30]) # check if we've trained before, and if we have, use that pre-trained model #if os.path.exists(model_path + ".meta"): # tf_model_load(sess, model_path) #else: eval_params_cls = {'batch_size': batch_size} # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} ########################################################################### # Craft adversarial examples using Carlini and Wagner's approach ########################################################################### nb_adv_per_sample = str(nb_classes - 1) if targeted else '1' print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample + ' adversarial examples') print("This could take some time ...") # Instantiate a CW attack object cw = CarliniWagnerAE(model, cl_model, sess=sess) if viz_enabled: assert source_samples == nb_classes idxs = [ np.where(np.argmax(y_test, axis=1) == i)[0][0] for i in range(nb_classes) ] if targeted: if viz_enabled: # Initialize our array for grid visualization grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') grid_viz_data_1 = np.zeros(grid_shape, dtype='f') adv_inputs = np.array([[instance] * (nb_classes - 1) for instance in x_test[idxs]], dtype=np.float32) #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]]) adv_input_y = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes - 1): targ.append(y_test[idxs[curr_num]]) adv_input_y.append(targ) adv_input_y = np.array(adv_input_y) adv_target_y = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes): if (id != curr_num): targ.append(y_test[idxs[id]]) adv_target_y.append(targ) adv_target_y = np.array(adv_target_y) #print("adv_input_y: \n", adv_input_y) #print("adv_target_y: \n", adv_target_y) adv_input_targets = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes): if (id != curr_num): targ.append(x_test[idxs[id]]) adv_input_targets.append(targ) adv_input_targets = np.array(adv_input_targets) adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1), img_rows, img_cols, nchannels)) adv_input_targets = adv_input_targets.reshape( (source_samples * (nb_classes - 1), img_rows, img_cols, nchannels)) adv_input_y = adv_input_y.reshape( source_samples * (nb_classes - 1), 10) adv_target_y = adv_target_y.reshape( source_samples * (nb_classes - 1), 10) #print("adv_input_y: \n", adv_input_y) #print("adv_target_y: \n", adv_target_y) one_hot = np.zeros((nb_classes, nb_classes)) one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1 train_ae(sess, loss, x_train, x_train, args=train_params, rng=rng, var_list=model.get_params()) saver = tf.train.Saver() saver.save(sess, model_path) x_train_lat = model.get_layer(x_train, 'LATENT') x_test_lat = model.get_layer(x_test, 'LATENT') x_train_lat = sess.run(x_train_lat) x_test_lat = sess.run(x_test_lat) def do_eval_cls(preds, x_set, y_set, x_tar_set, report_key, is_adv=None): acc = model_eval(sess, z, y, preds, z_t, x_set, y_set, x_tar_set, args=eval_params_cls) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) def eval_cls(): do_eval_cls(y_logits, x_test_lat, y_test, x_test_lat, 'clean_train_clean_eval', False) #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params()) train_cls_lat(sess, loss_cls, x_train_lat, y_train, evaluate=eval_cls, args=train_params_cls, rng=rng, var_list=cl_model.get_params()) saver.save(sess, model_path_cls) #adv_input_y = cl_model.get_layer(adv_inputs, 'LOGITS') #adv_target_y = cl_model.get_layer(adv_input_targets, 'LOGITS') adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape( (source_samples * nb_classes, nb_classes)) yname = "y_target" cw_params_batch_size = source_samples * (nb_classes - 1) cw_params = { 'binary_search_steps': 10, yname: adv_ys, 'max_iterations': attack_iterations, 'learning_rate': CW_LEARNING_RATE, 'batch_size': cw_params_batch_size, 'initial_const': 1 } adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params) #print("shaep of adv: ", np.shape(adv)) recon_orig = model.get_layer(adv_inputs, 'RECON') lat_adv = model.get_layer(adv, 'LATENT') recon_adv = model.get_layer(adv, 'RECON') lat_orig = model.get_layer(x, 'LATENT') lat_orig_recon = model.get_layer(recons, 'LATENT') #pred_adv_recon = cl_model.get_layer(recon_adv, 'LOGITS') pred_adv_recon = cl_model.get_layer(lat_adv, 'LOGITS') #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} eval_params = {'batch_size': 90} if targeted: noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae( sess, x, x_t, recons, adv_inputs, adv_input_targets, adv, recon_adv, lat_orig, lat_orig_recon, args=eval_params) acc = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls) print("noise: ", noise) print("classifier acc: ", acc) recon_adv = sess.run(recon_adv) recon_orig = sess.run(recon_orig) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): if targeted: for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if (i == j): grid_viz_data[i, j] = recon_orig[curr_class * 9] grid_viz_data_1[i, j] = adv_inputs[curr_class * 9] curr_class = curr_class + 1 else: if (j > i): grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j - 1] else: grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j] #rint(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean( np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Close TF session #sess.close() # Finally, block & display a grid of all the adversarial examples if viz_enabled: _ = grid_visual(grid_viz_data) _ = grid_visual(grid_viz_data_1) #return report #adversarial training if (adv_train == True): print("starting adversarial training") #sess1 = tf.Session() adv_input_set = [] adv_input_target_set = [] for i in range(20): indices = np.arange(np.shape(x_train)[0]) np.random.shuffle(indices) print("indices: ", indices[1:10]) x_train = x_train[indices] y_train = y_train[indices] idxs = [ np.where(np.argmax(y_train, axis=1) == i)[0][0] for i in range(nb_classes) ] adv_inputs_2 = np.array([[instance] * (nb_classes - 1) for instance in x_train[idxs]], dtype=np.float32) adv_input_targets_2 = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes): if (id != curr_num): targ.append(x_train[idxs[id]]) adv_input_targets_2.append(targ) adv_input_targets_2 = np.array(adv_input_targets_2) adv_inputs_2 = adv_inputs_2.reshape( (source_samples * (nb_classes - 1), img_rows, img_cols, nchannels)) adv_input_targets_2 = adv_input_targets_2.reshape( (source_samples * (nb_classes - 1), img_rows, img_cols, nchannels)) adv_input_set.append(adv_inputs_2) adv_input_target_set.append(adv_input_targets_2) adv_input_set = np.array(adv_input_set), adv_input_target_set = np.array(adv_input_target_set) print("shape of adv_input_set: ", np.shape(adv_input_set)) print("shape of adv_input_target_set: ", np.shape(adv_input_target_set)) adv_input_set = np.reshape( adv_input_set, (np.shape(adv_input_set)[0] * np.shape(adv_input_set)[1] * np.shape(adv_input_set)[2], np.shape(adv_input_set)[3], np.shape(adv_input_set)[4], np.shape(adv_input_set)[5])) adv_input_target_set = np.reshape(adv_input_target_set, (np.shape(adv_input_target_set)[0] * np.shape(adv_input_target_set)[1], np.shape(adv_input_target_set)[2], np.shape(adv_input_target_set)[3], np.shape(adv_input_target_set)[4])) print("generated adversarial training set") adv_set = cw.generate_np(adv_input_set, adv_input_target_set, **cw_params) x_train_aim = np.append(x_train, adv_input_set, axis=0) x_train_app = np.append(x_train, adv_set, axis=0) model_adv_trained = ModelBasicAE('model_adv_trained', nb_layers, nb_latent_size) recons_2 = model_adv_trained.get_layer(x, 'RECON') loss_2 = SquaredError(model_adv_trained) train_ae(sess, loss_2, x_train_app, x_train_aim, args=train_params, rng=rng, var_list=model_adv_trained.get_params()) saver = tf.train.Saver() saver.save(sess, model_path) cw2 = CarliniWagnerAE(model_adv_trained, cl_model, sess=sess) adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params) #print("shaep of adv: ", np.shape(adv)) recon_orig = model_adv_trained.get_layer(adv_inputs, 'RECON') recon_adv = model_adv_trained.get_layer(adv_2, 'RECON') lat_orig = model_adv_trained.get_layer(x, 'LATENT') lat_orig_recon = model_adv_trained.get_layer(recons, 'LATENT') pred_adv_recon = cl_model.get_layer(recon_adv, 'LOGITS') #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} eval_params = {'batch_size': 90} if targeted: noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae( sess, x, x_t, recons, adv_inputs, adv_input_targets, adv_2, recon_adv, lat_orig, lat_orig_recon, args=eval_params) acc = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls) print("noise: ", noise) #print("d1: ", d1) #print("d2: ", d2) #print("d1-d2: ", dist_diff) #print("Avg_dist_lat: ", avg_dist_lat) print("classifier acc: ", acc) recon_adv = sess.run(recon_adv) recon_orig = sess.run(recon_orig) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if (i == j): grid_viz_data[i, j] = recon_orig[curr_class * 9] grid_viz_data_1[i, j] = adv_inputs[curr_class * 9] curr_class = curr_class + 1 else: if (j > i): grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1] grid_viz_data_1[i, j] = adv_2[i * (nb_classes - 1) + j - 1] else: grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j] grid_viz_data_1[i, j] = adv_2[i * (nb_classes - 1) + j] #rint(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean( np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5) print( 'Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Close TF session sess.close() # Finally, block & display a grid of all the adversarial examples if viz_enabled: _ = grid_visual(grid_viz_data) _ = grid_visual(grid_viz_data_1) return report #binarization defense if (binarization_defense == True or mean_filtering == True): #adv = sess.run(adv) # print(adv[0]) if (binarization_defense == True): adv[adv > 0.5] = 1.0 adv[adv <= 0.5] = 0.0 else: #radius = 2 #adv_list = [mean(adv[i,:,:,0], disk(radius)) for i in range(0, np.shape(adv)[0])] #adv = np.array(adv_list) #adv = np.expand_dims(adv, axis = 3) adv = uniform_filter(adv, 2) #adv = median_filter(adv, 2) #print("after bin ") #print(adv[0]) recon_orig = model.get_layer(adv_inputs, 'RECON') recon_adv = model.get_layer(adv, 'RECON') lat_adv = model.get_layer(adv, 'LATENT') lat_orig = model.get_layer(x, 'LATENT') lat_orig_recon = model.get_layer(recons, 'LATENT') pred_adv_recon = cl_model.get_layer(lat_adv, 'LOGITS') #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} eval_params = {'batch_size': 90} if targeted: noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae( sess, x, x_t, recons, adv_inputs, adv_input_targets, adv, recon_adv, lat_orig, lat_orig_recon, args=eval_params) acc1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls) acc2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls) print("noise: ", noise) print("classifier acc for target class: ", acc1) print("classifier acc for true class: ", acc2) recon_adv = sess.run(recon_adv) recon_orig = sess.run(recon_orig) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if (i == j): grid_viz_data[i, j] = recon_orig[curr_class * 9] grid_viz_data_1[i, j] = adv_inputs[curr_class * 9] curr_class = curr_class + 1 else: if (j > i): grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j - 1] else: grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j] sess.close() _ = grid_visual(grid_viz_data) _ = grid_visual(grid_viz_data_1)
def generate_CIFAR10_adv(attacker_name, train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1, args=FLAGS): """ CIFAR10 cleverhans tutorial :param attacker_name: :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ if "batch_size" in ATTACK_PARAM[attacker_name]: global BATCH_SIZE batch_size = ATTACK_PARAM[attacker_name]["batch_size"] BATCH_SIZE = batch_size # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session config_args = {} if num_threads: config_args = dict(intra_op_parallelism_threads=1) config_args["gpu_options"] = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(BATCH_SIZE, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(BATCH_SIZE, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} rng = np.random.RandomState([2017, 8, 30]) def do_generate_eval(adv_x, pred_adv_x, x_set, y_set, report_key, is_adv=None): adv_images_total, adv_pred_total, gt_label_total, success_rate = untargeted_advx_image_eval(sess, x, y, adv_x, pred_adv_x, x_set, y_set, args=eval_params) setattr(report, report_key, success_rate) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('adversarial attack successful rate on %s: %0.4f' % (report_text, success_rate)) return adv_images_total, adv_pred_total, gt_label_total, success_rate # shape = (total, H,W,C) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) if clean_train: model = ModelAllConvolutional('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) # tf.tensor def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) resume_files = os.listdir(args.resume) loss = CrossEntropy(model, smoothing=label_smoothing) if len(resume_files) == 0: saver = tf.train.Saver() train(sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) # è®ç»ƒnb_epochs个epochs save_path = saver.save(sess, "{}/model".format(args.resume), global_step=nb_epochs) print("Model saved in path: %s" % save_path) else: # resume from old latest_checkpoint = tf.train.latest_checkpoint(args.resume) saver = tf.train.Saver() saver.restore(sess, latest_checkpoint) # Calculate training error if testing: evaluate() # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph attacker = ATTACKERS[attacker_name](model, sess=sess) param_dict = ATTACK_PARAM[attacker_name] print("begin generate adversarial examples of CIFAR-10 using attacker: {}".format(attacker_name)) adv_x = attacker.generate(x, **param_dict) # tensor preds_adv = model.get_logits(adv_x) # generate adversarial examples adv_images_total, adv_pred_total, gt_label_total, success_rate = do_generate_eval(adv_x, preds_adv, x_train, y_train, "clean_train_adv_eval", True) print("attacker: {} attack successful rate for CIFAR-10 train dataset is {}".format(attacker_name, success_rate)) adv_images_total, adv_pred_total, gt_label_total, success_rate = do_generate_eval(adv_x, preds_adv, x_test, y_test, "clean_test_adv_eval", True) print("attacker: {} attack successful rate for CIFAR-10 test dataset is {}".format(attacker_name, success_rate)) return report
def cifar10_tutorial( train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1, ): """ CIFAR10 cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10( train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end, ) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set("train") x_test, y_test = data.get_set("test") # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { "nb_epochs": nb_epochs, "batch_size": batch_size, "learning_rate": learning_rate, } eval_params = {"batch_size": batch_size} fgsm_params = {"eps": 0.3, "clip_min": 0.0, "clip_max": 1.0} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = "adversarial" else: report_text = "legitimate" if report_text: print("Test accuracy on %s examples: %0.4f" % (report_text, acc)) if clean_train: model = ModelAllConvolutional("model1", nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, "clean_train_clean_eval", False) train( sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params(), ) # Calculate training error if testing: do_eval(preds, x_train, y_train, "train_clean_train_clean_eval") # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples do_eval(preds_adv, x_test, y_test, "clean_train_adv_eval", True) # Calculate training error if testing: do_eval(preds_adv, x_train, y_train, "train_clean_train_adv_eval") print("Repeating the process, using adversarial training") # Create a new model and train it to be robust to FastGradientMethod model2 = ModelAllConvolutional("model2", nb_classes, nb_filters, input_shape=[32, 32, 3]) fgsm2 = FastGradientMethod(model2, sess=sess) def attack(x): return fgsm2.generate(x, **fgsm_params) loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack) preds2 = model2.get_logits(x) adv_x2 = attack(x) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x2 = tf.stop_gradient(adv_x2) preds2_adv = model2.get_logits(adv_x2) def evaluate2(): # Accuracy of adversarially trained model on legitimate test inputs do_eval(preds2, x_test, y_test, "adv_train_clean_eval", False) # Accuracy of the adversarially trained model on adversarial examples do_eval(preds2_adv, x_test, y_test, "adv_train_adv_eval", True) # Perform and evaluate adversarial training train( sess, loss2, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate2, args=train_params, rng=rng, var_list=model2.get_params(), ) # Calculate training errors if testing: do_eval(preds2, x_train, y_train, "train_adv_train_clean_eval") do_eval(preds2_adv, x_train, y_train, "train_adv_train_adv_eval") return report
'batch_size': FLAGS.batch_size, 'learning_rate': 0.1, 'lr_factor': 0.9, 'lr_patience': 3, 'lr_cooldown': 2, 'best_model_path': os.path.join(model_dir, 'best_model.ckpt') } eval_params = {'batch_size': FLAGS.batch_size} fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} model = DarkonReplica(scope=ARCH_NAME[FLAGS.dataset], nb_classes=feeder.num_classes, n=5, input_shape=[32, 32, 3]) logits = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing[FLAGS.dataset]) regu_losses = WeightDecay(model) full_loss = WeightedSum(model, [(1.0, loss), (weight_decay, regu_losses)]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc))
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0, test_end=10000, viz_enabled=VIZ_ENABLED, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, source_samples=SOURCE_SAMPLES, learning_rate=LEARNING_RATE): """ MNIST tutorial for the Jacobian-based saliency map approach (JSMA) :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param viz_enabled: (boolean) activate plots of adversarial examples :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param nb_classes: number of output classes :param source_samples: number of test inputs to attack :param learning_rate: learning rate for training :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Create TF session and set as Keras backend session sess = tf.Session() print("Created TensorFlow session.") set_log_level(logging.DEBUG) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) nb_filters = 64 # Define TF model graph model = ModelBasicCNN('model1', nb_classes, nb_filters) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) print("Defined TensorFlow model graph.") ########################################################################### # Training the model using TensorFlow ########################################################################### # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } sess.run(tf.global_variables_initializer()) rng = np.random.RandomState([2017, 8, 30]) train(sess, loss, x_train, y_train, args=train_params, rng=rng) # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) assert x_test.shape[0] == test_end - test_start, x_test.shape print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) report.clean_train_clean_eval = accuracy ########################################################################### # Craft adversarial examples using the Jacobian-based saliency map approach ########################################################################### print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) + ' adversarial examples') # Keep track of success (adversarial example classified in target) results = np.zeros((nb_classes, source_samples), dtype='i') # Rate of perturbed features for each test set example and target class perturbations = np.zeros((nb_classes, source_samples), dtype='f') # Initialize our array for grid visualization grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') # Instantiate a SaliencyMapMethod attack object jsma = SaliencyMapMethod(model, sess=sess) jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None} figure = None # Loop over the samples we want to perturb into adversarial examples for sample_ind in xrange(0, source_samples): print('--------------------------------------') print('Attacking input %i/%i' % (sample_ind + 1, source_samples)) sample = x_test[sample_ind:(sample_ind + 1)] # We want to find an adversarial example for each possible target class # (i.e. all classes that differ from the label given in the dataset) current_class = int(np.argmax(y_test[sample_ind])) target_classes = other_classes(nb_classes, current_class) # For the grid visualization, keep original images along the diagonal grid_viz_data[current_class, current_class, :, :, :] = np.reshape( sample, (img_rows, img_cols, nchannels)) # Loop over all target classes for target in target_classes: print('Generating adv. example for target class %i' % target) # This call runs the Jacobian-based saliency map approach one_hot_target = np.zeros((1, nb_classes), dtype=np.float32) one_hot_target[0, target] = 1 jsma_params['y_target'] = one_hot_target adv_x = jsma.generate_np(sample, **jsma_params) # Check if success was achieved res = int(model_argmax(sess, x, preds, adv_x) == target) # Computer number of modified features adv_x_reshape = adv_x.reshape(-1) test_in_reshape = x_test[sample_ind].reshape(-1) nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0] percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0] # Display the original and adversarial images side-by-side if viz_enabled: figure = pair_visual( np.reshape(sample, (img_rows, img_cols, nchannels)), np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure) # Add our adversarial example to our grid data grid_viz_data[target, current_class, :, :, :] = np.reshape( adv_x, (img_rows, img_cols, nchannels)) # Update the arrays for later analysis results[target, sample_ind] = res perturbations[target, sample_ind] = percent_perturb print('--------------------------------------') # Compute the number of adversarial examples that were successfully found nb_targets_tried = ((nb_classes - 1) * source_samples) succ_rate = float(np.sum(results)) / nb_targets_tried print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate)) report.clean_train_adv_eval = 1. - succ_rate # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(perturbations) print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed)) # Compute the average distortion introduced for successful samples only percent_perturb_succ = np.mean(perturbations * (results == 1)) print('Avg. rate of perturbed features for successful ' 'adversarial examples {0:.4f}'.format(percent_perturb_succ)) # Close TF session sess.close() # Finally, block & display a grid of all the adversarial examples if viz_enabled: import matplotlib.pyplot as plt plt.close(figure) _ = grid_visual(grid_viz_data) return report
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1, adversarial_training=ADVERSARIAL_TRAINING): """ CIFAR10 cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :param adversarial_training: True means using adversarial training :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: # put data on cpu and gpu both config_args = dict(allow_soft_placement=True) sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} bim_params = { 'eps': 0.5, 'clip_min': 0., 'eps_iter': 0.002, 'nb_iter': 10, 'clip_max': 1., 'ord': np.inf } rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) if clean_train: model = ModelAllConvolutional('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) """ when training, evaluating can be happened """ train(sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) # save model # Calculate training error if testing: do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval') # Initialize the Basic Iterative Method (BIM) attack object and # graph for i in range(20): bim = BasicIterativeMethod(model, sess=sess) adv_x = bim.generate(x, **bim_params) preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples print("eps:%0.2f" % (bim_params["eps_iter"] * bim_params['nb_iter'])) do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True) bim_params["eps_iter"] = bim_params["eps_iter"] + 0.002 # Calculate training error if testing: do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval') if not adversarial_training: return report print('Repeating the process, using adversarial training') # Create a new model and train it to be robust to BasicIterativeMethod model2 = ModelAllConvolutional('model2', nb_classes, nb_filters, input_shape=[32, 32, 3]) bim2 = BasicIterativeMethod(model2, sess=sess) def attack(x): return bim2.generate(x, **bim_params) # add attack to loss loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack) preds2 = model2.get_logits(x) adv_x2 = attack(x) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the attacker will change their strategy in response to updates to # the defender's parameters. adv_x2 = tf.stop_gradient(adv_x2) preds2_adv = model2.get_logits(adv_x2) def evaluate2(): # Accuracy of adversarially trained model on legitimate test inputs do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False) # Accuracy of the adversarially trained model on adversarial examples do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True) # Perform and evaluate adversarial training train(sess, loss2, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate2, args=train_params, rng=rng, var_list=model2.get_params()) # Calculate training errors if testing: do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval') do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval') return report
def mnist_tutorial( train_start=0, train_end=60, #60000 test_start=0, test_end=10, #10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1): # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get MNIST data data = Dataset(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) if clean_train: #TODO: change model inputs #model = Architecture('model1', nb_classes, nb_filters) hparams = {} hparams['dataset'] = Dataset hparams['secret_seed'] = SECRET_SEED model = Architecture('model1', nb_classes, hparams) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) # Calculate training error if testing: do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval') # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph #TODO: initialize Projected Gradient Descent Attack basic_attack = AttackModel(model, sess=sess) adv_x = basic_attack.generate(x, **attack_params) preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True) # Calculate training error if testing: do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval') print('Repeating the process, using adversarial training') # Create a new model and train it to be robust to FastGradientMethod #TODO: change model inputs #model2 = Architecture('model2', nb_classes, nb_filters) hparams = {} hparams['dataset'] = Dataset hparams['secret_seed'] = SECRET_SEED model2 = Architecture('model2', nb_classes, hparams) learned_attack = AttackModel(model2, sess=sess) def attack(x): return learned_attack.generate(x, **attack_params) loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack) preds2 = model2.get_logits(x) adv_x2 = attack(x) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x2 = tf.stop_gradient(adv_x2) preds2_adv = model2.get_logits(adv_x2) def evaluate2(): # Accuracy of adversarially trained model on legitimate test inputs do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False) # Accuracy of the adversarially trained model on adversarial examples do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True) # Perform and evaluate adversarial training train(sess, loss2, x_train, y_train, evaluate=evaluate2, args=train_params, rng=rng, var_list=model2.get_params()) # Calculate training errors if testing: do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval') do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval') return report
# os.mkdir(train_dir) ckpt = None #tf.train.get_checkpoint_state(train_dir) print(train_dir, ckpt) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path wrap = KerasModelWrapper(model) if load_model and ckpt_path: saver = tf.train.Saver() print(ckpt_path) saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") loss = CrossEntropy(wrap, smoothing=label_smoothing) train(sess, loss, X_train, y_train, evaluate=evaluate, args=train_params, rng=rng) ''' # Define TF model graph model = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) preds = model(x) print("Defined TensorFlow model graph.")
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {'allow_soft_placement': True} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get MNIST data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) if clean_train: model = ModelBasicCNN('model1', nb_classes, nb_filters) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) # Calculate training error if testing: do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval') # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_fgsm_x = fgsm.generate(x, **fgsm_params) preds_adv_fgsm = model.get_logits(adv_fgsm_x) # Generate fgsm adversarial examples and save to disk dir = 'images/fgsm_adv/' if not os.path.exists('images'): os.mkdir('images') if not os.path.exists(dir): os.mkdir(dir) if not os.path.exists(dir + 'train/'): os.mkdir(dir + 'train/') if not os.path.exists(dir + 'test/'): os.mkdir(dir + 'test/') for index in range(len(y_test)): print('test ' + str(index)) x_ = x_test[index] label = np.argmax(y_test[index]) raw_data = (fgsm.generate_np(x_.reshape( (1, 28, 28, 1)), **fgsm_params).reshape( (28, 28)) * 255).astype('uint8') im = Image.fromarray(raw_data, mode='P') im.save(dir + 'test/' + str(label) + '_' + str(uuid.uuid4()) + '.png') for index in range(len(y_train)): print('train ' + str(index)) x_ = x_train[index] label = np.argmax(y_train[index]) raw_data = (fgsm.generate_np(x_.reshape( (1, 28, 28, 1)), **fgsm_params).reshape( (28, 28)) * 255).astype('uint8') im = Image.fromarray(raw_data, mode='P') im.save(dir + 'train/' + str(label) + '_' + str(uuid.uuid4()) + '.png') return report
def mnist_tutorial_fgsm(train_start=0, train_end=60000, test_start=0, test_end=10000, viz_enabled=VIZ_ENABLED, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, source_samples=SOURCE_SAMPLES, learning_rate=LEARNING_RATE, attack_iterations=ATTACK_ITERATIONS, model_path=MODEL_PATH, targeted=TARGETED, noise_output=NOISE_OUTPUT): """ MNIST tutorial for Fast Gradient Method's attack :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param viz_enabled: (boolean) activate plots of adversarial examples :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param nb_classes: number of output classes :param source_samples: number of test inputs to attack :param learning_rate: learning rate for training :param model_path: path to the model file :param targeted: should we run a targeted attack? or untargeted? :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Create TF session sess = tf.Session() print("Created TensorFlow session.") set_log_level(logging.DEBUG) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) nb_filters = 64 # Define TF model graph model = ModelBasicCNN('model1', nb_classes, nb_filters) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) print("Defined TensorFlow model graph.") ########################################################################### # Training the model using TensorFlow ########################################################################### # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'filename': os.path.split(model_path)[-1] } rng = np.random.RandomState([2017, 8, 30]) # check if we've trained before, and if we have, use that pre-trained model if os.path.exists(model_path + ".meta"): tf_model_load(sess, model_path) else: train(sess, loss, x_train, y_train, args=train_params, rng=rng) saver = tf.train.Saver() saver.save(sess, model_path) # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) assert x_test.shape[0] == test_end - test_start, x_test.shape print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) report.clean_train_clean_eval = accuracy ########################################################################### # Craft adversarial examples using Carlini and Wagner's approach ########################################################################### nb_adv_per_sample = str(nb_classes - 1) if targeted else '1' print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample + ' adversarial examples') print("This could take some time ...") # Instantiate a FGSM attack object fgsm = FastGradientMethod(model, sess=sess) if viz_enabled: assert source_samples == nb_classes idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0] for i in range(nb_classes)] if targeted: if viz_enabled: # Initialize our array for grid visualization grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') adv_inputs = np.array( [[instance] * nb_classes for instance in x_test[idxs]], dtype=np.float32) else: adv_inputs = np.array( [[instance] * nb_classes for instance in x_test[:source_samples]], dtype=np.float32) one_hot = np.zeros((nb_classes, nb_classes)) one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1 adv_inputs = adv_inputs.reshape( (source_samples * nb_classes, img_rows, img_cols, nchannels)) adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape((source_samples * nb_classes, nb_classes)) yname = "y_target" else: if viz_enabled: # Initialize our array for grid visualization grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') adv_inputs = x_test[idxs] else: adv_inputs = x_test[:source_samples] adv_ys = None yname = "y" if targeted: fgsm_params_batch_size = source_samples * nb_classes else: fgsm_params_batch_size = source_samples fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} adv = fgsm.generate_np(adv_inputs, **fgsm_params) eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} if targeted: adv_accuracy = model_eval( sess, x, y, preds, adv, adv_ys, args=eval_params) else: if viz_enabled: err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params) adv_accuracy = 1 - err else: err = model_eval(sess, x, y, preds, adv, y_test[:source_samples], args=eval_params) adv_accuracy = 1 - err if viz_enabled: for i in range(nb_classes): if noise_output: image = adv[i * nb_classes] - adv_inputs[i * nb_classes] else: image = adv[i * nb_classes] grid_viz_data[i, 0] = image print('--------------------------------------') # Compute the number of adversarial examples that were successfully found print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy)) report.clean_train_adv_eval = 1. - adv_accuracy # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) ########################################################################### # Adversarial Training ########################################################################### model2 = ModelBasicCNN('model2', nb_classes, nb_filters) fgsm2 = FastGradientMethod(model2, sess=sess) def attack_fgsm(x): return fgsm2.generate(adv_inputs, **fgsm_params) preds2 = model2.get_logits(x) loss2 = CrossEntropy(model2, smoothing=0.1, attack=attack_fgsm) train(sess, loss2, x_train, y_train, args=train_params, rng=rng) eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds2, x_test, y_test, args=eval_params) assert x_test.shape[0] == test_end - test_start, x_test.shape print('Test accuracy on adversarial fgsm test examples: {0}'.format(accuracy)) report.clean_train_clean_eval = accuracy print("Defined TensorFlow model graph.") eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} if targeted: adv_accuracy = model_eval( sess, x, y, preds, adv, adv_ys, args=eval_params) else: if viz_enabled: err = model_eval(sess, x, y, preds, adv, y_test[idxs], args=eval_params) adv_accuracy = 1 - err else: err = model_eval(sess, x, y, preds, adv, y_test[:source_samples], args=eval_params) adv_accuracy = 1 - err if viz_enabled: for i in range(nb_classes): if noise_output: image = adv[i * nb_classes] - adv_inputs[i * nb_classes] else: image = adv[i * nb_classes] grid_viz_data[i, 0] = image print('--------------------------------------') # Compute the number of adversarial examples that were successfully found print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy)) report.clean_train_adv_eval = 1. - adv_accuracy # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Close TF session sess.close() def save_visual(data, path): """ Modified version of cleverhans.plot.pyplot """ figure = plt.figure() # figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = data.shape[0] num_rows = data.shape[1] num_channels = data.shape[4] for y in range(num_rows): for x in range(num_cols): figure.add_subplot(num_rows, num_cols, (x + 1) + (y * num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(data[x, y, :, :, 0], cmap='gray') else: plt.imshow(data[x, y, :, :, :]) # Draw the plot and return plt.savefig(path) return figure # Finally, block & display a grid of all the adversarial examples if viz_enabled: # _ = grid_visual(grid_viz_data) # cleverhans_image.save("output", grid_viz_data) if noise_output: image_name = "output/fgsm_mnist_noise.png" else: image_name = "output/fgsm_mnist.png" _ = save_visual(grid_viz_data, image_name) return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR, filename=FILENAME, load_model=LOAD_MODEL, testing=False, label_smoothing=0.1): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ keras.layers.core.K.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session os.environ["CUDA_VISIBLE_DEVICES"] = '0' # only use No.0 GPU config = tf.ConfigProto() config.allow_soft_placement = True config.gpu_options.allow_growth = True sess = tf.Session(config=config) keras.backend.set_session(sess) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Define TF model graph model = cnn_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) preds = model(x) print("Defined TensorFlow model graph.") def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) report.clean_train_clean_eval = acc # assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } rng = np.random.RandomState([2017, 8, 30]) if not os.path.exists(train_dir): os.mkdir(train_dir) ckpt = tf.train.get_checkpoint_state(train_dir) print(train_dir, ckpt) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path wrap = KerasModelWrapper(model) if load_model and ckpt_path: saver = tf.train.Saver() print(ckpt_path) saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate() else: print("Model was not loaded, training from scratch.") loss = CrossEntropy(wrap, smoothing=label_smoothing) train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng) saver = tf.train.Saver(max_to_keep=1) saver.save(sess, '{}/mnist.ckpt'.format(train_dir), global_step=NB_EPOCHS) print("model has been saved") # Calculate training error if testing: eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_train, y_train, args=eval_params) report.train_clean_train_clean_eval = acc # Initialize the Basic Iterative Method (BIM) attack object and graph lbfgs = LBFGS(wrap, sess=sess) # targeted attack, targeted class is 1 y_target = np.ones(128) y_target = keras.utils.to_categorical(y_target, num_classes=10) y_target = tf.Variable(y_target) sess.run(tf.global_variables_initializer()) lbfgs_params = {'y_target': y_target, 'batch_size': 128} adv_x = lbfgs.generate(x, **lbfgs_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples eval_par = {'batch_size': batch_size} start_time = time.time() acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f\n' % acc) end_time = time.time() print("L-BFGS attack time is {}".format(end_time - start_time)) report.clean_train_adv_eval = acc # Calculating train error if testing: eval_par = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_adv, x_train, y_train, args=eval_par) report.train_clean_train_adv_eval = acc gc.collect() return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR, filename=FILENAME, load_model=LOAD_MODEL, testing=False, label_smoothing=0.1, adversarial_training = ADVERSARIAL_TRAINING, attacking = ATTACKING,origin_method=ORIGIN_METHOD, save_model=SAVE_MODEL,model_type=MODEL_TYPE): """ MNIST CleverHans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param train_dir: Directory storing the saved model :param filename: Filename to save model under :param load_model: True for load, False for not load :param testing: if true, test error is calculated :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ keras.layers.core.K.set_learning_phase(0) # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) if not hasattr(backend, "tf"): raise RuntimeError("This tutorial requires keras to be configured" " to use the TensorFlow backend.") if keras.backend.image_dim_ordering() != 'tf': keras.backend.set_image_dim_ordering('tf') print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to " "'th', temporarily setting to 'tf'") # Create TF session and set as Keras backend session os.environ["CUDA_VISIBLE_DEVICES"] = '0' # only use No.0 GPU config = tf.ConfigProto() config.allow_soft_placement=True config.gpu_options.allow_growth = True sess = tf.Session(config=config) keras.backend.set_session(sess) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Define TF model graph the_model = modelA if model_type == 'a': the_model = modelA elif model_type == 'b': the_model = modelB elif model_type == 'c': the_model = modelC else: exit('the model type must be a or b or c.') model = the_model(img_rows=img_rows, img_cols=img_cols, channels=nchannels, nb_filters=64, nb_classes=nb_classes) wrap = KerasModelWrapper(model) preds = model(x) # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph if origin_method == 'fgsm': att_method = FastGradientMethod(wrap, sess=sess) att_method_params = {'eps': 0.2, 'clip_min': 0., 'clip_max': 1.} elif origin_method == 'bim': att_method = BasicIterativeMethod(wrap, sess=sess) att_method_params = {'eps': 0.2, 'eps_iter': 0.06, 'nb_iter': 10, 'clip_min': 0., 'clip_max': 1.} elif origin_method == 'mifgsm': att_method = MomentumIterativeMethod(wrap, sess=sess) att_method_params = {'eps': 0.2, 'eps_iter': 0.08, 'nb_iter': 10, 'decay_factor': 0.4, 'clip_min': 0., 'clip_max': 1.} else: exit("the attack method must be fgsm,bim,mifgsm") # Evaluate the accuracy of the MNIST model on adversarial examples print(att_method_params) adv_x = att_method.generate(x, **att_method_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) def attack(x): return att_method.generate(x, **att_method_params) def evaluate2(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params) report.clean_train_clean_eval = acc print('AT Test accuracy on legitimate examples: %0.4f' % acc) # Accuracy of the adversarially trained model on adversarial examples accuracy = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_params) print('AT Test accuracy on adversarial examples: %0.4f' % accuracy) report.adv_train_adv_eval = accuracy # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': train_dir, 'filename': filename } rng = np.random.RandomState([2017, 8, 30]) train_dir = train_dir + '/' + model_type + '/' + origin_method if not os.path.exists(train_dir): os.makedirs(train_dir) ckpt = tf.train.get_checkpoint_state(train_dir) print(train_dir, ckpt) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path if load_model and ckpt_path: saver = tf.train.Saver() print(ckpt_path) saver.restore(sess, ckpt_path) print("Model loaded from: {}".format(ckpt_path)) evaluate2() else: print("Model was not loaded, training from scratch.") loss2 = CrossEntropy(wrap, smoothing=label_smoothing,attack=attack) train(sess, loss2, x_train, y_train, evaluate=evaluate2, args=train_params, rng=rng) if save_model: saver = tf.train.Saver(max_to_keep=1) saver.save(sess, '{}/{}.ckpt'.format(train_dir,origin_method), global_step=NB_EPOCHS) keras.models.save_model(model, '{}/{}_mnist.h5'.format(train_dir,origin_method)) print("model has been saved") # >>> other method >>> if adversarial_training: method = ['fgsm','bim','mifgsm'] for i in range(3): attacking = method[i] if attacking == 'fgsm': att_method = FastGradientMethod(wrap, sess=sess) att_method_params = {'eps': 0.2, 'clip_min': 0., 'clip_max': 1.} elif attacking == 'bim': att_method = BasicIterativeMethod(wrap,sess=sess) att_method_params = {'eps': 0.2, 'eps_iter':0.06, 'nb_iter':10, 'clip_min': 0., 'clip_max': 1.} elif attacking == 'mifgsm': att_method = MomentumIterativeMethod(wrap,sess=sess) att_method_params = {'eps': 0.2, 'eps_iter':0.08, 'nb_iter':10, 'decay_factor':0.4, 'clip_min': 0., 'clip_max': 1.} else: exit("the attack method must be fgsm,bim,mifgsm") # Evaluate the accuracy of the MNIST model on adversarial examples print(att_method_params) adv_x = att_method.generate(x, **att_method_params) # Consider the attack to be constant adv_x = tf.stop_gradient(adv_x) preds_adv = model(adv_x) eval_par = {'batch_size': batch_size} start_time = time.time() acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par) print('Test accuracy on adversarial examples: %0.4f' % acc) end_time = time.time() print("{} attack time is {}\n".format(attacking,end_time-start_time)) report.clean_train_adv_eval = acc gc.collect()
def mnist_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get MNIST test data mnist = MNIST(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) if clean_train: model = make_basic_picklable_cnn() # Tag the model so that when it is saved to disk, future scripts will # be able to tell what data it was trained on model.dataset_factory = mnist.get_factory() preds = model.get_logits(x) assert len(model.get_params()) > 0 loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) train(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) with sess.as_default(): save("clean_model.joblib", model) print("Now that the model has been saved, you can evaluate it in a" " separate process using `evaluate_pickled_model.py`. " "You should get exactly the same result for both clean and " "adversarial accuracy as you get within this program.") # Calculate training error if testing: do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval') # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True) # Calculate training error if testing: do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval') print('Repeating the process, using adversarial training') # Create a new model and train it to be robust to FastGradientMethod model2 = make_basic_picklable_cnn() # Tag the model so that when it is saved to disk, future scripts will # be able to tell what data it was trained on model2.dataset_factory = mnist.get_factory() fgsm2 = FastGradientMethod(model2, sess=sess) def attack(x): return fgsm2.generate(x, **fgsm_params) loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack) preds2 = model2.get_logits(x) adv_x2 = attack(x) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x2 = tf.stop_gradient(adv_x2) preds2_adv = model2.get_logits(adv_x2) def evaluate2(): # Accuracy of adversarially trained model on legitimate test inputs do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False) # Accuracy of the adversarially trained model on adversarial examples do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True) # Perform and evaluate adversarial training train(sess, loss2, x_train, y_train, evaluate=evaluate2, args=train_params, rng=rng, var_list=model2.get_params()) with sess.as_default(): save("adv_model.joblib", model2) print( "Now that the model has been saved, you can evaluate it in a " "separate process using " "`python evaluate_pickled_model.py adv_model.joblib`. " "You should get exactly the same result for both clean and " "adversarial accuracy as you get within this program." " You can also move beyond the tutorials directory and run the " " real `compute_accuracy.py` script (make sure cleverhans/scripts " "is in your PATH) to see that this FGSM-trained " "model is actually not very robust---it's just a model that trains " " quickly so the tutorial does not take a long time") # Calculate training errors if testing: do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval') do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval') return report
train_end=train_end, test_start=test_start, test_end=test_end) x_train, y_train = mnist.get_set('train') x_test, y_test = mnist.get_set('test') img_rows, img_cols, nchannels = x_train.shape[1:4] x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) nb_classes = 10 #? Y_train.shape[1] nb_filters = 64 #? model = ResNet(scope="model1", nb_classes=nb_classes, nb_filters=nb_filters) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) print("Defined TensorFlow model graph.") loss = CrossEntropy(model, smoothing=0.1) print("Defined TensorFlow model graph.") ########################################################################### # Training the model using TensorFlow ########################################################################### #training ResNetmodel # from cleverhans.compat import flags # FLAGS = flags.FLAGS VIZ_ENABLED = True