def eval_multi(self, inc_epoch=True):
    """ Run the evaluation on multiple attacks. """
    sess = self.sess
    preds = self.preds
    x = self.x_pre
    y = self.y
    X_train = self.X_train
    Y_train = self.Y_train
    X_test = self.X_test
    Y_test = self.Y_test
    writer = self.writer

    self.summary = tf.Summary()
    report = {}

    # Evaluate on train set
    subsample_factor = 100
    X_train_subsampled = X_train[::subsample_factor]
    Y_train_subsampled = Y_train[::subsample_factor]
    acc_train = model_eval(sess, x, y, preds, X_train_subsampled,
                           Y_train_subsampled, args=self.eval_params)
    self.log_value('train_accuracy_subsampled', acc_train,
                   'Clean accuracy, subsampled train')
    report['train'] = acc_train

    # Evaluate on the test set
    acc = model_eval(sess, x, y, preds, X_test, Y_test,
                     args=self.eval_params)
    self.log_value('test_accuracy_natural', acc,
                   'Clean accuracy, natural test')
    report['test'] = acc

    # Evaluate against adversarial attacks
    if self.epoch % self.hparams.eval_iters == 0:
        for att_type in self.attack_type_test:
            adv_x, preds_adv = self.attacks[att_type]
            acc = self.eval_advs(x, y, preds_adv, X_test, Y_test, att_type)
            report[att_type] = acc

        if self.writer:
            writer.add_summary(self.summary, self.epoch)

    # Add examples of adversarial examples to the summary
    if self.writer and self.epoch % 20 == 0 and self.sum_op is not None:
        sm_val = self.sess.run(self.sum_op,
                               feed_dict={x: X_test[:self.batch_size],
                                          y: Y_test[:self.batch_size]})
        if self.writer:
            writer.add_summary(sm_val)

    self.epoch += 1 if inc_epoch else 0

    return report
def prep_bbox(sess, x, y, x_train, y_train, x_test, y_test,
              nb_epochs, batch_size, learning_rate,
              rng, nb_classes=10, img_rows=28, img_cols=28, nchannels=1):
    """
    Define and train a model that simulates the "remote"
    black-box oracle described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param x_train: the training data for the oracle
    :param y_train: the training labels for the oracle
    :param x_test: the testing data for the oracle
    :param y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState
    :return: the trained model, its predictions tensor, and its test accuracy
    """

    # Define TF model graph (for the black-box model)
    nb_filters = 64
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    loss = CrossEntropy(model, smoothing=0.1)
    predictions = model.get_logits(x)
    print("Defined TensorFlow model graph.")

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng)

    # Print out the accuracy on legitimate data
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, predictions,
                          x_test, y_test, args=eval_params)
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(accuracy))

    return model, predictions, accuracy
def evaluate():
    # Evaluate the accuracy of the model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, curr_X, curr_Y, args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)
    return acc
def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    if is_adv is None:
        report_text = None
    elif is_adv:
        report_text = 'adversarial'
    else:
        report_text = 'legitimate'
    if report_text:
        print('Test accuracy on %s examples: %0.4f' % (report_text, acc))
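# The snippet below is an illustrative (hypothetical) usage sketch for
# `do_eval`, which is meant to be defined as a closure inside a tutorial
# function where `sess`, `x`, `y`, `eval_params`, `report`, the prediction
# tensors and the MNIST test arrays already exist; it is not part of the
# original code.
#
#   do_eval(preds, x_test, y_test, 'clean_train_clean_eval', is_adv=False)
#   do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', is_adv=True)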
def eval_advs(self, x, y, preds_adv, X_test, Y_test, att_type):
    """
    Evaluate the accuracy of the model on adversarial examples

    :param x: symbolic input to model.
    :param y: symbolic variable for the label.
    :param preds_adv: symbolic variable for the prediction on an
                      adversarial example.
    :param X_test: NumPy array of test set inputs.
    :param Y_test: NumPy array of test set labels.
    :param att_type: name of the attack.
    """
    end = (len(X_test) // self.batch_size) * self.batch_size

    if self.hparams.fast_tests:
        end = 10 * self.batch_size

    acc = model_eval(self.sess, x, y, preds_adv, X_test[:end],
                     Y_test[:end], args=self.eval_params)
    self.log_value('test_accuracy_%s' % att_type, acc,
                   'Test accuracy on adversarial examples')
    return acc
def mnist_blackbox(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_classes=NB_CLASSES,
                   batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                   nb_epochs=NB_EPOCHS, holdout=HOLDOUT, data_aug=DATA_AUG,
                   nb_epochs_s=NB_EPOCHS_S, lmbda=LMBDA,
                   aug_batch_size=AUG_BATCH_SIZE):
    """
    MNIST tutorial for the black-box attack from arxiv.org/abs/1602.02697
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :return: a dictionary with:
             * black-box model accuracy on test set
             * substitute model accuracy on test set
             * black-box model accuracy on adversarial examples transferred
               from the substitute model
    """

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Dictionary used to keep track and return key accuracies
    accuracies = {}

    # Perform tutorial setup
    assert setup_tutorial()

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Initialize substitute training set reserved for adversary
    x_sub = x_test[:holdout]
    y_sub = np.argmax(y_test[:holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    x_test = x_test[holdout:]
    y_test = y_test[holdout:]

    # Obtain Image parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Seed random number generator so tutorial is reproducible
    rng = np.random.RandomState([2017, 8, 30])

    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    prep_bbox_out = prep_bbox(sess, x, y, x_train, y_train, x_test, y_test,
                              nb_epochs, batch_size, learning_rate,
                              rng, nb_classes, img_rows, img_cols, nchannels)
    model, bbox_preds, accuracies['bbox'] = prep_bbox_out

    # Train substitute using method from https://arxiv.org/abs/1602.02697
    print("Training the substitute model.")
    train_sub_out = train_sub(sess, x, y, bbox_preds, x_sub, y_sub,
                              nb_classes, nb_epochs_s, batch_size,
                              learning_rate, data_aug, lmbda, aug_batch_size,
                              rng, img_rows, img_cols, nchannels)
    model_sub, preds_sub = train_sub_out

    # Evaluate the substitute model on clean test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_sub, x_test, y_test, args=eval_params)
    accuracies['sub'] = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    fgsm_par = {'eps': 0.3, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_sub, sess=sess)

    # Craft adversarial examples using the substitute
    eval_params = {'batch_size': batch_size}
    x_adv_sub = fgsm.generate(x, **fgsm_par)

    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess, x, y, model.get_logits(x_adv_sub),
                          x_test, y_test, args=eval_params)
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute: ' + str(accuracy))
    accuracies['bbox_on_sub_adv_ex'] = accuracy

    return accuracies
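# A minimal, hypothetical invocation sketch for `mnist_blackbox` (not part of
# the original tutorial). The hyperparameter values below are illustrative
# assumptions, and the tutorial's module-level imports and constants are
# presumed to be in place.
#
#   accuracies = mnist_blackbox(nb_epochs=6, batch_size=128,
#                               learning_rate=0.001, holdout=150,
#                               data_aug=6, nb_epochs_s=10, lmbda=0.1)
#   print(accuracies['bbox'], accuracies['sub'],
#         accuracies['bbox_on_sub_adv_ex'])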
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute the number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
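# A hypothetical quick-run sketch for `mnist_tutorial_jsma` (not part of the
# original tutorial); disabling visualization and attacking a handful of
# samples keeps the run short. Argument values are illustrative assumptions.
#
#   report = mnist_tutorial_jsma(viz_enabled=False, nb_epochs=6,
#                                batch_size=128, source_samples=10,
#                                learning_rate=0.001)
#   print(report.clean_train_clean_eval, report.clean_train_adv_eval)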
def main(argv):
    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if model_file is None:
        print('No model found')
        sys.exit()

    cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir)

    nb_classes = 10
    X_test = cifar.eval_data.xs
    Y_test = to_categorical(cifar.eval_data.ys, nb_classes)
    assert Y_test.shape[1] == 10.

    set_log_level(logging.DEBUG)

    with tf.Session() as sess:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        from madry_cifar10_model import make_madry_wresnet
        model = make_madry_wresnet()

        saver = tf.train.Saver()

        # Restore the checkpoint
        saver.restore(sess, model_file)

        nb_samples = FLAGS.nb_samples

        attack_params = {'batch_size': FLAGS.batch_size,
                         'clip_min': 0., 'clip_max': 255.}

        if FLAGS.attack_type == 'cwl2':
            from cleverhans_copy.attacks import CarliniWagnerL2
            attacker = CarliniWagnerL2(model, sess=sess)
            attack_params.update({'binary_search_steps': 1,
                                  'max_iterations': 100,
                                  'learning_rate': 0.1,
                                  'initial_const': 10,
                                  'batch_size': 10})
        else:
            # eps and eps_iter in range 0-255
            attack_params.update({'eps': 8, 'ord': np.inf})
            if FLAGS.attack_type == 'fgsm':
                from cleverhans_copy.attacks import FastGradientMethod
                attacker = FastGradientMethod(model, sess=sess)
            elif FLAGS.attack_type == 'pgd':
                attack_params.update({'eps_iter': 2, 'nb_iter': 20})
                from cleverhans_copy.attacks import MadryEtAl
                attacker = MadryEtAl(model, sess=sess)

        eval_par = {'batch_size': FLAGS.batch_size}

        if FLAGS.sweep:
            max_eps = 16
            epsilons = np.linspace(1, max_eps, max_eps)
            for e in epsilons:
                t1 = time.time()
                attack_params.update({'eps': e})
                x_adv = attacker.generate(x, **attack_params)
                preds_adv = model.get_probs(x_adv)
                acc = model_eval(sess, x, y, preds_adv, X_test[:nb_samples],
                                 Y_test[:nb_samples], args=eval_par)
                print('Epsilon %.2f, accuracy on adversarial' % e,
                      'examples %0.4f\n' % acc)
                t2 = time.time()
        else:
            t1 = time.time()
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            acc = model_eval(sess, x, y, preds_adv, X_test[:nb_samples],
                             Y_test[:nb_samples], args=eval_par)
            t2 = time.time()
            print('Test accuracy on adversarial examples %0.4f\n' % acc)
        print("Took", t2 - t1, "seconds")
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      model_path=MODEL_PATH,
                      targeted=TARGETED):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x, y, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[idxs]],
                dtype=np.float32)
        else:
            adv_inputs = np.array(
                [[instance] * nb_classes for
                 instance in x_test[:source_samples]], dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape((source_samples *
                                                     nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    if targeted:
        cw_params_batch_size = source_samples * nb_classes
    else:
        cw_params_batch_size = source_samples
    cw_params = {'binary_search_steps': 1,
                 yname: adv_ys,
                 'max_iterations': attack_iterations,
                 'learning_rate': CW_LEARNING_RATE,
                 'batch_size': cw_params_batch_size,
                 'initial_const': 10}

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(
            sess, x, y, preds, adv, adv_ys, args=eval_params)
    else:
        if viz_enabled:
            err = model_eval(sess, x, y, preds, adv, y_test[idxs],
                             args=eval_params)
            adv_accuracy = 1 - err
        else:
            err = model_eval(sess, x, y, preds, adv, y_test[:source_samples],
                             args=eval_params)
            adv_accuracy = 1 - err

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
def tsc_tutorial(attack_method='fgsm', batch_size=BATCH_SIZE,
                 dataset_name='Adiac', eps=0.1, attack_on='train'):
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    root_dir = '/b/home/uha/hfawaz-datas/dl-tsc/'
    archive_name = 'TSC'
    classifier_name = 'resnet'
    out_dir = 'ucr-attack/'
    file_path = root_dir + 'results/' + classifier_name + '/' + \
        archive_name + '/' + dataset_name + '/best_model.hdf5'
    adv_data_dir = out_dir + attack_method + '/' + archive_name + '/' + \
        attack_on + '/eps-' + str(eps) + '/'

    if os.path.exists(adv_data_dir + dataset_name + '-adv'):
        print('Already_done:', dataset_name)
        return
    else:
        print('Doing:', dataset_name)

    dataset_dict = read_dataset(root_dir, archive_name, dataset_name)

    x_train, y_train, x_test, y_test, _, nb_classes = prepare_data(
        dataset_dict, dataset_name)

    if attack_on == 'train':
        X = x_train
        Y = y_train
        original_y = dataset_dict[dataset_name][1]
    elif attack_on == 'test':
        X = x_test
        Y = y_test
        original_y = dataset_dict[dataset_name][3]
    else:
        print('Error: attack_on param should be either train or test')
        exit()

    # For big datasets, the attack is evaluated batch by batch
    ori_acc = 0
    adv_acc = 0

    res_dir = out_dir + 'results' + attack_method + '.csv'
    if os.path.exists(res_dir):
        res_ori = pd.read_csv(res_dir, index_col=False)
    else:
        res_ori = pd.DataFrame(data=np.zeros((0, 3), dtype=np.float),
                               index=[],
                               columns=['dataset_name', 'ori_acc', 'adv_acc'])

    test_set = np.zeros((Y.shape[0], x_train.shape[1] + 1), dtype=np.float64)

    # Loop through the batches
    for i in range(0, len(X), batch_size):
        curr_X = X[i:i + batch_size]
        curr_Y = Y[i:i + batch_size]

        # Obtain series Parameters
        img_rows, nchannels = x_train.shape[1:3]

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=(None, img_rows, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        # Define TF model graph
        model = keras.models.load_model(file_path)
        preds = model(x)
        print("Defined TensorFlow model graph.")

        def evaluate():
            # Evaluate the accuracy of the model on legitimate test examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds, curr_X, curr_Y,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            print('Test accuracy on legitimate examples: %0.4f' % acc)
            return acc

        wrap = KerasModelWrapper(model)

        ori_acc += evaluate() * len(curr_X) / len(X)

        if attack_method == 'fgsm':
            # Initialize the Fast Gradient Sign Method (FGSM) attack object
            # and graph
            fgsm = FastGradientMethod(wrap, sess=sess)
            fgsm_params = {'eps': eps}
            adv_x = fgsm.generate(x, **fgsm_params)
        elif attack_method == 'bim':
            # Basic Iterative Method
            bim = BasicIterativeMethod(wrap, sess=sess)
            bim_params = {'eps': eps, 'eps_iter': 0.05, 'nb_iter': 10}
            adv_x = bim.generate(x, **bim_params)
        else:
            print('Either bim or fgsm are acceptable as attack methods')
            return

        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
        adv = adv_x.eval({x: curr_X}, session=sess)
        adv = adv.reshape(adv.shape[0], adv.shape[1])

        preds_adv = model(adv_x)

        # Evaluate the accuracy of the model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, curr_X, curr_Y, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        adv_acc += acc * len(curr_X) / len(X)

        test_set[i:i + batch_size, 0] = original_y[i:i + batch_size]
        test_set[i:i + batch_size, 1:] = adv

    create_directory(adv_data_dir)

    np.savetxt(adv_data_dir + dataset_name + '-adv', test_set, delimiter=',')

    add_labels_to_adv_test_set(dataset_dict, dataset_name, adv_data_dir,
                               original_y)

    res = pd.DataFrame(data=np.zeros((1, 3), dtype=np.float), index=[0],
                       columns=['dataset_name', 'ori_acc', 'adv_acc'])
    res['dataset_name'] = dataset_name + str(eps)
    res['ori_acc'] = ori_acc
    res['adv_acc'] = adv_acc
    res_ori = pd.concat((res_ori, res), sort=False)
    res_ori.to_csv(res_dir, index=False)

    return report
def main(argv):
    checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    if checkpoint is None:
        raise ValueError("Couldn't find latest checkpoint in " +
                         FLAGS.checkpoint_dir)

    train_start = 0
    train_end = 60000
    test_start = 0
    test_end = 10000
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    assert Y_train.shape[1] == 10

    # NOTE: for compatibility with Madry Lab downloadable checkpoints,
    # we cannot enclose this in a scope or do anything else that would
    # change the automatic naming of the variables.
    model = MadryMNIST()

    x_input = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10])

    if FLAGS.attack_type == 'fgsm':
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(x_image, **fgsm_params)
    elif FLAGS.attack_type == 'bim':
        bim = BasicIterativeMethod(model)
        bim_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                      'nb_iter': 50, 'eps_iter': .01}
        adv_x = bim.generate(x_image, **bim_params)
    else:
        raise ValueError(FLAGS.attack_type)
    preds_adv = model.get_probs(adv_x)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore the checkpoint
        saver.restore(sess, checkpoint)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': FLAGS.batch_size}
        t1 = time.time()
        acc = model_eval(sess, x_image, y, preds_adv, X_test, Y_test,
                         args=eval_par)
        t2 = time.time()
        print("Took", t2 - t1, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)