def main(argv):
    """
    Copy the latest checkpoint into a scoped model and save it with joblib.

    The most recent checkpoint in ``FLAGS.checkpoint_dir`` is restored into a
    model built by ``make_wresnet()``. Every variable is then assigned to the
    variable of the same name (prefixed with the ``cifar10_challenge`` scope)
    in a second model, which is given a CIFAR dataset factory and written to
    ``model.joblib``.

    :param argv: command-line arguments (not used by this function)
    """
    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if model_file is None:
        print('No model found')
        # A missing checkpoint is a failure: report it through the exit
        # status so calling scripts do not mistake it for success.
        sys.exit(1)

    set_log_level(logging.DEBUG)

    # The context manager installs the session as the default one and closes
    # it when the conversion is finished.
    with tf.Session() as sess:
        model = make_wresnet()
        saver = tf.train.Saver()
        # Restore the checkpoint
        saver.restore(sess, model_file)

        SCOPE = "cifar10_challenge"
        model2 = make_wresnet(scope=SCOPE)
        source_vars = model.get_vars()
        target_vars = model2.get_vars()
        assert len(source_vars) == len(target_vars)

        # Index the target variables by name once instead of rescanning the
        # whole list for every source variable. setdefault keeps the first
        # occurrence, matching the original first-match scan.
        targets_by_name = {}
        for idx, var2 in enumerate(target_vars):
            targets_by_name.setdefault(var2.name, (idx, var2))

        found = [False] * len(target_vars)
        for var1 in source_vars:
            match = targets_by_name.get(SCOPE + "/" + var1.name)
            assert match is not None, var1.name
            idx, var2 = match
            found[idx] = True
            sess.run(tf.assign(var2, var1))
        # Every target variable must have received a value
        assert all(found)

        model2.dataset_factory = Factory(CIFAR, {"max_val": 255})
        serial.save("model.joblib", model2)
def test_make_confidence_report_bundled():
    """
    Smoke test: build a tiny linear model, save it, and check that
    make_confidence_report_bundled runs on it without raising.
    """
    session = tf.compat.v1.Session()
    try:
        nb_classes = 3
        nb_features = 2
        batch_size = 5
        nb_test_examples = batch_size * 2

        model = MLP(layers=[Linear(num_hid=nb_classes)],
                    input_shape=(None, nb_features))
        dataset = SimpleDataset(test_end=nb_test_examples,
                                nb_classes=nb_classes)
        model.dataset_factory = dataset.get_factory()

        filepath = ".test_model.joblib"
        with session.as_default():
            session.run(tf.compat.v1.global_variables_initializer())
            serial.save(filepath, model)

        def recipe(sess, model, x, y, nb_classes, eps, clip_min, clip_max,
                   eps_iter, nb_iter, report_path, eps_iter_small,
                   batch_size):
            """
            Stand-in recipe that bundles only the Noise attack so the test
            finishes quickly.
            """
            noise_config = AttackConfig(Noise(model, sess), {'eps': eps})
            attack_configs = [noise_config]
            # Request one run of every configured attack
            misclassify = Misclassify(
                new_work_goal=dict.fromkeys(attack_configs, 1))
            bundle_attacks(sess, model, x, y, attack_configs, [misclassify],
                           report_path, attack_batch_size=batch_size,
                           eval_batch_size=batch_size)

        make_confidence_report_bundled(filepath,
                                       test_end=nb_test_examples,
                                       recipe=recipe,
                                       base_eps=.1,
                                       base_eps_iter=.01,
                                       batch_size=batch_size)
    finally:
        session.close()
def test_save_and_load_var(self):
    """test_save_and_load_var:
    Test that we can save and load a PicklableVariable with joblib
    """
    import os
    import shutil
    import tempfile

    # Use a private temporary directory instead of a fixed /tmp path so the
    # test is portable, cannot collide with a concurrent run, and leaves
    # nothing behind.
    tmp_dir = tempfile.mkdtemp()
    path = os.path.join(tmp_dir, "var.joblib")
    sess = tf.Session()
    try:
        with sess.as_default():
            x = np.ones(1)
            xv = PicklableVariable(x)
            xv.var.initializer.run()
            save(path, xv)
            # Change the live variable after saving; the reloaded copy must
            # still hold the value that was saved.
            sess.run(tf.assign(xv.var, np.ones(1) * 2))
            new_xv = load(path)
            self.assertClose(sess.run(xv.var), np.ones(1) * 2)
            self.assertClose(sess.run(new_xv.var), np.ones(1))
    finally:
        sess.close()
        shutil.rmtree(tmp_dir, ignore_errors=True)
def save(criteria, report, report_path, adv_x_val):
    """
    Saves the report and adversarial examples.

    The adversarial examples are written next to the report, to the report
    path with its ".joblib" suffix replaced by "_adv.npy".

    :param criteria: dict, of the form returned by AttackGoal.get_criteria
    :param report: dict containing a confidence report
    :param report_path: string, filepath; must end with ".joblib"
    :param adv_x_val: numpy array containing dataset of adversarial examples
    :raises AssertionError: if report_path does not end with ".joblib"
    """
    # Validate the path before anything is printed or written, so a bad path
    # cannot leave a report on disk without its adversarial examples.
    assert report_path.endswith(".joblib")
    adv_x_path = report_path[:-len(".joblib")] + "_adv.npy"

    print_stats(criteria['correctness'], criteria['confidence'], 'bundled')
    serial.save(report_path, report)
    np.save(adv_x_path, adv_x_val)
def test_save_load_confidence_report():
    """
    Test that a confidence report can be loaded and saved.
    """
    report = ConfidenceReport()
    num_examples = 2
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    clean_correctness = np.zeros((num_examples,), dtype=bool)
    clean_confidence = np.zeros((num_examples,), dtype=np.float32)
    adv_correctness = clean_correctness.copy()
    adv_confidence = clean_confidence.copy()
    report['clean'] = ConfidenceReportEntry(clean_correctness,
                                            clean_confidence)
    report['adv'] = ConfidenceReportEntry(adv_correctness, adv_confidence)
    report.completed = True
    filepath = ".test_confidence_report.joblib"
    serial.save(filepath, report)
    # Round trip: loading must succeed without raising
    report = serial.load(filepath)
def evaluate():
    """
    Per-epoch callback: save the model, then periodically evaluate on the
    test and training sets while tracking the best test result so far.

    Reads and updates the module-level counters `epoch`, `last_test_print`,
    `last_train_print`, `best_result` and `best_epoch`.
    """
    global epoch, last_test_print, last_train_print
    global best_result, best_epoch

    with sess.as_default():
        print("Saving to ", FLAGS.save_path)
        save(FLAGS.save_path, model)

    # Test-set evaluation: every `print_test_period` epochs, or when more than
    # 300 seconds have passed since the last one.
    test_due = (epoch % print_test_period == 0
                or time.time() - last_test_print > 300)
    if test_due:
        started = time.time()
        result = do_eval(dataset.x_test, dataset.y_test, False)
        finished = time.time()
        if result >= best_result:
            if result > best_result:
                best_epoch = epoch
            else:
                # Keep track of ties: best_epoch becomes a list of epochs
                assert result == best_result
                if not isinstance(best_epoch, list):
                    best_epoch = [] if best_epoch == -1 else [best_epoch]
                best_epoch.append(epoch)
            best_result = result
        print("Best so far: ", best_result)
        print("Best epoch: ", best_epoch)
        last_test_print = finished
        print("Test eval time: ", finished - started)

    # Training-set evaluation: every `print_train_period` epochs, or when more
    # than 3000 seconds have passed since the last one.
    train_due = (epoch % print_train_period == 0
                 or time.time() - last_train_print > 3000)
    if train_due:
        started = time.time()
        print("Training set: ")
        do_eval(dataset.x_train, dataset.y_train, False)
        finished = time.time()
        print("Train eval time: ", finished - started)
        last_train_print = finished

    epoch += 1
def make_confidence_report(filepath, train_start=TRAIN_START,
                           train_end=TRAIN_END,
                           test_start=TEST_START, test_end=TEST_END,
                           batch_size=BATCH_SIZE, which_set=WHICH_SET,
                           mc_batch_size=MC_BATCH_SIZE,
                           report_path=REPORT_PATH,
                           base_eps_iter=BASE_EPS_ITER,
                           nb_iter=NB_ITER):
    """
    Load a saved model, gather its predictions, and save a confidence report.

    Three evaluations are run in order: clean data, the Semantic attack, and
    a single MaxConfidence attack per example. This gives a quick estimate of
    the true failure rate as long as the model does not suffer from gradient
    masking. The estimate is mostly intended for development work and not for
    publication; make_confidence_report_bundled.py may give a more accurate
    one.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param mc_batch_size: batch size for MaxConfidence attack
    :param report_path: where to save the report. If None, it is derived
      from `filepath` by replacing ".joblib" with "_report.joblib".
    :param base_eps_iter: step size if the data were in [0,1]
      (Step size will be rescaled proportional to the actual data range)
    :param nb_iter: Number of iterations of PGD to run per class
    """
    # Fix the TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Show informational log messages
    set_log_level(logging.INFO)

    sess = tf.Session()

    if report_path is None:
        suffix = '.joblib'
        assert filepath.endswith(suffix)
        report_path = filepath[:-len(suffix)] + "_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Rebuild the dataset the model was trained on, restricted to the
    # requested index ranges.
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val

    # Per-dataset attack budget, expressed for data in [0, 1]
    cls_name = str(factory.cls)
    if 'CIFAR' in cls_name:
        base_eps = 8. / 255.
        default_eps_iter = 2. / 255.
    elif 'MNIST' in cls_name:
        base_eps = .3
        default_eps_iter = .1
    else:
        raise NotImplementedError(cls_name)
    if base_eps_iter is None:
        base_eps_iter = default_eps_iter

    mc_params = {
        'eps': base_eps * value_range,
        'eps_iter': base_eps_iter * value_range,
        'nb_iter': nb_iter,
        'clip_min': min_value,
        'clip_max': max_val,
    }

    x_data, y_data = dataset.get_set(which_set)

    report = {}

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    # Each job: (report key, attack, attack parameters, batch size override)
    jobs = [
        ('clean', None, None, None),
        ('Semantic', semantic, None, None),
        ('mc', mc, mc_params, mc_batch_size),
    ]

    for name, attack, attack_params, job_batch_size in jobs:
        if job_batch_size is None:
            job_batch_size = batch_size
        started = time.time()
        correctness, confidence = correctness_and_confidence(
            sess, model, x_data, y_data,
            batch_size=job_batch_size, devices=devices,
            attack=attack, attack_params=attack_params)
        finished = time.time()
        print("Evaluation took", finished - started, "seconds")

        report[name] = {
            'correctness': correctness,
            'confidence': confidence,
        }

        print_stats(correctness, confidence, name)

    save(report_path, report)
def model_training(model, file_name, x_train, y_train, x_test, y_test,
                   nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, num_threads=None,
                   label_smoothing=0.1):
    """
    Trains the model with the specified parameters.

    The trained model is saved to "models/joblibs/" + file_name.

    Parameters
    ----------
    model: cleverhans.model.Model
      The cleverhans picklable model
    file_name: str
      The name of the joblib file.
    x_train: numpy.ndarray
      The input array of the train dataset.
    y_train: numpy.ndarray
      The output array of the train dataset.
    x_test: numpy.ndarray
      The input array of the test dataset.
    y_test: numpy.ndarray
      The output array of the test dataset.
    nb_epochs: int, optional
      The number of epochs.
    batch_size: int, optional
      The batch size.
    learning_rate: float, optional
      The learning rate.
    num_threads: int, optional
      The number of threads used (intra-op parallelism of the session).
      If None or 0, the TensorFlow default is kept.
    label_smoothing: float, optional
      The amount of label smoothing used.
    """
    if num_threads:
        # Honour the requested thread count; the value used to be ignored and
        # the session was always limited to a single thread.
        config_args = dict(intra_op_parallelism_threads=int(num_threads))
    else:
        config_args = {}

    session = tf.Session(config=tf.ConfigProto(**config_args))

    img_rows, img_cols, channels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
    }
    eval_params = {"batch_size": batch_size}

    predictions = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def train_evaluation():
        """
        Prints the performances of the models after each epoch.
        """
        evaluate(session, x, y, predictions, x_train, y_train, x_test,
                 y_test, eval_params)

    train(session, loss, x_train, y_train, evaluate=train_evaluation,
          args=train_params, var_list=model.get_params())

    # Saving is done with this session installed as the default one
    with session.as_default():
        save("models/joblibs/" + file_name, model)
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN,
                   testing=False, preprocess='',
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial

    Optionally trains a picklable CNN on clean data and evaluates it on clean
    and FGSM examples, then trains a second CNN with FGSM adversarial
    training. Both models are saved with joblib under ``../models/`` and the
    train/test labels are pickled under ``../pickle/``.

    :param train_start: index of first training set example
      (not used by the visible body)
    :param train_end: index of last training set example
      (not used by the visible body)
    :param test_start: index of first test set example
      (not used by the visible body)
    :param test_end: index of last test set example
      (not used by the visible body)
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param preprocess: forwarded to get_MNIST_67_preprocess and used in the
                       file names of the saved models
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: forwarded to make_basic_picklable_cnn
    :param num_threads: if truthy, the session is limited to one intra-op
                        thread (the value itself is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    # NOTE(review): the thread count is hard-coded to 1; the value of
    # num_threads only acts as an on/off switch — confirm this is intended.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    ### CHANGE DATASET ###
    # Get MNIST data
    # mnist = MNIST_67(train_start=train_start, train_end=train_end,
    #               test_start=test_start, test_end=test_end)
    # x_train, y_train = mnist.get_set('train')
    # x_test, y_test = mnist.get_set('test')
    x_train, y_train, x_test, y_test = get_MNIST_67_preprocess(
        preprocess=preprocess)

    # Persist the labels so they can be read back later
    with open('../pickle/{}_y_train.pickle'.format(FILENAME), 'wb') as handle:
        pickle.dump(y_train, handle)
    with open('../pickle/{}_y_test.pickle'.format(FILENAME), 'wb') as handle:
        pickle.dump(y_test, handle)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    ### ADD PARAMETERS ###
    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """
        Evaluate `preds` on (x_set, y_set), store the accuracy on the report
        under `report_key`, and print it when `is_adv` is not None.
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, save_logit=True,
                         filename=FLAGS.filename + "_" + report_key,
                         args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        ### picklable ###
        #model = ModelBasicCNN('model1', nb_classes, nb_filters)
        model = make_basic_picklable_cnn(nb_filters=nb_filters,
                                         nb_classes=nb_classes)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            # Clean-trained model on clean test data
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Now, save the graph
        with sess.as_default():
            save("../models/CNN_{}.joblib".format(preprocess), model)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # NOTE(review): as laid out here this exit() runs unconditionally and
        # makes the rest of the function unreachable whenever clean_train is
        # true. The original nesting (inside `if testing:` or not) could not
        # be confirmed — verify the intent.
        exit()

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    ### picklable ###
    #model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    model2 = make_basic_picklable_cnn(nb_filters=nb_filters,
                                      nb_classes=nb_classes)
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        # FGSM examples built from model2 with the shared parameters
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, loss2, x_train, y_train, evaluate=evaluate2,
          args=train_params, rng=rng, var_list=model2.get_params())

    # Now, save the graph
    with sess.as_default():
        save("../models/{}_{}.joblib".format(FILENAME, preprocess), model2)

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
def bundle_examples_with_goal(sess, model, adv_x_list, y, goal,
                              report_path):
    """
    A post-processor version of attack bundling, that chooses the strongest
    example from the output of multiple earlier bundling strategies.

    The report is saved to `report_path` and the chosen adversarial examples
    to the same path with ".joblib" replaced by "_adv_x.npy".

    :param sess: tf.session.Session
    :param model: cleverhans.model.Model
    :param adv_x_list: list of numpy arrays
      Each entry in the list is the output of a previous bundler; it is an
        adversarial version of the whole dataset.
    :param y: numpy array containing true labels
    :param goal: AttackGoal to use to choose the best version of each
      adversarial example
    :param report_path: str, the path the report will be saved to;
      must end with ".joblib"
    :raises AssertionError: if the inputs are malformed or report_path does
      not end with ".joblib"
    """

    # Check the input. The output path is validated first so a bad path fails
    # before the criteria are computed and before the report is written.
    assert report_path.endswith('.joblib')
    adv_x_path = report_path[:-len('.joblib')] + "_adv_x.npy"
    num_attacks = len(adv_x_list)
    assert num_attacks > 0
    adv_x_0 = adv_x_list[0]
    assert isinstance(adv_x_0, np.ndarray)
    assert all(adv_x.shape == adv_x_0.shape for adv_x in adv_x_list)

    # Allocate the output
    out = np.zeros_like(adv_x_0)
    m = adv_x_0.shape[0]
    # Initialize with negative sentinel values to make sure everything is
    # written to
    correctness = -np.ones(m, dtype='int32')
    confidence = -np.ones(m, dtype='float32')

    # Gather criteria
    criteria = [
        goal.get_criteria(sess, model, adv_x, y) for adv_x in adv_x_list
    ]
    assert all('correctness' in c for c in criteria)
    assert all('confidence' in c for c in criteria)
    _logger.info("Accuracy on each advx dataset: ")
    for c in criteria:
        _logger.info("\t" + str(c['correctness'].mean()))

    for example_idx in range(m):
        # Index of the best attack for this example
        attack_idx = 0
        # Find the winner
        for candidate_idx in range(1, num_attacks):
            if goal.new_wins(criteria[attack_idx], example_idx,
                             criteria[candidate_idx], example_idx):
                attack_idx = candidate_idx
        # Copy the winner into the output
        winner = criteria[attack_idx]
        out[example_idx] = adv_x_list[attack_idx][example_idx]
        correctness[example_idx] = winner['correctness'][example_idx]
        confidence[example_idx] = winner['confidence'][example_idx]

    # No sentinel may survive, and all values must be in [0, 1]
    assert correctness.min() >= 0
    assert correctness.max() <= 1
    assert confidence.min() >= 0.
    assert confidence.max() <= 1.

    report = {
        'bundled': {
            'correctness': correctness,
            'confidence': confidence
        }
    }
    serial.save(report_path, report)
    np.save(adv_x_path, out)
def do_train(train_start=TRAIN_START, train_end=60000, test_start=0,
             test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
             learning_rate=LEARNING_RATE,
             backprop_through_attack=False,
             nb_filters=NB_FILTERS, num_threads=None,
             use_ema=USE_EMA, ema_decay=EMA_DECAY):
    """
    Adversarially train an MNIST model with PGD and save it to
    `FLAGS.save_path`.

    The model is saved after every epoch and once more at the end. The run
    is aborted if `FLAGS.save_path` already exists.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param backprop_through_attack: not used by this function
    :param nb_filters: passed to the Model constructor as `filters`
    :param num_threads: if truthy, the session is limited to one intra-op
      thread
    :param use_ema: forwarded to `train`
    :param ema_decay: name of a module-level callable, looked up with
      `globals()` and forwarded to `train`
    """
    # Must stay the first statement: locals() has to contain only the
    # parameters at this point.
    print('Parameters')
    print('-' * 79)
    for name, value in sorted(locals().items()):
        print('%-32s %s' % (name, value))
    print('-' * 79)

    if os.path.exists(FLAGS.save_path):
        print("Model " + FLAGS.save_path +
              " already exists. Refusing to overwrite.")
        quit()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    # NOTE(review): the thread count is hard-coded to 1; the value of
    # num_threads only acts as an on/off switch — confirm this is intended.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    # This is the only session of the run. A second, unconfigured
    # `tf.Session()` used to be created further down, which leaked this one
    # and silently dropped the thread configuration.
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    dataset = MNIST(train_start=train_start, train_end=train_end,
                    test_start=test_start, test_end=test_end,
                    center=True)

    # Use Image Parameters
    img_rows, img_cols, nchannels = dataset.x_train.shape[1:4]
    nb_classes = dataset.NB_CLASSES

    # Define input TF placeholder
    # (kept although unused below, so the graph is built exactly as before)
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        'nb_epochs': nb_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(x_set, y_set, is_adv=None):
        """
        Return the accuracy on (x_set, y_set); print it when `is_adv` is
        not None.
        """
        acc = accuracy(sess, model, x_set, y_set)

        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'clean'
        if report_text:
            print('Accuracy on %s examples: %0.4f' % (report_text, acc))
        return acc

    model = Model(filters=nb_filters)
    model.dataset_factory = dataset.get_factory()

    pgd = ProjectedGradientDescent(model=model, sess=sess)

    center = dataset.kwargs['center']
    value_range = 1. + center
    base_eps = 8. / 255.

    attack_params = {
        'eps': base_eps * value_range,
        'clip_min': -float(center),
        'clip_max': float(center),
        'eps_iter': (2. / 255.) * value_range,
        'nb_iter': 40.
    }

    loss = CrossEntropy(
        model,
        attack=pgd,
        adv_coeff=1.,
        attack_params=attack_params,
    )

    print_test_period = 10
    print_train_period = 50

    def evaluate():
        """
        Per-epoch callback: save the model, then periodically evaluate on
        the test and training sets while tracking the best test result.
        """
        global epoch
        global last_test_print
        global last_train_print
        global best_result
        global best_epoch
        with sess.as_default():
            print("Saving to ", FLAGS.save_path)
            save(FLAGS.save_path, model)
        if epoch % print_test_period == 0 or time.time(
        ) - last_test_print > 300:
            t1 = time.time()
            result = do_eval(dataset.x_test, dataset.y_test, False)
            t2 = time.time()
            if result >= best_result:
                if result > best_result:
                    best_epoch = epoch
                else:
                    # Keep track of ties
                    assert result == best_result
                    if not isinstance(best_epoch, list):
                        if best_epoch == -1:
                            best_epoch = []
                        else:
                            best_epoch = [best_epoch]
                    best_epoch.append(epoch)
                best_result = result
            print("Best so far: ", best_result)
            print("Best epoch: ", best_epoch)
            last_test_print = t2
            print("Test eval time: ", t2 - t1)
        if (epoch % print_train_period == 0 or
                time.time() - last_train_print > 3000):
            t1 = time.time()
            print("Training set: ")
            do_eval(dataset.x_train, dataset.y_train, False)
            t2 = time.time()
            print("Train eval time: ", t2 - t1)
            last_train_print = t2
        epoch += 1

    optimizer = None

    # `ema_decay` arrives as the name of a module-level callable
    ema_decay = globals()[ema_decay]
    assert callable(ema_decay)

    train(sess, loss, dataset.x_train, dataset.y_train, evaluate=evaluate,
          optimizer=optimizer,
          args=train_params, rng=rng, var_list=model.get_params(),
          use_ema=use_ema, ema_decay=ema_decay)
    # Make sure we always evaluate on the last epoch, so pickling bugs are
    # more obvious
    if (epoch - 1) % print_test_period != 0:
        do_eval(dataset.x_test, dataset.y_test, False)
    if (epoch - 1) % print_train_period != 0:
        print("Training set: ")
        do_eval(dataset.x_train, dataset.y_train, False)

    with sess.as_default():
        save(FLAGS.save_path, model)
def run_batch_with_goal(sess, model, x, y, adv_x_val, criteria,
                        attack_configs, run_counts, goal, report,
                        report_path):
    """
    Runs attack bundling on one batch of data.
    This function is mostly intended to be called by
    `bundle_attacks_with_goal`.

    `adv_x_val`, `criteria`, `run_counts` and `report` are updated in place.
    The report and adversarial examples are written to disk the first time
    and then whenever more than REPORT_TIME_INTERVAL seconds have passed
    since the last save.

    :param sess: tf.session.Session
    :param model: cleverhans.model.Model
    :param x: numpy array containing clean example inputs to attack
    :param y: numpy array containing true labels
    :param adv_x_val: numpy array containing the adversarial examples made so
      far by earlier work in the bundling process
    :param criteria: dict mapping string names of criteria to numpy arrays
      with their values for each example
      (Different AttackGoals track different criteria)
    :param attack_configs: the AttackConfigs offered to
      `goal.get_attack_config`, which picks the one to run
    :param run_counts: dict mapping AttackConfigs to numpy arrays reporting
      how many times they have been run on each example
    :param goal: the AttackGoal to work on
    :param report: dict, see `bundle_attacks_with_goal`
    :param report_path: str, path to save the report to;
      must end with ".joblib"
    :raises AssertionError: if report_path does not end with ".joblib"
    """
    # Validate the output path up front, so a bad path fails before the
    # attack runs and before a report is written without its companion file.
    assert report_path.endswith(".joblib")
    adv_x_path = report_path[:-len(".joblib")] + "_adv.npy"

    attack_config = goal.get_attack_config(attack_configs, run_counts,
                                           criteria)
    idxs = goal.request_examples(attack_config, criteria, run_counts,
                                 BATCH_SIZE)
    x_batch = x[idxs]
    assert x_batch.shape[0] == BATCH_SIZE
    y_batch = y[idxs]
    assert y_batch.shape[0] == BATCH_SIZE
    adv_x_batch = run_attack(sess, model, x_batch, y_batch,
                             attack_config.attack, attack_config.params,
                             BATCH_SIZE, devices)
    criteria_batch = goal.get_criteria(sess, model, adv_x_batch, y_batch)
    # This can't be parallelized because some orig examples are copied more
    # than once into the batch
    cur_run_counts = run_counts[attack_config]
    for batch_idx, orig_idx in enumerate(idxs):
        cur_run_counts[orig_idx] += 1
        should_copy = goal.new_wins(criteria, orig_idx, criteria_batch,
                                    batch_idx)
        if should_copy:
            adv_x_val[orig_idx] = adv_x_batch[batch_idx]
            for key in criteria:
                criteria[key][orig_idx] = criteria_batch[key][batch_idx]
            assert np.allclose(y[orig_idx], y_batch[batch_idx])
    report['bundled'] = {
        'correctness': criteria['correctness'],
        'confidence': criteria['confidence']
    }

    # Save on the first call, then at most once per REPORT_TIME_INTERVAL
    new_time = time.time()
    should_save = ('time' not in report or
                   new_time - report['time'] > REPORT_TIME_INTERVAL)

    if should_save:
        report['time'] = new_time
        goal.print_progress(criteria, run_counts)
        print_stats(criteria['correctness'], criteria['confidence'],
                    'bundled')

        serial.save(report_path, report)
        np.save(adv_x_path, adv_x_val)
def make_confidence_report(
    filepath,
    train_start=TRAIN_START,
    train_end=TRAIN_END,
    test_start=TEST_START,
    test_end=TEST_END,
    batch_size=BATCH_SIZE,
    which_set=WHICH_SET,
    mc_batch_size=MC_BATCH_SIZE,
    report_path=REPORT_PATH,
    base_eps_iter=BASE_EPS_ITER,
    nb_iter=NB_ITER,
    save_advx=SAVE_ADVX,
):
    """
    Load a saved model, gather its predictions, and save a confidence report.


    This function works by running a single MaxConfidence attack on each
    example. This provides a reasonable estimate of the true failure rate
    quickly, so long as the model does not suffer from gradient masking.
    However, this estimate is mostly intended for development work and not
    for publication. A more accurate estimate may be obtained by running
    make_confidence_report_bundled.py instead.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param mc_batch_size: batch size for MaxConfidence attack
    :param report_path: where to save the report. If None, it is derived
      from `filepath` by replacing ".joblib" with "_report.joblib".
    :param base_eps_iter: step size if the data were in [0,1]
      (Step size will be rescaled proportional to the actual data range)
    :param nb_iter: Number of iterations of PGD to run per class
    :param save_advx: bool. If True, saves the adversarial examples to disk.
      On by default, but can be turned off to save memory, etc.
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith(".joblib")
        report_path = filepath[: -len(".joblib")] + "_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs["train_start"] = train_start
    factory.kwargs["train_end"] = train_end
    factory.kwargs["test_start"] = test_start
    factory.kwargs["test_end"] = test_end
    dataset = factory()

    center = dataset.kwargs["center"]
    max_val = dataset.kwargs["max_val"]
    value_range = max_val * (1.0 + center)
    min_value = 0.0 - center * max_val

    if "CIFAR" in str(factory.cls):
        base_eps = 8.0 / 255.0
        if base_eps_iter is None:
            base_eps_iter = 2.0 / 255.0
    elif "MNIST" in str(factory.cls):
        base_eps = 0.3
        if base_eps_iter is None:
            base_eps_iter = 0.1
    else:
        raise NotImplementedError(str(factory.cls))

    mc_params = {
        "eps": base_eps * value_range,
        "eps_iter": base_eps_iter * value_range,
        "nb_iter": nb_iter,
        "clip_min": min_value,
        "clip_max": max_val,
    }

    x_data, y_data = dataset.get_set(which_set)

    report = ConfidenceReport()

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    # Each job: (name, attack, attack params, batch size override,
    #            whether its adversarial examples may be saved)
    jobs = [
        ("clean", None, None, None, False),
        ("Semantic", semantic, None, None, False),
        ("mc", mc, mc_params, mc_batch_size, True),
    ]

    for name, attack, attack_params, job_batch_size, save_this_job in jobs:
        if job_batch_size is None:
            job_batch_size = batch_size
        # Inputs evaluated by this job. Kept separate from `x_data` so that
        # saving adversarial examples never overwrites the clean data that
        # other jobs rely on.
        job_x = x_data
        t1 = time.time()
        if save_advx and save_this_job:
            # If we want to save the adversarial examples to the filesystem,
            # we need to fetch all of them. Otherwise they're just computed
            # one batch at a time and discarded

            # The path to save to
            assert report_path.endswith(".joblib")
            advx_path = (
                report_path[: -len(".joblib")] + "_advx_" + name + ".npy"
            )

            # Fetch the adversarial examples
            job_x = run_attack(
                sess,
                model,
                x_data,
                y_data,
                attack,
                attack_params,
                batch_size=job_batch_size,
                devices=devices,
            )

            # Turn off the attack so `correctness_and_confidence` won't run
            # it a second time.
            attack = None
            attack_params = None

            # Save the adversarial examples
            np.save(advx_path, job_x)

        # Run correctness and confidence evaluation on this job's inputs
        packed = correctness_and_confidence(
            sess,
            model,
            job_x,
            y_data,
            batch_size=job_batch_size,
            devices=devices,
            attack=attack,
            attack_params=attack_params,
        )

        t2 = time.time()
        print("Evaluation took", t2 - t1, "seconds")
        correctness, confidence = packed

        report[name] = ConfidenceReportEntry(
            correctness=correctness, confidence=confidence
        )

        print_stats(correctness, confidence, name)

    save(report_path, report)
def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA).

    Depending on the module-level `TRAIN_NEW` flag the model is either
    trained and saved to "test.joblib" or loaded from that file. Then
    `source_samples` randomly drawn test images are attacked once per
    target class other than their label, and success / perturbation
    statistics are printed. For every attacked image the average number of
    modified 8-connected neighbors per modified pixel is printed as well.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # A single default-configured session. The former `num_threads` branch
    # was dead code (`num_threads` was hard-wired to None).
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_picklable_cnn()
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # NOTE: the unused tf.data datasets that used to be built here were
    # removed. They hard-coded 60000/10000 examples (breaking any other
    # train_end/test_end) and embedded both datasets in the graph.
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    if TRAIN_NEW == 1:
        with sess.as_default():
            train(sess, loss, x_train, y_train, args=train_params, rng=rng)
            save("test.joblib", model)
    else:
        with sess.as_default():
            model = load("test.joblib")
            assert len(model.get_params()) > 0
            # Rebuild the symbolic outputs on top of the loaded model
            preds = model.get_logits(x)
            loss = CrossEntropy(model, smoothing=0.1)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test,
                          args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    figure = None
    nb_test = x_test.shape[0]

    # Loop over the samples we want to perturb into adversarial examples
    seed(SEED)
    for sample_ind in xrange(0, source_samples):
        # Draw the image to attack. The bound comes from the loaded test
        # set instead of a hard-coded 10000, and `nb_test - 1` keeps the
        # index valid whether `randint` treats its upper bound as
        # inclusive or exclusive.
        img = randint(0, nb_test - 1)
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[img:(img + 1)]
        sample_flat = sample.reshape(-1)

        # We want to find an adversarial example for each possible target
        # class (i.e. all classes that differ from the label given in the
        # dataset)
        current_class = int(np.argmax(y_test[img]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))

        # Per-sample totals over all targets
        total_neighbors = 0
        total_modified = 0

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features. Compare against the
            # image that was actually attacked (`x_test[img]`), not
            # `x_test[sample_ind]`.
            adv_x_flat = adv_x.reshape(-1)
            nb_changed = int(np.count_nonzero(adv_x_flat != sample_flat))
            percent_perturb = float(nb_changed) / adv_x_flat.shape[0]

            # Quantize the perturbation to 8 bits in memory. This replaces
            # the earlier write/read of "test.png", which performed the
            # same rounding and saturation through the file system.
            diff = np.reshape(np.asarray(adv_x - sample),
                              (img_rows, img_cols, nchannels))
            quantized = np.clip(np.rint(diff * 255), 0, 255)
            # A pixel is modified when any of its channels increased
            nb_modified, nb_neighbors = _count_modified_neighbors(
                quantized.max(axis=-1))
            total_modified += nb_modified
            total_neighbors += nb_neighbors

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)),
                    figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

        # The counters are per sample, so report them per sample; guard
        # against a sample for which no pixel was modified at all.
        if total_modified:
            avg_neighbors = float(total_neighbors) / total_modified
        else:
            avg_neighbors = float('nan')
        print("average neighbors per modified pixel ", avg_neighbors)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.8f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    # Entries that stayed at zero were never attacked (or left unchanged)
    # and are excluded from the min/max statistics.
    attempted = perturbations > 0
    min_perturbed, max_perturbed = _min_max_or_nan(perturbations[attempted])
    print('Avg. rate of perturbed features {0:.8f}'.format(percent_perturbed))
    print('MIN of perturbed features {0:.8f}'.format(min_perturbed))
    print('MAX of perturbed features {0:.8f}'.format(max_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    min_perturb_succ, max_perturb_succ = _min_max_or_nan(
        perturbations[attempted & (results > 0)])
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(percent_perturb_succ))
    print('Min of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(min_perturb_succ))
    print('Max of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(max_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report


def _count_modified_neighbors(diff):
    """
    Count modified pixels and their modified 8-connected neighbors.

    A pixel counts as modified when its entry in `diff` is strictly
    positive. Positions outside the image count as unmodified, so border
    and corner pixels follow the same rule as interior ones (corner pixels
    include their diagonal neighbor).

    :param diff: 2-D array of per-pixel perturbation values
    :return: tuple `(nb_modified, nb_neighbors)`: the number of modified
             pixels, and the sum over modified pixels of how many of their
             neighbors are modified too
    """
    modified = (np.asarray(diff) > 0).astype(np.int64)
    nb_rows, nb_cols = modified.shape
    # Zero-pad by one pixel so that every pixel has 8 addressable neighbors
    padded = np.pad(modified, 1, mode='constant')
    neighbor_counts = np.zeros_like(modified)
    for row_off in (0, 1, 2):
        for col_off in (0, 1, 2):
            if row_off == 1 and col_off == 1:
                continue  # offset (1, 1) is the pixel itself
            neighbor_counts += padded[row_off:row_off + nb_rows,
                                      col_off:col_off + nb_cols]
    return int(modified.sum()), int((neighbor_counts * modified).sum())


def _min_max_or_nan(values):
    """
    Return `(min, max)` of `values` as floats, or `(nan, nan)` if empty.

    :param values: numpy array of numbers (possibly empty)
    :return: tuple of two Python floats
    """
    if values.size == 0:
        return float('nan'), float('nan')
    return float(np.min(values)), float(np.max(values))
def train_sub(sess, x, y, bbox_preds, x_sub, y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              aug_batch_size, rng, img_rows=28, img_cols=28,
              nchannels=1, preprocess=''):
    """
    Build and train the substitute model, alternating between training it
    and growing its training set with Jacobian-based augmentation. The
    trained substitute is saved under "../models/".

    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param x_sub: initial substitute training data
    :param y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param aug_batch_size: batch size handed to `jacobian_augmentation`
    :param rng: numpy.random.RandomState instance
    :param img_rows: not used by this function
    :param img_cols: not used by this function
    :param nchannels: not used by this function
    :param preprocess: string inserted into the saved model's file name
    :return: tuple `(model_sub, preds_sub)`: the substitute model and its
             logits for `x`
    """
    # Substitute model, its logits and its training loss
    substitute = make_basic_picklable_substitute(nb_classes=nb_classes)
    substitute_logits = substitute.get_logits(x)
    substitute_loss = CrossEntropy(substitute, smoothing=0)
    print("Defined TensorFlow model graph for the substitute.")

    # Symbolic Jacobian of the substitute's outputs w.r.t. its input
    jacobian = jacobian_graph(substitute_logits, x, nb_classes)

    final_round = data_aug - 1
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = dict(nb_epochs=nb_epochs_s,
                            batch_size=batch_size,
                            learning_rate=learning_rate)
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            train(sess, substitute_loss, x_sub,
                  to_categorical(y_sub, nb_classes),
                  init_all=False, args=train_params, rng=rng,
                  var_list=substitute.get_params())

        # Nothing to augment after the last training round
        if rho >= final_round:
            continue

        print("Augmenting substitute training data.")
        # The step sign is -1 for the first three rounds and +1 afterwards
        step_sign = 1 if rho >= 3 else -1
        x_sub = jacobian_augmentation(sess, x, x_sub, y_sub, jacobian,
                                      step_sign * lmbda, aug_batch_size)

        print("Labeling substitute training data.")
        # Duplicate the labels, then overwrite the second half with the
        # black-box labels of the second half of the augmented data
        y_sub = np.hstack([y_sub, y_sub])
        half = len(x_sub) // 2
        new_points = x_sub[half:]
        bbox_val = batch_eval(sess, [x], [bbox_preds], [new_points],
                              args={'batch_size': batch_size})[0]
        # Only the argmax is kept: the adversary sees the label output by
        # the black-box model, not its probabilities
        y_sub[half:] = np.argmax(bbox_val, axis=1)

    # Persist the trained substitute
    print("save model")
    model_path = "../models/{}_{}.joblib".format(FILENAME, preprocess)
    with sess.as_default():
        save(model_path, substitute)

    return substitute, substitute_logits
def train_with_PGN(sess, model, loss, train_type='naive', evaluate=None,
                   args=None, rng=None, classifier_var_list=None,
                   generator_var_list=None, save_dir=None, fprop_args=None,
                   optimizer=None, use_ema=False, ema_decay=.998,
                   loss_threshold=1e10, dataset_train=None,
                   dataset_size=None):
    """
    Run (optionally multi-replica, synchronous) training to minimize `loss`.

    With `train_type == 'PGN'`, `loss.fprop` must return a pair
    `(loss_classifier, loss_generator)`; each training step first applies
    the classifier gradients and then the generator gradients. For any
    other `train_type`, `loss.fprop` returns the classifier loss only.

    :param sess: TF session to use when training the graph
    :param model: the model object that is saved once training is done
    :param loss: loss object whose `fprop(x, y, **fprop_args)` builds the
                 loss tensor(s) to minimize
    :param train_type: 'PGN' to also train a generator, anything else for
                       classifier-only training
    :param evaluate: function called as `evaluate(epoch)` after each epoch
                     (typically to display the test/validation accuracy).
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
    :param rng: unused by this function; kept for interface compatibility
    :param classifier_var_list: Optional list of classifier parameters to
                                train. Never modified by this function.
    :param generator_var_list: Optional list of generator parameters to
                               train (only used when train_type is 'PGN').
    :param save_dir: directory in which 'model.joblib' is written after
                     training. If None, the model is not saved.
    :param fprop_args: dict, extra arguments to pass to fprop (loss and
                       model).
    :param optimizer: Optimizer to be used for training
    :param use_ema: bool
        If true, uses an exponential moving average of the model parameters
    :param ema_decay: float or callable
        The decay parameter for EMA, if EMA is used
        If a callable rather than a float, this is a callable that takes
        the epoch and batch as arguments and returns the ema_decay for
        the current batch.
    :param loss_threshold: float
        Raise an exception if the loss exceeds this value.
        This is intended to rapidly detect numerical problems.
        Sometimes the loss may legitimately be higher than this value. In
        such cases, raise the value. If needed it can be np.inf.
    :param dataset_train: tf Dataset instance yielding (x, y) batches.
    :param dataset_size: integer, the size of the dataset_train.
    :return: True if model trained
    :raises ValueError: on missing `nb_epochs` / `learning_rate`, on an
                        optimizer of the wrong type, or on an extreme,
                        NaN or infinite loss
    """
    # Check whether the hardware is working correctly
    canary.run_canary()
    args = _ArgsWrapper(args or {})
    fprop_args = fprop_args or {}

    # Check that necessary arguments were given (see doc above)
    # Be sure to support 0 epochs for debugging purposes
    if args.nb_epochs is None:
        raise ValueError("`args` must specify number of epochs")
    if optimizer is None:
        if args.learning_rate is None:
            raise ValueError("Learning rate was not given in args dict")
    assert args.batch_size, "Batch size was not given in args dict"
    assert dataset_train and dataset_size, \
        "dataset_train or dataset_size was not given"

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    elif not isinstance(optimizer, tf.train.Optimizer):
        raise ValueError("optimizer object must be from a child class of "
                         "tf.train.Optimizer")

    use_pgn = train_type == 'PGN'

    grads_classifier = []
    grads_generator = []
    xs = []
    ys = []

    # Fetch one batch up front; it is only used to learn the dtype and
    # per-example shape of the placeholders.
    # NOTE(review): a one-shot iterator is also read once per training
    # batch below, so `dataset_train` presumably repeats -- confirm.
    data_iterator = dataset_train.make_one_shot_iterator().get_next()
    x_train, y_train = sess.run(data_iterator)

    # One replica of the loss and gradients per device
    devices = infer_devices()
    for device in devices:
        with tf.device(device):
            x = tf.placeholder(x_train.dtype, (None,) + x_train.shape[1:])
            y = tf.placeholder(y_train.dtype, (None,) + y_train.shape[1:])
            xs.append(x)
            ys.append(y)
            if use_pgn:
                loss_classifier, loss_generator = loss.fprop(
                    x, y, **fprop_args)
            else:
                loss_classifier = loss.fprop(x, y, **fprop_args)
            grads_classifier.append(optimizer.compute_gradients(
                loss_classifier, var_list=classifier_var_list))
            if use_pgn:
                grads_generator.append(optimizer.compute_gradients(
                    loss_generator, var_list=generator_var_list))

    num_devices = len(devices)
    print("num_devices: ", num_devices)

    grad_classifier = avg_grads(grads_classifier)
    if use_pgn:
        grad_generator = avg_grads(grads_generator)

    # Trigger update operations within the default graph (such as
    # batch_norm).
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = optimizer.apply_gradients(grad_classifier)
    if use_pgn:
        # The generator update runs after the classifier update
        with tf.control_dependencies([train_step]):
            train_step = optimizer.apply_gradients(grad_generator)

    if use_ema:
        # Build a NEW list of the averaged variables: appending in place
        # would silently grow the caller's `classifier_var_list`. When a
        # needed list was left as None, compute_gradients fell back to the
        # trainable variables, so the average covers those as well.
        if classifier_var_list is None or (use_pgn and
                                           generator_var_list is None):
            var_list = list(tf.trainable_variables())
        else:
            var_list = list(classifier_var_list)
            if use_pgn:
                var_list = var_list + list(generator_var_list)

        ema = tf.train.ExponentialMovingAverage(decay=ema_decay)

        with tf.control_dependencies([train_step]):
            train_step = ema.apply(var_list)
        # Get pointers to the EMA's running average variables
        avg_params = [ema.average(param) for param in var_list]
        # Make temporary buffers used for swapping the live and running
        # average parameters
        tmp_params = [tf.Variable(param, trainable=False)
                      for param in var_list]
        # Define the swapping operation
        param_to_tmp = [tf.assign(tmp, param)
                        for tmp, param in safe_zip(tmp_params, var_list)]
        with tf.control_dependencies(param_to_tmp):
            avg_to_param = [tf.assign(param, avg)
                            for param, avg in safe_zip(var_list, avg_params)]
        with tf.control_dependencies(avg_to_param):
            tmp_to_avg = [tf.assign(avg, tmp)
                          for avg, tmp in safe_zip(avg_params, tmp_params)]
        swap = tmp_to_avg

    batch_size = args.batch_size

    assert batch_size % num_devices == 0
    device_batch_size = batch_size // num_devices

    sess.run(tf.global_variables_initializer())

    nb_batches = int(math.ceil(float(dataset_size) / batch_size))
    for epoch in xrange(args.nb_epochs):
        prev = time.time()
        for _ in range(nb_batches):
            x_batch, y_batch = sess.run(data_iterator)

            # Split the batch into equal consecutive slices, one per device
            feed_dict = {}
            for dev_idx in xrange(num_devices):
                cur_start = dev_idx * device_batch_size
                cur_end = cur_start + device_batch_size
                feed_dict[xs[dev_idx]] = x_batch[cur_start:cur_end]
                feed_dict[ys[dev_idx]] = y_batch[cur_start:cur_end]

            # `loss_classifier` is the loss tensor of the last device
            _, loss_classifier_numpy = sess.run(
                [train_step, loss_classifier], feed_dict=feed_dict)

            if np.abs(loss_classifier_numpy) > loss_threshold:
                raise ValueError("Extreme loss_classifier during training: ",
                                 loss_classifier_numpy)
            if (np.isnan(loss_classifier_numpy) or
                    np.isinf(loss_classifier_numpy)):
                raise ValueError("NaN/Inf loss_classifier during training")
        cur = time.time()
        _logger.info("Epoch " + str(epoch) + " took " +
                     str(cur - prev) + " seconds")
        if evaluate is not None:
            # Evaluate with the averaged parameters, then swap back
            if use_ema:
                sess.run(swap)
            evaluate(epoch)
            if use_ema:
                sess.run(swap)

    if use_ema:
        # Leave the averaged parameters in place for saving
        sess.run(swap)

    if save_dir is None:
        # os.path.join(None, ...) would raise only after training finished
        _logger.warning("save_dir was not given; "
                        "the trained model is not saved")
    else:
        with sess.as_default():
            save_path = os.path.join(save_dir, 'model.joblib')
            save(save_path, model)

    return True
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial: optionally train and save a clean model and
    measure it against FGSM, then train, save and evaluate a second model
    with FGSM adversarial training.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: not used by this function
    :param num_threads: if truthy, the session is configured with
                        `intra_op_parallelism_threads=1`
    :param label_smoothing: float, amount of label smoothing for cross
                            entropy
    :return: an AccuracyReport object
    """
    # Collects (and is returned with) the key accuracies
    report = AccuracyReport()

    # Fixed TF seed for better reproducibility
    tf.set_random_seed(1234)

    # Show debug-level log output
    set_log_level(logging.DEBUG)

    # Session, optionally restricted to one intra-op thread
    session_options = {}
    if num_threads:
        session_options['intra_op_parallelism_threads'] = 1
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    # Load the MNIST splits
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Image geometry and class count are taken from the data itself
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Input and label placeholders
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Shared settings for training, evaluation and the FGSM attack
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate)
    eval_params = dict(batch_size=batch_size)
    fgsm_params = dict(eps=0.3, clip_min=0., clip_max=1.)
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """
        Store the accuracy of `preds` on (x_set, y_set) in
        `report.<report_key>`; print it unless `is_adv` is None.
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (kind, acc))

    if clean_train:
        model = make_basic_picklable_cnn()
        # Record the dataset on the model so that scripts loading the
        # saved file can tell what data it was trained on
        model.dataset_factory = mnist.get_factory()
        preds = model.get_logits(x)
        assert len(model.get_params()) > 0
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def eval_clean_model():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x_train, y_train, evaluate=eval_clean_model,
              args=train_params, rng=rng, var_list=model.get_params())

        with sess.as_default():
            save("clean_model.joblib", model)

            print("Now that the model has been saved, you can evaluate it in a"
                  " separate process using `evaluate_pickled_model.py`. "
                  "You should get exactly the same result for both clean and "
                  "adversarial accuracy as you get within this program.")

        # Training-set accuracy of the clean model
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # FGSM attack object and its graph against the clean model
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Clean model on adversarial test examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Clean model on adversarial training examples
        if testing:
            do_eval(preds_adv, x_train, y_train,
                    'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Second model, trained to be robust to FastGradientMethod
    model2 = make_basic_picklable_cnn()
    # Record the dataset on the model so that scripts loading the saved
    # file can tell what data it was trained on
    model2.dataset_factory = mnist.get_factory()
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def eval_adv_model():
        # Adversarially trained model on legitimate test inputs ...
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # ... and on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Adversarial training, evaluated through `eval_adv_model`
    train(sess, loss2, x_train, y_train, evaluate=eval_adv_model,
          args=train_params, rng=rng, var_list=model2.get_params())

    with sess.as_default():
        save("adv_model.joblib", model2)
        print(
            "Now that the model has been saved, you can evaluate it in a "
            "separate process using "
            "`python evaluate_pickled_model.py adv_model.joblib`. "
            "You should get exactly the same result for both clean and "
            "adversarial accuracy as you get within this program."
            " You can also move beyond the tutorials directory and run the "
            " real `compute_accuracy.py` script (make sure cleverhans/scripts "
            "is in your PATH) to see that this FGSM-trained "
            "model is actually not very robust---it's just a model that trains "
            " quickly so the tutorial does not take a long time")

    # Training-set accuracies of the adversarially trained model
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
def compute_geodesic_matrices():
    """
    Compute one geodesic matrix per DkNN layer and pickle the list.

    A model is loaded from "../data/model.joblib" if that file exists;
    otherwise it is trained on MNIST and saved there. The training data are
    wrapped in a DkNNModel, `hard_geodesics_euclidean_kernel` is applied to
    the stored training activations of every layer, and the resulting list
    is written to "../results/geodesic_matrices_<nb_train>_<proto_neighbors>.pkl".

    :return: True
    """
    mnist = MNIST(train_start=0, train_end=FLAGS.nb_train,
                  test_start=0, test_end=1000)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Image geometry and class count come from the data
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    with get_tensorflow_session() as sess, tf.variable_scope('dknn'):
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(int(FLAGS.seed))

        # Input and label placeholders
        x = tf.placeholder(tf.float32,
                           shape=(None, img_rows, img_cols, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        # Model, logits and (unsmoothed) training loss
        model = make_basic_picklable_cnn()
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.)

        def evaluate():
            """Print the model's accuracy on the test set."""
            acc = model_eval(sess, x, y, preds, x_test, y_test,
                             args={'batch_size': FLAGS.batch_size})
            print('Test accuracy on test examples: %0.4f' % acc)

        train_params = dict(nb_epochs=FLAGS.nb_epochs,
                            batch_size=FLAGS.batch_size,
                            learning_rate=FLAGS.lr)

        # Reuse a previously saved model when there is one
        model_filepath = "../data/model.joblib"
        if Path(model_filepath).is_file():
            model = serial.load(model_filepath)
        else:
            train(sess, loss, x_train, y_train, evaluate=evaluate,
                  args=train_params, var_list=model.get_params())
            serial.save(model_filepath, model)

        def get_activations(data):
            """Map each name in `layers` to its flattened activations."""
            return {
                layer: batch_eval(
                    sess, [x], [tf.layers.flatten(model.get_layer(x, layer))],
                    [data], args={'batch_size': FLAGS.batch_size})[0]
                for layer in layers
            }

        # Split off the first `nb_cali` test examples as a calibration
        # holdout. Only the training data and labels are used below.
        train_data = x_train
        train_labels = np.argmax(y_train, axis=1)
        calibration_data = x_test[:FLAGS.nb_cali]
        calibration_onehot = y_test[:FLAGS.nb_cali]
        calibration_labels = np.argmax(calibration_onehot, axis=1)
        remaining_test_data = x_test[FLAGS.nb_cali:]
        y_test = y_test[FLAGS.nb_cali:]

        # Layers whose representations the DkNN works with
        layers = ['ReLU1', 'ReLU3', 'ReLU5', 'logits']

        # Wrap the model into a DkNNModel
        dknn = DkNNModel(FLAGS.neighbors, layers, get_activations,
                         train_data, train_labels, nb_classes, scope='dknn')

        # One geodesic matrix per layer, in the order of `layers`
        geodesic_matrices = []
        for layer in layers:
            print(layer)
            layer_activations = dknn.train_activations[layer]
            geodesic_matrices.append(hard_geodesics_euclidean_kernel(
                layer_activations, FLAGS.proto_neighbors))

        matrix_path = '../results/geodesic_matrices_{}_{}.pkl'.format(
            FLAGS.nb_train, FLAGS.proto_neighbors)
        with open(matrix_path, 'wb') as out_file:
            pickle.dump(geodesic_matrices, out_file)

    return True