def main(argv=None):
    """
    Make a confidence report and save it to disk.

    The model is loaded from ``argv[1]``; every remaining argument is a path
    passed to ``np.load`` to obtain one array of adversarial examples. Each
    array is sanity-checked against the dataset selected by
    ``FLAGS.which_set`` before the examples are bundled.

    :param argv: argument list of the form
      ``[script_name, model_filepath, adv_x_filepath, ...]`` with at least
      one adversarial example path. The data slice, the set to use, the
      report path and the batch size are read from ``FLAGS``.
    """
    # Fail with a readable message instead of a TypeError from len(None)
    # when the default `argv=None` is used.
    assert argv is not None and len(argv) >= 3, (
        "Expected argv = [script_name, model_filepath, adv_x_filepath, ...]")
    model_filepath = argv[1]
    adv_x_filepaths = argv[2:]

    sess = tf.Session()
    with sess.as_default():
        model = serial.load(model_filepath)

    # Rebuild the dataset the model was trained on, restricted to the
    # requested slice.
    factory = model.dataset_factory
    factory.kwargs['train_start'] = FLAGS.train_start
    factory.kwargs['train_end'] = FLAGS.train_end
    factory.kwargs['test_start'] = FLAGS.test_start
    factory.kwargs['test_end'] = FLAGS.test_end
    dataset = factory()

    adv_x_list = [np.load(filepath) for filepath in adv_x_filepaths]
    x, y = dataset.get_set(FLAGS.which_set)

    # The valid value interval depends only on the dataset, so compute it
    # once rather than once per adversarial file.
    center = dataset.kwargs['center']
    min_allowed = 0. - center * dataset.max_val
    max_allowed = dataset.max_val
    data_range = dataset.max_val * (1. + center)

    for adv_x in adv_x_list:
        assert adv_x.shape == x.shape, (adv_x.shape, x.shape)
        # Make sure these were made for the right dataset with right scaling
        # arguments, etc.
        assert adv_x.min() >= min_allowed
        assert adv_x.max() <= max_allowed
        if adv_x.max() - adv_x.min() <= .8 * data_range:
            warnings.warn(
                "Something is weird. Your adversarial examples use "
                "less than 80% of the data range. "
                "This might mean you generated them for a model with "
                "inputs in [0, 1] and are now using them for a model "
                "with inputs in [0, 255] or something like that. "
                "Or it could be OK if you're evaluating on a very small "
                "batch.")

    report_path = FLAGS.report_path
    if report_path is None:
        # Derive the report name from the model file name.
        suffix = "_bundled_examples_report.joblib"
        assert model_filepath.endswith('.joblib')
        report_path = model_filepath[:-len('.joblib')] + suffix

    goal = MaxConfidence()
    bundle_examples_with_goal(sess, model, adv_x_list, y, goal,
                              report_path, batch_size=FLAGS.batch_size)
def test_save_and_load_var(self):
    """test_save_and_load_var:
    Test that we can save and load a PicklableVariable with joblib

    The variable is saved while it holds ones, then overwritten with twos in
    the live session. The reloaded copy must still hold ones while the
    original holds twos.
    """
    import os
    import shutil
    import tempfile

    # Use a private scratch directory instead of a fixed "/tmp/var.joblib":
    # a fixed path is not portable and can collide between concurrent runs.
    tmp_dir = tempfile.mkdtemp()
    var_path = os.path.join(tmp_dir, "var.joblib")
    sess = tf.Session()
    try:
        with sess.as_default():
            x = np.ones(1)
            xv = PicklableVariable(x)
            xv.var.initializer.run()
            save(var_path, xv)
            # Change the live variable after saving so the two copies differ.
            sess.run(tf.assign(xv.var, np.ones(1) * 2))
            new_xv = load(var_path)
            self.assertClose(sess.run(xv.var), np.ones(1) * 2)
            self.assertClose(sess.run(new_xv.var), np.ones(1))
    finally:
        # Release the session and remove whatever files were written.
        sess.close()
        shutil.rmtree(tmp_dir, ignore_errors=True)
def print_accuracies(filepath, train_start=TRAIN_START, train_end=TRAIN_END,
                     test_start=TEST_START, test_end=TEST_END,
                     batch_size=BATCH_SIZE, which_set=WHICH_SET,
                     base_eps_iter=BASE_EPS_ITER,
                     nb_iter=NB_ITER):
    """
    Load a saved model and print out its accuracy on different data
    distributions.

    A single attack is run on each example. That gives a quick, reasonable
    estimate of the true failure rate as long as the model does not suffer
    from gradient masking. The estimate is mostly intended for development
    work and not for publication; running an attack bundler instead may give
    a more accurate estimate.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param base_eps_iter: step size if the data were in [0,1]
      (Step size will be rescaled proportional to the actual data range)
    :param nb_iter: Number of iterations of PGD to run per class
    """
    # NOTE(review): `batch_size` is accepted but never forwarded to `impl`
    # in this function -- confirm whether that is intentional.

    # Fix the TF random seed to improve reproducibility
    tf.set_random_seed(20181014)
    set_log_level(logging.INFO)

    sess = tf.Session()
    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Re-create the model's dataset, restricted to the requested slice.
    factory = model.dataset_factory
    factory.kwargs.update(train_start=train_start, train_end=train_end,
                          test_start=test_start, test_end=test_end)
    dataset = factory()

    inputs, labels = dataset.get_set(which_set)
    impl(sess, model, dataset, factory, inputs, labels, base_eps_iter,
         nb_iter)
def test_save_load_confidence_report():
    """
    Test that a confidence report can be loaded and saved.

    Builds a two-example report with a 'clean' and an 'adv' entry, marks it
    completed, writes it with `serial.save` and reads it back with
    `serial.load`. The test passes if neither call raises.
    """
    import os
    import shutil
    import tempfile

    report = ConfidenceReport()
    num_examples = 2
    # `np.bool` was only an alias of the builtin `bool` and no longer exists
    # in current NumPy releases; the builtin gives the same boolean dtype.
    clean_correctness = np.zeros((num_examples,), dtype=bool)
    clean_confidence = np.zeros((num_examples,), dtype=np.float32)
    adv_correctness = clean_correctness.copy()
    adv_confidence = clean_confidence.copy()
    report['clean'] = ConfidenceReportEntry(clean_correctness,
                                            clean_confidence)
    report['adv'] = ConfidenceReportEntry(adv_correctness, adv_confidence)
    report.completed = True

    # Write into a scratch directory so the test leaves nothing behind in
    # the current working directory.
    tmp_dir = tempfile.mkdtemp()
    try:
        filepath = os.path.join(tmp_dir, ".test_confidence_report.joblib")
        serial.save(filepath, report)
        report = serial.load(filepath)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
def plot_report_from_path(path, success_name=DEFAULT_SUCCESS_NAME,
                          fail_names=DEFAULT_FAIL_NAMES, label=None,
                          is_max_confidence=True,
                          linewidth=LINEWIDTH,
                          plot_upper_bound=True):
    """
    Plots a success-fail curve from a confidence report stored on disk.

    The report is read with `load` and handed to `plot_report` together with
    all remaining arguments, unchanged.

    :param path: string filepath for the stored report.
      (Should be the output of make_confidence_report*.py)
    :param success_name: The name (confidence report key) of the data that
      should be used to measure success rate
    :param fail_names: A list of names (confidence report keys) of the data
      that should be used to measure failure rate.
      *Only one of these keys will be plotted*. Each key will be tried in
      order until one is found in the report. This is to support both the
      output of `make_confidence_report` and
      `make_confidence_report_bundled`.
    :param label: Optional string. Name to use for this curve in the legend.
    :param is_max_confidence: bool.
      If True, when measuring the failure rate, treat the data as the output
      of a maximum confidence attack procedure.
      This means that the attack is optimal (assuming the underlying
      optimizer is good enough, *which is probably false*, so interpret the
      plot accordingly) for thresholds >= .5 but for lower thresholds the
      observed failure rate is a lower bound on the true worst failure rate
      and the observed coverage is an upper bound (assuming good enough
      optimization) on the true failure rate.
      The plot thus draws the threshold >= .5 portion of the curve with
      a solid line and the upper and lower bounds with a dashed line.
      See https://openreview.net/forum?id=H1g0piA9tQ for details.
      If False, the attack procedure is regarded as an ad hoc way of
      obtaining a loose lower bound, and thus the whole curve is drawn with
      dashed lines.
    :param linewidth: thickness of the line to draw
    :param plot_upper_bound: include upper bound on error rate in plot
    """
    stored_report = load(path)
    # Forwarded positionally, in the same order as this function's own
    # parameters.
    plot_args = (success_name, fail_names, label, is_max_confidence,
                 linewidth, plot_upper_bound)
    plot_report(stored_report, *plot_args)
def evaluate_model(filepath,
                   train_start=0, train_end=60000, test_start=0,
                   test_end=10000, batch_size=128,
                   testing=False, num_threads=None):
    """
    Run evaluation on a saved model

    Loads MNIST, restores the model stored at `filepath`, builds an FGSM
    attack (eps 0.3, inputs clipped to [0, 1]) and prints the test-set
    accuracy on legitimate and on adversarial examples.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param batch_size: size of evaluation batches
    :param testing: not used by this function
    :param num_threads: if truthy, the number of intra-op parallelism
      threads for the TF session; otherwise the TensorFlow default is used
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session. Honour the requested thread count: previously any
    # truthy `num_threads` silently forced a single thread.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=int(num_threads))
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters (the training arrays are only read for shapes)
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    eval_params = {'batch_size': batch_size}
    fgsm_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.
    }

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """
        Evaluate `preds` on (x_set, y_set) and print the accuracy.

        `report_key` is accepted but not used. Nothing is printed when
        `is_adv` is None.
        """
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)
    preds = model.get_logits(x)

    # Evaluate the accuracy of the MNIST model on clean and then on
    # adversarial examples
    do_eval(preds, x_test, y_test, 'train_clean_train_clean_eval', False)
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
def make_confidence_report(filepath, train_start=TRAIN_START,
                           train_end=TRAIN_END,
                           test_start=TEST_START, test_end=TEST_END,
                           batch_size=BATCH_SIZE, which_set=WHICH_SET,
                           mc_batch_size=MC_BATCH_SIZE,
                           report_path=REPORT_PATH,
                           base_eps_iter=BASE_EPS_ITER,
                           nb_iter=NB_ITER, save_advx=SAVE_ADVX):
    """
    Load a saved model, gather its predictions, and save a confidence report.

    This function works by running a single MaxConfidence attack on each
    example.
    This provides a reasonable estimate of the true failure rate quickly, so
    long as the model does not suffer from gradient masking.
    However, this estimate is mostly intended for development work and not
    for publication. A more accurate estimate may be obtained by running
    make_confidence_report_bundled.py instead.

    Three jobs are evaluated in order: clean data ('clean'), the Semantic
    attack ('Semantic') and the MaxConfidence attack ('mc'). Each job
    contributes one entry to the saved report.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param mc_batch_size: batch size for MaxConfidence attack
    :param report_path: path to save the report to. If None, it is derived
      from `filepath` by replacing the '.joblib' suffix with
      '_report.joblib'.
    :param base_eps_iter: step size if the data were in [0,1]
      (Step size will be rescaled proportional to the actual data range)
    :param nb_iter: Number of iterations of PGD to run per class
    :param save_advx: bool. If True, saves the adversarial examples to disk.
      On by default, but can be turned off to save memory, etc.
    :raises NotImplementedError: if the dataset class is neither a CIFAR nor
      an MNIST one
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith('.joblib')
        report_path = filepath[:-len('.joblib')] + "_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Re-create the model's dataset, restricted to the requested slice.
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val

    # Threat model defaults, expressed for data in [0, 1] and rescaled to
    # the actual value range below.
    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
    else:
        raise NotImplementedError(str(factory.cls))

    mc_params = {
        'eps': base_eps * value_range,
        'eps_iter': base_eps_iter * value_range,
        'nb_iter': nb_iter,
        'clip_min': min_value,
        'clip_max': max_val
    }

    x_data, y_data = dataset.get_set(which_set)

    report = ConfidenceReport()

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    # (name, attack, attack_params, batch size override, save adv examples?)
    jobs = [('clean', None, None, None, False),
            ('Semantic', semantic, None, None, False),
            ('mc', mc, mc_params, mc_batch_size, True)]

    # NOTE: `devices` is not defined in this function; it is expected to be
    # provided by the enclosing module.
    for job in jobs:
        name, attack, attack_params, job_batch_size, save_this_job = job
        if job_batch_size is None:
            job_batch_size = batch_size
        # Every job starts from the clean data. Keeping the adversarial
        # examples in a separate name stops a saved job from leaking its
        # outputs into the inputs of any job that follows it.
        x_eval = x_data
        t1 = time.time()
        if save_advx and save_this_job:
            # If we want to save the adversarial examples to the filesystem,
            # we need to fetch all of them. Otherwise they're just computed
            # one batch at a time and discarded

            # The path to save to
            assert report_path.endswith('.joblib')
            advx_path = (report_path[:-len('.joblib')] + '_advx_' + name
                         + '.npy')

            # Fetch the adversarial examples
            x_eval = run_attack(sess, model, x_data, y_data, attack,
                                attack_params, batch_size=job_batch_size,
                                devices=devices)

            # Turn off the attack so `correctness_and_confidence` won't run
            # it a second time.
            attack = None
            attack_params = None

            # Save the adversarial examples
            np.save(advx_path, x_eval)

        # Run correctness and confidence evaluation for this job
        packed = correctness_and_confidence(sess, model, x_eval, y_data,
                                            batch_size=job_batch_size,
                                            devices=devices,
                                            attack=attack,
                                            attack_params=attack_params)
        t2 = time.time()
        print("Evaluation took", t2 - t1, "seconds")
        correctness, confidence = packed

        report[name] = ConfidenceReportEntry(correctness=correctness,
                                             confidence=confidence)

        print_stats(correctness, confidence, name)

    save(report_path, report)
def make_confidence_report_bundled(filepath, train_start=TRAIN_START,
                                   train_end=TRAIN_END, test_start=TEST_START,
                                   test_end=TEST_END, which_set=WHICH_SET,
                                   recipe=RECIPE, report_path=REPORT_PATH,
                                   nb_iter=NB_ITER, base_eps=None,
                                   base_eps_iter=None,
                                   base_eps_iter_small=None,
                                   batch_size=BATCH_SIZE):
    """
    Load a saved model, gather its predictions, and save a confidence report.

    The attack itself is delegated to a bundling recipe, which receives the
    data, the threat model and the report path.

    :param filepath: path to model to evaluate (must end in '.joblib')
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param which_set: 'train' or 'test'
    :param recipe: either a callable, or the name of an attribute of the
      `attack_bundling` module to use as the recipe
    :param report_path: path handed to the recipe. If None, it is derived
      from `filepath` by replacing the '.joblib' suffix with
      '_bundled_report.joblib'.
    :param nb_iter: int, number of iterations of attack algorithm
      (note that different recipes will use this differently,
       for example many will run two attacks, one with nb_iter
       iterations and one with 25X more)
    :param base_eps: float, epsilon parameter for threat model, on a scale of
      [0, 1]. Inferred from the dataset if not specified.
    :param base_eps_iter: float, a step size used in different ways by
      different recipes. Typically the step size for a PGD attack.
      Inferred from the dataset if not specified.
    :param base_eps_iter_small: float, a second step size for a more
      fine-grained attack. Inferred from the dataset if not specified.
      For MNIST datasets this value is always reset to None.
    :param batch_size: int, batch size
    """
    # Imported here to avoid a circular import
    from src.FGSM.cleverhans.cleverhans import attack_bundling

    # A recipe may be given directly or looked up by name.
    run_recipe = (recipe if callable(recipe)
                  else getattr(attack_bundling, recipe))

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    assert filepath.endswith('.joblib')
    if report_path is None:
        report_path = filepath[:-len('.joblib')] + "_bundled_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Re-create the model's dataset, restricted to the requested slice.
    factory = model.dataset_factory
    factory.kwargs.update(train_start=train_start, train_end=train_end,
                          test_start=test_start, test_end=test_end)
    dataset = factory()

    center = dataset.kwargs['center']
    # The maximum value may live in the factory arguments or on the dataset.
    if 'max_val' in factory.kwargs:
        max_value = factory.kwargs['max_val']
    elif hasattr(dataset, 'max_val'):
        max_value = dataset.max_val
    else:
        raise AttributeError("Can't find max_value specification")
    min_value = 0. - center * max_value
    value_range = max_value - min_value

    # Fill in whatever part of the threat model the caller left unspecified.
    dataset_cls = str(factory.cls)
    if 'CIFAR' in dataset_cls:
        if base_eps is None:
            base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
        if base_eps_iter_small is None:
            base_eps_iter_small = 1. / 255.
    elif 'MNIST' in dataset_cls:
        if base_eps is None:
            base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
        # Always cleared for MNIST, even when the caller passed a value.
        base_eps_iter_small = None
    elif base_eps is None or base_eps_iter is None:
        # Note that it is not required to specify base_eps_iter_small
        raise NotImplementedError("Not able to infer threat model from " +
                                  dataset_cls)

    # Rescale from the [0, 1] convention to the actual data range.
    eps = base_eps * value_range
    eps_iter = base_eps_iter * value_range
    eps_iter_small = (None if base_eps_iter_small is None
                      else base_eps_iter_small * value_range)
    clip_min = min_value
    clip_max = max_value

    x_data, y_data = dataset.get_set(which_set)
    assert x_data.max() <= max_value
    assert x_data.min() >= min_value

    assert eps_iter <= eps
    assert eps_iter_small is None or eps_iter_small <= eps

    # Different recipes take different arguments.
    # For now I don't have an idea for a beautiful unifying framework, so
    # we get an if statement.
    if recipe == 'random_search_max_confidence_recipe':
        # pylint always checks against the default recipe here
        # pylint: disable=no-value-for-parameter
        run_recipe(sess=sess, model=model, x=x_data, y=y_data, eps=eps,
                   clip_min=clip_min, clip_max=clip_max,
                   report_path=report_path)
    else:
        run_recipe(sess=sess, model=model, x=x_data, y=y_data,
                   nb_classes=dataset.NB_CLASSES, eps=eps, clip_min=clip_min,
                   clip_max=clip_max, eps_iter=eps_iter, nb_iter=nb_iter,
                   report_path=report_path, eps_iter_small=eps_iter_small,
                   batch_size=batch_size)
def make_confidence_report_spsa(filepath, train_start=TRAIN_START,
                                train_end=TRAIN_END,
                                test_start=TEST_START, test_end=TEST_END,
                                batch_size=BATCH_SIZE, which_set=WHICH_SET,
                                report_path=REPORT_PATH,
                                nb_iter=NB_ITER_SPSA,
                                spsa_samples=SPSA_SAMPLES,
                                spsa_iters=SPSA.DEFAULT_SPSA_ITERS):
    """
    Load a saved model, gather its predictions, and save a confidence report.

    This function works by running a single MaxConfidence attack on each
    example, using SPSA as the underlying optimizer.
    This is not intended to be a strong generic attack.
    It is intended to be a test to uncover gradient masking.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches (passed to the recipe as
      `eval_batch_size`)
    :param which_set: 'train' or 'test'
    :param report_path: path handed to the recipe. If None, it is derived
      from `filepath` by replacing the '.joblib' suffix with
      '_spsa_report.joblib'.
    :param nb_iter: Number of iterations of PGD to run per class
    :param spsa_samples: Number of samples for SPSA
    :param spsa_iters: passed through to the recipe as `spsa_iters`
    :raises NotImplementedError: if the dataset class is neither a CIFAR nor
      an MNIST one
    """
    # Fix the TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith('.joblib')
        report_path = filepath[:-len('.joblib')] + "_spsa_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Re-create the model's dataset, restricted to the requested slice.
    factory = model.dataset_factory
    factory.kwargs.update(train_start=train_start, train_end=train_end,
                          test_start=test_start, test_end=test_end)
    dataset = factory()

    # Cast the scaling constants to float32 before deriving the bounds.
    center = np.float32(dataset.kwargs['center'])
    max_val = np.float32(dataset.kwargs['max_val'])
    value_range = max_val * (1. + center)
    min_value = np.float32(0. - center * max_val)

    # Threat model radius for data in [0, 1], chosen per dataset.
    dataset_cls = str(factory.cls)
    if 'CIFAR' in dataset_cls:
        base_eps = 8. / 255.
    elif 'MNIST' in dataset_cls:
        base_eps = .3
    else:
        raise NotImplementedError(dataset_cls)

    eps = np.float32(base_eps * value_range)

    x_data, y_data = dataset.get_set(which_set)

    # The lower and upper clipping bounds are the data's own min and max.
    spsa_max_confidence_recipe(sess, model, x_data, y_data,
                               dataset.NB_CLASSES, eps,
                               min_value, max_val, nb_iter, report_path,
                               spsa_samples=spsa_samples,
                               spsa_iters=spsa_iters,
                               eval_batch_size=batch_size)