Example #1
def get_labels(file_name, x_set):
    """
    Returns the model's predictions for the given input array.

    Parameters
    ----------
    file_name: str
        The name of the joblib file.
    x_set: numpy.ndarray
        The input array.
    """

    tf.reset_default_graph()

    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))

    with tf.Session() as sess:
        model = load("models/joblibs/" + file_name + ".joblib")
        last = model(x)

        z = sess.run(last, feed_dict={x: x_set})

    return z
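A minimal usage sketch for the helper above (hypothetical; "mnist_clean" and the dummy batch are placeholders for a model actually saved under models/joblibs/ and a real MNIST-shaped input):

import numpy as np

x_batch = np.zeros((16, 28, 28, 1), dtype=np.float32)  # stand-in MNIST batch
preds = get_labels("mnist_clean", x_batch)
predicted_classes = np.argmax(preds, axis=1)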
Example #2
def plot_report_from_path(path, success_name=DEFAULT_SUCCESS_NAME,
                          fail_names=DEFAULT_FAIL_NAMES, label=None,
                          is_max_confidence=True,
                          linewidth=LINEWIDTH):
  """
  Plots a success-fail curve from a confidence report stored on disk.
  :param path: string filepath for the stored report.
    (Should be the output of make_confidence_report*.py)
  :param success_name: The name (confidence report key) of the data that
    should be used to measure success rate
  :param fail_names: A list of names (confidence report keys) of the data
    that should be used to measure failure rate.
    *Only one of these keys will be plotted*. Each key will be tried in
    order until one is found in the report. This is to support both the
    output of `make_confidence_report` and `make_confidence_report_bundled`.
  :param label: Optional string. Name to use for this curve in the legend.
  :param is_max_confidence: bool.
    If True, when measuring the failure rate, treat the data as the output
    of a maximum confidence attack procedure.
    This means that the attack is optimal (assuming the underlying optimizer
    is good enough, *which is probably false*, so interpret the plot
    accordingly) for thresholds >= .5 but for lower thresholds the observed
    failure rate is a lower bound on the true worst failure rate and the
    observed coverage is an upper bound (assuming good enough optimization)
    on the true failure rate.
    The plot thus draws the threshold >= .5 portion of the curve with a solid
    line and the upper and lower bounds with a dashed line.
    See https://openreview.net/forum?id=H1g0piA9tQ for details.
    If False, the attack procedure is regarded as an ad hoc way of obtaining
    a loose lower bound, and thus the whole curve is drawn with dashed lines.
  :param linewidth: thickness of the line to draw
  """
  report = load(path)
  plot_report(report, success_name, fail_names, label, is_max_confidence,
              linewidth)
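A hypothetical call; the report path below is a placeholder for the output of make_confidence_report.py:

plot_report_from_path("mnist_report.joblib", label="clean training")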
Example #3
def model_testing(file_name, x_train, y_train, x_test, y_test):
    """
    Runs the evaluation and prints out the results.

    Parameters
    ----------
    file_name: str
        The name of the joblib file.
    x_train: numpy.ndarray
        The input array of the train dataset.
    y_train: numpy.ndarray
        The output array of the train dataset.
    x_test: numpy.ndarray
        The input array of the test dataset.
    y_test: numpy.ndarray
        The output array of the test dataset.
    """

    session = tf.Session()

    with session.as_default():
        model = load("models/joblibs/" + file_name)

    img_rows, img_cols, channels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    eval_params = {"batch_size": 128}

    predictions = model.get_logits(x)

    evaluate(session, x, y, predictions, x_train, y_train, x_test, y_test,
             eval_params)
Example #4
def main(argv=None):
    """
  Make a confidence report and save it to disk.
  """
    assert len(argv) >= 3
    _name_of_script = argv[0]
    model_filepath = argv[1]
    adv_x_filepaths = argv[2:]

    sess = tf.compat.v1.Session()
    with sess.as_default():
        model = serial.load(model_filepath)

    factory = model.dataset_factory
    factory.kwargs['train_start'] = FLAGS.train_start
    factory.kwargs['train_end'] = FLAGS.train_end
    factory.kwargs['test_start'] = FLAGS.test_start
    factory.kwargs['test_end'] = FLAGS.test_end
    dataset = factory()

    adv_x_list = [np.load(filepath) for filepath in adv_x_filepaths]
    x, y = dataset.get_set(FLAGS.which_set)
    for adv_x in adv_x_list:
        assert adv_x.shape == x.shape, (adv_x.shape, x.shape)
        # Make sure these were made for the right dataset with right scaling
        # arguments, etc.
        assert adv_x.min() >= 0. - dataset.kwargs['center'] * dataset.max_val
        assert adv_x.max() <= dataset.max_val
        data_range = dataset.max_val * (1. + dataset.kwargs['center'])

        if adv_x.max() - adv_x.min() <= .8 * data_range:
            warnings.warn(
                "Something is weird. Your adversarial examples use "
                "less than 80% of the data range."
                "This might mean you generated them for a model with "
                "inputs in [0, 1] and are now using them for a model "
                "with inputs in [0, 255] or something like that. "
                "Or it could be OK if you're evaluating on a very small "
                "batch.")

    report_path = FLAGS.report_path
    if report_path is None:
        suffix = "_bundled_examples_report.joblib"
        assert model_filepath.endswith('.joblib')
        report_path = model_filepath[:-len('.joblib')] + suffix

    goal = MaxConfidence()
    bundle_examples_with_goal(sess,
                              model,
                              adv_x_list,
                              y,
                              goal,
                              report_path,
                              batch_size=FLAGS.batch_size)
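The function above is the entry point of a command-line script; a hypothetical direct invocation (the script and file names are placeholders, and the module's FLAGS are assumed to be parsed already) could look like:

main(["bundle_examples_report.py", "model.joblib",
      "adv_x_fgsm.npy", "adv_x_pgd.npy"])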
Example #5
    def load_model(self, model_dir):
        data_filepath = os.path.join(model_dir, "model.joblib")
        path = Path(data_filepath)

        if path.is_file():
            print('Loading model from:\n {}'.format(data_filepath) + '\n')
            model = serial.load(data_filepath)

        else:
            print('Model path {} does not exist'.format(data_filepath))
            model = None

        return model
Example #6
def print_accuracies(
    filepath,
    train_start=TRAIN_START,
    train_end=TRAIN_END,
    test_start=TEST_START,
    test_end=TEST_END,
    batch_size=BATCH_SIZE,
    which_set=WHICH_SET,
    base_eps_iter=BASE_EPS_ITER,
    nb_iter=NB_ITER,
):
    """
    Load a saved model and print out its accuracy on different data distributions

    This function works by running a single attack on each example.
    This provides a reasonable estimate of the true failure rate quickly, so
    long as the model does not suffer from gradient masking.
    However, this estimate is mostly intended for development work and not
    for publication. A more accurate estimate may be obtained by running
    an attack bundler instead.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param base_eps_iter: step size if the data were in [0,1]
      (Step size will be rescaled proportional to the actual data range)
    :param nb_iter: Number of iterations of PGD to run per class
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(20181014)
    set_log_level(logging.INFO)
    sess = tf.Session()

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs["train_start"] = train_start
    factory.kwargs["train_end"] = train_end
    factory.kwargs["test_start"] = test_start
    factory.kwargs["test_end"] = test_end
    dataset = factory()

    x_data, y_data = dataset.get_set(which_set)

    impl(sess, model, dataset, factory, x_data, y_data, base_eps_iter, nb_iter)
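Example #7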
 def test_save_and_load_var(self):
   """test_save_and_load_var: Test that we can save and load a
   PicklableVariable with joblib
   """
   sess = tf.Session()
   with sess.as_default():
     x = np.ones(1)
     xv = PicklableVariable(x)
     xv.var.initializer.run()
     save("/tmp/var.joblib", xv)
     sess.run(tf.assign(xv.var, np.ones(1) * 2))
     new_xv = load("/tmp/var.joblib")
     self.assertClose(sess.run(xv.var), np.ones(1) * 2)
     self.assertClose(sess.run(new_xv.var), np.ones(1))
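Example #8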
def test_save_load_confidence_report():
  """
  Test that a confidence report can be loaded and saved.
  """
  report = ConfidenceReport()
  num_examples = 2
  clean_correctness = np.zeros((num_examples,), dtype=bool)
  clean_confidence = np.zeros((num_examples,), dtype=np.float32)
  adv_correctness = clean_correctness.copy()
  adv_confidence = clean_confidence.copy()
  report['clean'] = ConfidenceReportEntry(clean_correctness, clean_confidence)
  report['adv'] = ConfidenceReportEntry(adv_correctness, adv_confidence)
  report.completed = True
  filepath = ".test_confidence_report.joblib"
  serial.save(filepath, report)
  report = serial.load(filepath)
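The test stops right after the reload; an optional follow-up check, assuming ConfidenceReport keeps its dict-like behaviour after serialization, would confirm the round trip:

# Hedged follow-up (not in the original test): the reloaded report should
# still behave like a dict and keep the completed flag.
assert report.completed
assert 'clean' in report and 'adv' in report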
Example #9
def compute_geodesic_matrices():
    mnist = MNIST(train_start=0,
                  train_end=FLAGS.nb_train,
                  test_start=0,
                  test_end=1000)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters.
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    with get_tensorflow_session() as sess:
        with tf.variable_scope('dknn'):
            tf.set_random_seed(FLAGS.seed)
            np.random.seed(int(FLAGS.seed))

            # Define input TF placeholder.
            x = tf.placeholder(tf.float32,
                               shape=(None, img_rows, img_cols, nchannels))
            y = tf.placeholder(tf.float32, shape=(None, nb_classes))

            # Define a model.
            model = make_basic_picklable_cnn()
            preds = model.get_logits(x)
            loss = CrossEntropy(model, smoothing=0.)

            # Define the test set accuracy evaluation.
            def evaluate():
                acc = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 x_test,
                                 y_test,
                                 args={'batch_size': FLAGS.batch_size})
                print('Test accuracy on test examples: %0.4f' % acc)

            # Train the model
            train_params = {
                'nb_epochs': FLAGS.nb_epochs,
                'batch_size': FLAGS.batch_size,
                'learning_rate': FLAGS.lr
            }

            model_filepath = "../data/model.joblib"
            path = Path(model_filepath)

            if path.is_file():
                model = serial.load(model_filepath)
            else:
                train(sess,
                      loss,
                      x_train,
                      y_train,
                      evaluate=evaluate,
                      args=train_params,
                      var_list=model.get_params())
                serial.save(model_filepath, model)

            # Define callable that returns a dictionary of all activations for a dataset
            def get_activations(data):
                data_activations = {}
                for layer in layers:
                    layer_sym = tf.layers.flatten(model.get_layer(x, layer))
                    data_activations[layer] = batch_eval(
                        sess, [x], [layer_sym], [data],
                        args={'batch_size': FLAGS.batch_size})[0]
                return data_activations

            # Use a holdout of the test set to simulate calibration data for the DkNN.
            train_data = x_train
            train_labels = np.argmax(y_train, axis=1)
            cali_data = x_test[:FLAGS.nb_cali]
            y_cali = y_test[:FLAGS.nb_cali]
            cali_labels = np.argmax(y_cali, axis=1)
            test_data = x_test[FLAGS.nb_cali:]
            y_test = y_test[FLAGS.nb_cali:]

            # Extract representations for the training and calibration data at each layer of interest to the DkNN.
            layers = ['ReLU1', 'ReLU3', 'ReLU5', 'logits']

            # Wrap the model into a DkNNModel
            dknn = DkNNModel(FLAGS.neighbors,
                             layers,
                             get_activations,
                             train_data,
                             train_labels,
                             nb_classes,
                             scope='dknn')

    # Compute matrix for each layer
    geodesic_matrices = []
    for layer in layers:
        print(layer)
        activations = dknn.train_activations[layer]
        geodesic_matrix = hard_geodesics_euclidean_kernel(
            activations, FLAGS.proto_neighbors)
        geodesic_matrices.append(geodesic_matrix)

    matrix_path = '../results/geodesic_matrices_' + str(
        FLAGS.nb_train) + '_' + str(FLAGS.proto_neighbors) + '.pkl'
    with open(matrix_path, 'wb') as f:
        pickle.dump(geodesic_matrices, f)

    return True
Example #10
  print_report.py model_report.joblib
Prints out some basic statistics stored in a pickled ConfidenceReport
"""
import sys
import warnings

from cleverhans.confidence_report import ConfidenceReport
from cleverhans.serial import load

if len(sys.argv) == 2:
    # pylint doesn't realize that sys.argv will change at runtime
    # pylint:disable=unbalanced-tuple-unpacking
    _, path = sys.argv
else:
    raise ValueError("Wrong number of arguments")
the_report = load(path)


def current(report):
    """
  The current implementation of report printing.
  :param report: ConfidenceReport
  """
    if hasattr(report, "completed"):
        if report.completed:
            print("Report completed")
        else:
            print("REPORT NOT COMPLETED")
    else:
        warnings.warn(
            "This report does not indicate whether it is completed. Support for reports without a `completed`"
Example #11
def mnist_tutorial_jsma(train_start=0,
                        train_end=60000,
                        test_start=0,
                        test_end=10000,
                        viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS,
                        batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
    """
  MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    #replace
    num_threads = None
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    #with sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64

    # Define TF model graph
    model = make_basic_picklable_cnn()

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_train, [60000, 28, 28]), y_train))
    dataset = dataset.batch(32)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_test, [10000, 28, 28]), y_test))
    val_dataset = val_dataset.batch(32)

    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    if TRAIN_NEW == 1:
        with sess.as_default():
            train(sess, loss, x_train, y_train, args=train_params, rng=rng)
            save("test.joblib", model)
    else:
        with sess.as_default():
            model = load("test.joblib")  #changed
        assert len(model.get_params()) > 0
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    seed(SEED)
    for sample_ind in xrange(0, source_samples):
        img = randint(0, x_test.shape[0] - 1)  # avoid indexing past the last test example
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[img:(img +
                             1)]  #sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(
            y_test[img]))  #current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))
        tn = 0
        totc = 0
        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[img].reshape(-1)  # compare against the image actually attacked
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]
            diff = np.array(adv_x - sample)
            #print(np.sum(diff))
            diff = np.reshape(diff, (28, 28))
            diff = diff * 255
            cv2.imwrite("test.png", diff)
            diff = cv2.imread("test.png")
            diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
            # Count, for every modified pixel, how many of its 8-connected
            # in-bounds neighbours were also modified.
            neighbors = 0
            tc = 0
            for i in range(28):
                for j in range(28):
                    if diff[i, j] <= 0:
                        continue
                    tc = tc + 1
                    totc = totc + 1
                    for di in (-1, 0, 1):
                        for dj in (-1, 0, 1):
                            if di == 0 and dj == 0:
                                continue
                            ni, nj = i + di, j + dj
                            if 0 <= ni < 28 and 0 <= nj < 28 and diff[ni, nj] > 0:
                                neighbors = neighbors + 1

            # print(tc)
            # print(neighbors)
            tn = tn + neighbors
            # if tc > 0:
            #     print(neighbors / tc)
            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)
            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb
            #print(perturbations[target, sample_ind])

    print('--------------------------------------')

    print("average neighbors per modified pixel ", tn / totc)
    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.8f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)

    s = perturbations.shape
    myPert = np.empty(0)
    myResults = np.empty(0)
    for i in range(s[0]):
        for j in range(s[1]):
            if perturbations[i][j] > 0:
                myPert = np.append(myPert, perturbations[i][j])
                myResults = np.append(myResults, results[i][j])
    min_perturbed = np.min(myPert)
    max_perturbed = np.max(myPert)

    s2 = myResults.shape
    final = np.empty(0)
    for i in range(s2[0]):
        if myResults[i] > 0:
            final = np.append(final, myPert[i])

    print('Avg. rate of perturbed features {0:.8f}'.format(percent_perturbed))
    print('MIN of perturbed features {0:.8f}'.format(min_perturbed))
    print('MAX of perturbed features {0:.8f}'.format(max_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    min_perturb_succ = np.min(final)
    max_perturb_succ = np.max(final)
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(percent_perturb_succ))
    print('Min of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(min_perturb_succ))
    print('Max of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(max_perturb_succ))

    #Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
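A hedged usage sketch for the tutorial above; the keyword values are illustrative and assume the module-level names it references (TRAIN_NEW, SEED, VIZ_ENABLED, ...) are defined:

report = mnist_tutorial_jsma(viz_enabled=False, nb_epochs=6,
                             source_samples=10, learning_rate=0.001)
print("Accuracy on adversarial examples:", report.clean_train_adv_eval)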
Example #12
def evaluate(dataset='CIFAR100'):
    batch_size = 128
    test_num = 10000
    defense_list = ['Naive', 'Goodfellow', 'Madry', 'PGN']
    model_path_list = []
    for defense in defense_list:
        for i in os.listdir('save/%s/%s' % (dataset, defense)):
            if os.path.exists('save/%s/%s/%s/model.joblib' %
                              (dataset, defense, i)):
                model_path_list.append('save/%s/%s/%s/model.joblib' %
                                       (dataset, defense, i))

    if dataset == 'CIFAR100':
        data = CIFAR100(test_start=0, test_end=test_num)
        x_test, y_test = data.get_set('test')
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 100))
    elif dataset == 'CIFAR10':
        data = CIFAR10(test_start=0, test_end=test_num)
        x_test, y_test = data.get_set('test')
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))
    else:
        raise ValueError('Unsupported dataset: %s' % dataset)

    sess = tf.Session()

    cw_params = {
        'batch_size': 128,
        'clip_min': 0.,
        'clip_max': 1.,
        'max_iterations': 100,
        'y': y
    }

    eval_params = {'batch_size': batch_size}

    def do_eval(preds, x_set, y_set, report_text):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        print('Test accuracy on %s: %0.4f' % (report_text, acc))
        return acc

    def get_adv_x_numpy(adv_x, attack_success_index, x_set, y_set):
        result = []
        result_index = []
        nb_batches = int(math.ceil(float(len(x_set)) / batch_size))
        X_cur = np.zeros((batch_size, ) + x_set.shape[1:], dtype=x_set.dtype)
        Y_cur = np.zeros((batch_size, ) + y_set.shape[1:], dtype=y_set.dtype)
        for batch in range(nb_batches):
            start = batch * batch_size
            end = min(len(x_set), start + batch_size)
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = x_set[start:end]
            Y_cur[:cur_batch_size] = y_set[start:end]
            feed_dict = {x: X_cur, y: Y_cur}
            adv_x_numpy, success_index = sess.run(
                [adv_x, attack_success_index], feed_dict=feed_dict)
            result.append(adv_x_numpy[:cur_batch_size])
            result_index.append(success_index[:cur_batch_size])
        return np.concatenate(result, axis=0), np.concatenate(result_index,
                                                              axis=0)

    print(model_path_list)
    acc_dict = {}
    l2mean_dict = {}
    for model_path in model_path_list:
        defense = model_path.split('/')[2]
        if not defense in acc_dict:
            acc_dict[defense] = []
        if not defense in l2mean_dict:
            l2mean_dict[defense] = []

        if os.path.exists(
                os.path.join(os.path.dirname(model_path), 'cash_result')):
            with open(os.path.join(os.path.dirname(model_path), 'cash_result'),
                      'r') as f:
                cash_result_str = f.read()
                acc, l2mean, model_create_time = cash_result_str.split(",")

            if int(model_create_time) == int(os.path.getctime(model_path)):
                acc_dict[defense].append(float(acc))
                l2mean_dict[defense].append(float(l2mean))
                print(model_path, acc, l2mean)
                continue

        with sess.as_default():
            model = load(model_path)

        attack_model = CarliniWagnerL2(model, sess=sess)
        attack_params = cw_params

        preds = model.get_logits(x)
        acc = do_eval(preds, x_test[:test_num], y_test[:test_num],
                      'DEFENSE : %s' % defense)
        adv_x = attack_model.generate(x, **attack_params)
        preds_adv = model.get_logits(adv_x)
        attack_success_index = tf.math.not_equal(tf.argmax(preds_adv, axis=-1),
                                                 tf.argmax(y, axis=-1))
        adv_x_numpy, success_index = get_adv_x_numpy(adv_x,
                                                     attack_success_index,
                                                     x_test[:test_num],
                                                     y_test[:test_num])
        print('C&W attack success_rate = %f' % np.mean(success_index))

        l2mean = np.mean(
            np.sqrt(
                np.sum(np.power(
                    adv_x_numpy[success_index] -
                    x_test[:test_num][success_index], 2),
                       axis=(1, 2, 3))))

        acc_dict[defense].append(acc)
        l2mean_dict[defense].append(l2mean)
        print(model_path, acc, l2mean)
        with open(os.path.join(os.path.dirname(model_path), 'cash_result'),
                  'w') as f:
            f.write('%.4f,%.4f,%d' %
                    (acc, l2mean, os.path.getctime(model_path)))

    for defense in defense_list:
        if not defense in l2mean_dict:
            continue

        l2mean_dict[defense] = np.array(l2mean_dict[defense])
        acc_dict[defense] = np.array(acc_dict[defense])
        arg_l2mean_dict = np.argsort(l2mean_dict[defense])
        l2mean_dict[defense] = l2mean_dict[defense][arg_l2mean_dict]
        acc_dict[defense] = acc_dict[defense][arg_l2mean_dict]
        plt.plot(l2mean_dict[defense], acc_dict[defense], '-o', label=defense)
    plt.legend()
    plt.xlabel('$\\rho_{cw}$')
    plt.ylabel('benign accuracy')
    plt.title("RESULT FOR %s" % dataset)

    fig_save_dir = 'evaluate/%s' % dataset
    if not os.path.exists(fig_save_dir):
        os.makedirs(fig_save_dir)
    plt.savefig('%s/robustness-curve.png' % fig_save_dir)
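Example #13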
def evaluate_model(filepath,
                   train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   batch_size=128,
                   testing=False,
                   num_threads=None):
    """
  Run evaluation on a saved model
  :param filepath: path to model to evaluate
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param batch_size: size of evaluation batches
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)
    preds = model.get_logits(x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    do_eval(preds, x_test, y_test, 'train_clean_train_clean_eval', False)
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
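A hypothetical call, assuming an MNIST model was previously saved with cleverhans' serial.save (the file name is a placeholder):

evaluate_model("clean_model.joblib", batch_size=128)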
Example #14
def make_confidence_report_spsa(filepath,
                                train_start=TRAIN_START,
                                train_end=TRAIN_END,
                                test_start=TEST_START,
                                test_end=TEST_END,
                                batch_size=BATCH_SIZE,
                                which_set=WHICH_SET,
                                report_path=REPORT_PATH,
                                nb_iter=NB_ITER_SPSA,
                                spsa_samples=SPSA_SAMPLES,
                                spsa_iters=SPSA.DEFAULT_SPSA_ITERS):
    """
  Load a saved model, gather its predictions, and save a confidence report.


  This function works by running a single MaxConfidence attack on each example,
  using SPSA as the underlying optimizer.
  This is not intended to be a strong generic attack.
  It is intended to be a test to uncover gradient masking.

  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param batch_size: size of evaluation batches
  :param which_set: 'train' or 'test'
  :param nb_iter: Number of iterations of PGD to run per class
  :param spsa_samples: Number of samples for SPSA
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith('.joblib')
        report_path = filepath[:-len('.joblib')] + "_spsa_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    center = np.float32(center)
    max_val = dataset.kwargs['max_val']
    max_val = np.float32(max_val)
    value_range = max_val * (1. + center)
    min_value = np.float32(0. - center * max_val)

    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
    else:
        raise NotImplementedError(str(factory.cls))

    eps = np.float32(base_eps * value_range)
    clip_min = min_value
    clip_max = max_val

    x_data, y_data = dataset.get_set(which_set)

    nb_classes = dataset.NB_CLASSES

    spsa_max_confidence_recipe(sess,
                               model,
                               x_data,
                               y_data,
                               nb_classes,
                               eps,
                               clip_min,
                               clip_max,
                               nb_iter,
                               report_path,
                               spsa_samples=spsa_samples,
                               spsa_iters=spsa_iters)
Example #15
def generate_attacks(save_path, file_path, x_set, y_set, attack, gamma,
                     first_index, last_index):
    """
    Applies the saliency map attack against the specified model.

    Parameters
    ----------
    save_path: str
        The path of the folder in which the crafted adversarial samples will be saved.
    file_path: str
        The path to the joblib file of the model to attack.
    x_set: numpy.ndarray
        The dataset input array.
    y_set: numpy.ndarray
        The dataset output array.
    attack: str
        The type of attack used (either "jsma", "wjsma" or "tjsma").
    gamma: float
        Maximum percentage of perturbed features.
    first_index: int
        The index of the first image attacked.
    last_index: int
        The index of the last image attacked.
    """

    if not os.path.exists(save_path):
        os.mkdir(save_path)

    sess = tf.Session()

    img_rows, img_cols, channels = x_set.shape[1:4]
    nb_classes = y_set.shape[1]

    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))

    with sess.as_default():
        model = load(file_path)

    assert len(model.get_params()) > 0

    # Attack parameters. See SaliencyMapMethod for more information
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1,
        'gamma': gamma,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None,
        'attack': attack
    }

    preds = model(x)

    for sample_ind in range(first_index, last_index):
        results = pd.DataFrame()

        print('Attacking input %i/%i' % (sample_ind + 1, last_index))

        sample = x_set[sample_ind:(sample_ind + 1)]
        current_class = int(np.argmax(y_set[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        for target in target_classes:
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x, predictions = jsma.generate_np(sample, **jsma_params)

            res = int(model_argmax(sess, x, preds, adv_x) == target)

            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_set[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]

            results['number_' + str(sample_ind) + '_' + str(current_class) + '_to_' + str(target)] = \
                np.concatenate((adv_x_reshape.reshape(-1), np.array([nb_changed, percent_perturb, res]))
                               )

        sample_vector = sample.reshape(-1)
        shape1 = sample_vector.shape[0]
        shape2 = results.shape[0]

        results['original_image_' + str(sample_ind)] = \
            np.concatenate((sample.reshape(-1), np.zeros((shape2 - shape1,))))

        results.to_csv(save_path + '/' + attack + '_image_' + str(sample_ind) +
                       '.csv',
                       index=False)
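Example #16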
def make_confidence_report_bundled(filepath,
                                   train_start=TRAIN_START,
                                   train_end=TRAIN_END,
                                   test_start=TEST_START,
                                   test_end=TEST_END,
                                   which_set=WHICH_SET,
                                   recipe=RECIPE,
                                   report_path=REPORT_PATH):
    """
  Load a saved model, gather its predictions, and save a confidence report.
  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param which_set: 'train' or 'test'
  """
    # Avoid circular import
    from cleverhans import attack_bundling
    run_recipe = getattr(attack_bundling, recipe)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    assert filepath.endswith('.joblib')
    if report_path is None:
        report_path = filepath[:-len('.joblib')] + "_bundled_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_value = dataset.max_val
    min_value = 0. - center * max_value
    value_range = max_value - min_value

    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
        base_eps_iter = 2. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
        base_eps_iter = .1
    else:
        raise NotImplementedError(str(factory.cls))

    eps = base_eps * value_range
    eps_iter = base_eps_iter * value_range
    nb_iter = 40
    clip_min = min_value
    clip_max = max_value

    x_data, y_data = dataset.get_set(which_set)
    assert x_data.max() <= max_value
    assert x_data.min() >= min_value

    # Different recipes take different arguments.
    # For now I don't have an idea for a beautiful unifying framework, so
    # we get an if statement.
    if recipe == 'random_search_max_confidence_recipe':
        # pylint always checks against the default recipe here
        # pylint: disable=no-value-for-parameter
        run_recipe(sess=sess,
                   model=model,
                   x=x_data,
                   y=y_data,
                   eps=eps,
                   clip_min=clip_min,
                   clip_max=clip_max,
                   report_path=report_path)
    else:
        run_recipe(sess=sess,
                   model=model,
                   x=x_data,
                   y=y_data,
                   nb_classes=dataset.NB_CLASSES,
                   eps=eps,
                   clip_min=clip_min,
                   clip_max=clip_max,
                   eps_iter=eps_iter,
                   nb_iter=nb_iter,
                   report_path=report_path)
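Example #17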
def make_confidence_report(filepath,
                           train_start=TRAIN_START,
                           train_end=TRAIN_END,
                           test_start=TEST_START,
                           test_end=TEST_END,
                           batch_size=BATCH_SIZE,
                           which_set=WHICH_SET,
                           mc_batch_size=MC_BATCH_SIZE,
                           report_path=REPORT_PATH,
                           base_eps_iter=BASE_EPS_ITER,
                           nb_iter=NB_ITER):
    """
  Load a saved model, gather its predictions, and save a confidence report.


  This function works by running a single MaxConfidence attack on each example.
  This provides a reasonable estimate of the true failure rate quickly, so
  long as the model does not suffer from gradient masking.
  However, this estimate is mostly intended for development work and not
  for publication. A more accurate estimate may be obtained by running
  make_confidence_report_bundled.py instead.

  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param batch_size: size of evaluation batches
  :param which_set: 'train' or 'test'
  :param mc_batch_size: batch size for MaxConfidence attack
  :param base_eps_iter: step size if the data were in [0,1]
    (Step size will be rescaled proportional to the actual data range)
  :param nb_iter: Number of iterations of PGD to run per class
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith('.joblib')
        report_path = filepath[:-len('.joblib')] + "_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val

    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
    else:
        raise NotImplementedError(str(factory.cls))

    mc_params = {
        'eps': base_eps * value_range,
        'eps_iter': base_eps_iter * value_range,
        'nb_iter': nb_iter,
        'clip_min': min_value,
        'clip_max': max_val
    }

    x_data, y_data = dataset.get_set(which_set)

    report = {}

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    jobs = [('clean', None, None, None), ('Semantic', semantic, None, None),
            ('mc', mc, mc_params, mc_batch_size)]

    for job in jobs:
        name, attack, attack_params, job_batch_size = job
        if job_batch_size is None:
            job_batch_size = batch_size
        t1 = time.time()
        packed = correctness_and_confidence(sess,
                                            model,
                                            x_data,
                                            y_data,
                                            batch_size=job_batch_size,
                                            devices=devices,
                                            attack=attack,
                                            attack_params=attack_params)
        t2 = time.time()
        print("Evaluation took", t2 - t1, "seconds")
        correctness, confidence = packed

        report[name] = {'correctness': correctness, 'confidence': confidence}

        print_stats(correctness, confidence, name)

    save(report_path, report)
Example #18
def print_accuracies(filepath,
                     train_start=TRAIN_START,
                     train_end=TRAIN_END,
                     test_start=TEST_START,
                     test_end=TEST_END,
                     batch_size=BATCH_SIZE,
                     which_set=WHICH_SET,
                     base_eps_iter=BASE_EPS_ITER,
                     nb_iter=NB_ITER):
    """
  Load a saved model and print out its accuracy on different data distributions

  This function works by running a single attack on each example.
  This provides a reasonable estimate of the true failure rate quickly, so
  long as the model does not suffer from gradient masking.
  However, this estimate is mostly intended for development work and not
  for publication. A more accurate estimate may be obtained by running
  an attack bundler instead.

  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param batch_size: size of evaluation batches
  :param which_set: 'train' or 'test'
  :param base_eps_iter: step size if the data were in [0,1]
    (Step size will be rescaled proportional to the actual data range)
  :param nb_iter: Number of iterations of PGD to run per class
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(20181014)
    set_log_level(logging.INFO)
    sess = tf.Session()

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val

    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
    else:
        raise NotImplementedError(str(factory.cls))

    pgd_params = {
        'eps': base_eps * value_range,
        'eps_iter': base_eps_iter * value_range,
        'nb_iter': nb_iter,
        'clip_min': min_value,
        'clip_max': max_val
    }

    x_data, y_data = dataset.get_set(which_set)

    semantic = Semantic(model, center, max_val, sess)
    pgd = ProjectedGradientDescent(model, sess=sess)

    jobs = [('clean', None, None, None), ('Semantic', semantic, None, None),
            ('pgd', pgd, pgd_params, None)]

    for job in jobs:
        name, attack, attack_params, job_batch_size = job
        if job_batch_size is None:
            job_batch_size = batch_size
        t1 = time.time()
        acc = accuracy(sess,
                       model,
                       x_data,
                       y_data,
                       batch_size=job_batch_size,
                       devices=devices,
                       attack=attack,
                       attack_params=attack_params)
        t2 = time.time()
        print("Accuracy on " + name + " examples: ", acc)
        print("Evaluation took", t2 - t1, "seconds")
Example #19
def make_confidence_report_bundled(filepath,
                                   train_start=TRAIN_START,
                                   train_end=TRAIN_END,
                                   test_start=TEST_START,
                                   test_end=TEST_END,
                                   which_set=WHICH_SET,
                                   recipe=RECIPE,
                                   report_path=REPORT_PATH,
                                   nb_iter=NB_ITER,
                                   base_eps=None,
                                   base_eps_iter=None,
                                   base_eps_iter_small=None):
    """
  Load a saved model, gather its predictions, and save a confidence report.
  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param which_set: 'train' or 'test'
  :param nb_iter: int, number of iterations of attack algorithm
    (note that different recipes will use this differently,
     for example many will run two attacks, one with nb_iter
     iterations and one with 25X more)
  :param base_eps: float, epsilon parameter for threat model, on a scale of [0, 1].
    Inferred from the dataset if not specified.
  :param base_eps_iter: float, a step size used in different ways by different recipes.
    Typically the step size for a PGD attack.
    Inferred from the dataset if not specified.
  :param base_eps_iter_small: float, a second step size for a more fine-grained attack.
    Inferred from the dataset if not specified.
  """
    # Avoid circular import
    from cleverhans import attack_bundling
    if callable(recipe):
        run_recipe = recipe
    else:
        run_recipe = getattr(attack_bundling, recipe)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    assert filepath.endswith('.joblib')
    if report_path is None:
        report_path = filepath[:-len('.joblib')] + "_bundled_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    if 'max_val' in factory.kwargs:
        max_value = factory.kwargs['max_val']
    elif hasattr(dataset, 'max_val'):
        max_value = dataset.max_val
    else:
        raise AttributeError("Can't find max_value specification")
    min_value = 0. - center * max_value
    value_range = max_value - min_value

    if 'CIFAR' in str(factory.cls):
        if base_eps is None:
            base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
        if base_eps_iter_small is None:
            base_eps_iter_small = 1. / 255.
    elif 'MNIST' in str(factory.cls):
        if base_eps is None:
            base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
        base_eps_iter_small = None
    else:
        # Note that it is not required to specify base_eps_iter_small
        if base_eps is None or base_eps_iter is None:
            raise NotImplementedError("Not able to infer threat model from " +
                                      str(factory.cls))

    eps = base_eps * value_range
    eps_iter = base_eps_iter * value_range
    if base_eps_iter_small is None:
        eps_iter_small = None
    else:
        eps_iter_small = base_eps_iter_small * value_range
    clip_min = min_value
    clip_max = max_value

    x_data, y_data = dataset.get_set(which_set)
    assert x_data.max() <= max_value
    assert x_data.min() >= min_value

    assert eps_iter <= eps
    assert eps_iter_small is None or eps_iter_small <= eps

    # Different recipes take different arguments.
    # For now I don't have an idea for a beautiful unifying framework, so
    # we get an if statement.
    if recipe == 'random_search_max_confidence_recipe':
        # pylint always checks against the default recipe here
        # pylint: disable=no-value-for-parameter
        run_recipe(sess=sess,
                   model=model,
                   x=x_data,
                   y=y_data,
                   eps=eps,
                   clip_min=clip_min,
                   clip_max=clip_max,
                   report_path=report_path)
    else:
        run_recipe(sess=sess,
                   model=model,
                   x=x_data,
                   y=y_data,
                   nb_classes=dataset.NB_CLASSES,
                   eps=eps,
                   clip_min=clip_min,
                   clip_max=clip_max,
                   eps_iter=eps_iter,
                   nb_iter=nb_iter,
                   report_path=report_path,
                   eps_iter_small=eps_iter_small)
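
make_confidence_report_bundled resolves string recipes with getattr on cleverhans.attack_bundling, but any callable that accepts the keyword arguments passed in the else-branch above can be supplied directly. The sketch below illustrates that callable interface; my_recipe and the model path are placeholders, not part of cleverhans.

# Sketch only: a stand-in recipe showing the keyword interface that
# make_confidence_report_bundled expects from non-default recipes.
def my_recipe(sess, model, x, y, nb_classes, eps, clip_min, clip_max,
              eps_iter, nb_iter, report_path, eps_iter_small=None):
    # A real recipe would build attack configurations, run the bundled
    # attacks, and write results to report_path; this stub only echoes the
    # threat model it was handed.
    print("Would attack %d examples with eps=%g (report -> %s)"
          % (len(x), eps, report_path))

# Hypothetical usage; "model.joblib" is a placeholder path.
# make_confidence_report_bundled("model.joblib", recipe=my_recipe)
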
Example #20
0
def evaluate_model(filepath,
                   attack=None,
                   preprocess=None,
                   batch_size=128,
                   num_threads=None):
    """
  Run evaluation on a saved model
  :param filepath: path to model to evaluate
  :param batch_size: size of evaluation batches
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    if num_threads:
        # Restrict TF to the requested number of intra-op threads
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = get_MNIST_67_preprocess(
        test_attack=attack)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    eval_params = {'batch_size': batch_size}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_set,
                         y_set,
                         save_logit=True,
                         filename=report_key,
                         args=eval_params)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    # Load Model
    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Attack
    if attack == 'fgsm':
        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        fgsm = FastGradientMethod(model, sess=sess)
        # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in xrange(0, len(x_test)):
            sample = x_test[sample_ind:(sample_ind + 1)]
            adv_x = fgsm.generate_np(sample, **fgsm_params)
            x_test[sample_ind:(sample_ind + 1)] = adv_x

    elif attack == 'jsma':
        jsma = SaliencyMapMethod(model, sess=sess)
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }
        # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in xrange(0, len(x_test)):
            sample = x_test[sample_ind:(sample_ind + 1)]
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            if y_test[sample_ind, 0] == 1:
                one_hot_target[0, 1] = 1
            else:
                one_hot_target[0, 0] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)
            x_test[sample_ind:(sample_ind + 1)] = adv_x
        plt.imshow(x_test[1].squeeze(), cmap='gray')  # drop the channel axis for imshow
        plt.show()
        exit()
    # Image Process
    x_test = image_process(x_test, preprocess)

    preds = model.get_logits(x)
    fn = str(filepath[10:-7]) + "_" + str(preprocess) + "_" + str(attack)
    do_eval(preds, x_test, y_test, fn, True)
    with open('{}_y.pickle'.format(fn), 'wb') as handle:
        pickle.dump(y_test, handle)
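
In the JSMA branch above, the target class is simply "the other class" in the two-class (6-vs-7) setting, encoded as a one-hot vector. Here is a standalone sketch of that targeting logic; the labels below are made up for illustration.

# Sketch only: rebuild the one-hot JSMA targets used above for a
# two-class problem, flipping each sample to the opposite class.
import numpy as np

nb_classes = 2
y_true = np.array([[1., 0.],    # sample labelled class 0
                   [0., 1.]])   # sample labelled class 1

for row in y_true:
    one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
    # Target the class the sample does not belong to.
    one_hot_target[0, 1 if row[0] == 1 else 0] = 1.
    print(row, '->', one_hot_target)
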
Example #21
0
def make_confidence_report(
    filepath,
    train_start=TRAIN_START,
    train_end=TRAIN_END,
    test_start=TEST_START,
    test_end=TEST_END,
    batch_size=BATCH_SIZE,
    which_set=WHICH_SET,
    mc_batch_size=MC_BATCH_SIZE,
    report_path=REPORT_PATH,
    base_eps_iter=BASE_EPS_ITER,
    nb_iter=NB_ITER,
    save_advx=SAVE_ADVX,
):
    """
    Load a saved model, gather its predictions, and save a confidence report.


    This function works by running a single MaxConfidence attack on each example.
    This provides a reasonable estimate of the true failure rate quickly, so
    long as the model does not suffer from gradient masking.
    However, this estimate is mostly intended for development work and not
    for publication. A more accurate estimate may be obtained by running
    make_confidence_report_bundled.py instead.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param mc_batch_size: batch size for MaxConfidence attack
    :param report_path: path to save the report to; if None, it is derived from
      `filepath` by replacing the ".joblib" suffix with "_report.joblib"
    :param base_eps_iter: step size if the data were in [0,1]
      (Step size will be rescaled proportional to the actual data range)
    :param nb_iter: Number of iterations of PGD to run per class
    :param save_advx: bool. If True, saves the adversarial examples to disk.
      On by default, but can be turned off to save memory, etc.
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith(".joblib")
        report_path = filepath[: -len(".joblib")] + "_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs["train_start"] = train_start
    factory.kwargs["train_end"] = train_end
    factory.kwargs["test_start"] = test_start
    factory.kwargs["test_end"] = test_end
    dataset = factory()

    center = dataset.kwargs["center"]
    max_val = dataset.kwargs["max_val"]
    value_range = max_val * (1.0 + center)
    min_value = 0.0 - center * max_val

    if "CIFAR" in str(factory.cls):
        base_eps = 8.0 / 255.0
        if base_eps_iter is None:
            base_eps_iter = 2.0 / 255.0
    elif "MNIST" in str(factory.cls):
        base_eps = 0.3
        if base_eps_iter is None:
            base_eps_iter = 0.1
    else:
        raise NotImplementedError(str(factory.cls))

    mc_params = {
        "eps": base_eps * value_range,
        "eps_iter": base_eps_iter * value_range,
        "nb_iter": nb_iter,
        "clip_min": min_value,
        "clip_max": max_val,
    }

    x_data, y_data = dataset.get_set(which_set)

    report = ConfidenceReport()

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    jobs = [
        ("clean", None, None, None, False),
        ("Semantic", semantic, None, None, False),
        ("mc", mc, mc_params, mc_batch_size, True),
    ]

    for job in jobs:
        name, attack, attack_params, job_batch_size, save_this_job = job
        if job_batch_size is None:
            job_batch_size = batch_size
        t1 = time.time()
        if save_advx and save_this_job:
            # If we want to save the adversarial examples to the filesystem, we need
            # to fetch all of them. Otherwise they're just computed one batch at a
            # time and discarded

            # The path to save to
            assert report_path.endswith(".joblib")
            advx_path = report_path[: -len(".joblib")] + "_advx_" + name + ".npy"

            # Fetch the adversarial examples
            x_data = run_attack(
                sess,
                model,
                x_data,
                y_data,
                attack,
                attack_params,
                batch_size=job_batch_size,
                devices=devices,
            )

            # Turn off the attack so `correctness_and_confidence` won't run it a
            # second time.
            attack = None
            attack_params = None

            # Save the adversarial examples
            np.save(advx_path, x_data)

        # Run correctness and confidence evaluation on adversarial examples
        packed = correctness_and_confidence(
            sess,
            model,
            x_data,
            y_data,
            batch_size=job_batch_size,
            devices=devices,
            attack=attack,
            attack_params=attack_params,
        )
        t2 = time.time()
        print("Evaluation took", t2 - t1, "seconds")
        correctness, confidence = packed

        report[name] = ConfidenceReportEntry(
            correctness=correctness, confidence=confidence
        )

        print_stats(correctness, confidence, name)

    save(report_path, report)
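
The saved report can be read back with the same serialization helpers. The sketch below shows one way to consume it; the report path is a placeholder, and it assumes, as in the code above, that the report maps job names to entries carrying correctness and confidence arrays.

# Sketch only: load the report written by make_confidence_report and print
# per-job accuracy and mean confidence. "model_report.joblib" is a placeholder.
from cleverhans.serial import load

report = load("model_report.joblib")
for name, entry in report.items():
    print("%s: accuracy %.4f, mean confidence %.4f"
          % (name, entry.correctness.mean(), entry.confidence.mean()))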