def main(argv):
    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    if model_file is None:
        print('No model found')
        sys.exit()

    set_log_level(logging.DEBUG)

    sess = tf.Session()
    with sess.as_default():

        model = make_wresnet()
        saver = tf.train.Saver()
        # Restore the checkpoint
        saver.restore(sess, model_file)
        SCOPE = "cifar10_challenge"
        model2 = make_wresnet(scope=SCOPE)
        assert len(model.get_vars()) == len(model2.get_vars())
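        # Copy each restored weight into the matching variable of the scoped
        # clone, pairing variables by name; `found` verifies that every
        # target variable gets assigned exactly once.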
        found = [False] * len(model2.get_vars())
        for var1 in model.get_vars():
            var1_found = False
            var2_name = SCOPE + "/" + var1.name
            for idx, var2 in enumerate(model2.get_vars()):
                if var2.name == var2_name:
                    var1_found = True
                    found[idx] = True
                    sess.run(tf.assign(var2, var1))
                    break
            assert var1_found, var1.name
        assert all(found)

        model2.dataset_factory = Factory(CIFAR, {"max_val": 255})

        serial.save("model.joblib", model2)
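
A minimal round-trip sketch for the artifact written above (same path and
serial module as in the example; `get_vars` is the model method used above):

import tensorflow as tf
from cleverhans import serial

sess = tf.Session()
with sess.as_default():
    # Loading inside the session rebuilds the pickled variables in it
    restored = serial.load("model.joblib")
    assert len(restored.get_vars()) > 0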
Example #2
def test_make_confidence_report_bundled():
  """
  A very simple test that just makes sure make_confidence_report_bundled can run without crashing
  """

  sess = tf.compat.v1.Session()
  try:
    nb_classes = 3
    nb_features = 2
    batch_size = 5
    nb_test_examples = batch_size * 2
    layer = Linear(num_hid=nb_classes)
    model = MLP(layers=[layer], input_shape=(None, nb_features))
    dataset = SimpleDataset(test_end=nb_test_examples, nb_classes=nb_classes)
    model.dataset_factory = dataset.get_factory()
    filepath = ".test_model.joblib"
    with sess.as_default():
      sess.run(tf.compat.v1.global_variables_initializer())
      serial.save(filepath, model)
    def recipe(sess, model, x, y, nb_classes, eps, clip_min,
               clip_max, eps_iter, nb_iter,
               report_path, eps_iter_small, batch_size):
      """
      Mock recipe that just runs the Noise attack so the test runs fast
      """
      attack_configs = [AttackConfig(Noise(model, sess), {'eps': eps})]
      new_work_goal = {config: 1 for config in attack_configs}
      goals = [Misclassify(new_work_goal=new_work_goal)]
      bundle_attacks(sess, model, x, y, attack_configs, goals, report_path, attack_batch_size=batch_size,
                     eval_batch_size=batch_size)
    make_confidence_report_bundled(filepath, test_end=nb_test_examples, recipe=recipe,
                                   base_eps=.1, base_eps_iter=.01, batch_size=batch_size)
  finally:
    sess.close()
Example #3
 def test_save_and_load_var(self):
   """test_save_and_load_var: Test that we can save and load a
   PicklableVariable with joblib
   """
   sess = tf.Session()
   with sess.as_default():
     x = np.ones(1)
     xv = PicklableVariable(x)
     xv.var.initializer.run()
     save("/tmp/var.joblib", xv)
     sess.run(tf.assign(xv.var, np.ones(1) * 2))
     new_xv = load("/tmp/var.joblib")
     self.assertClose(sess.run(xv.var), np.ones(1) * 2)
     self.assertClose(sess.run(new_xv.var), np.ones(1))
Example #4
def save(criteria, report, report_path, adv_x_val):
    """
  Saves the report and adversarial examples.
  :param criteria: dict, of the form returned by AttackGoal.get_criteria
  :param report: dict containing a confidence report
  :param report_path: string, filepath
  :param adv_x_val: numpy array containing dataset of adversarial examples
  """
    print_stats(criteria['correctness'], criteria['confidence'], 'bundled')

    serial.save(report_path, report)

    assert report_path.endswith(".joblib")
    adv_x_path = report_path[:-len(".joblib")] + "_adv.npy"
    np.save(adv_x_path, adv_x_val)
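
A minimal invocation sketch (the values here are illustrative only; real
criteria come from AttackGoal.get_criteria):

criteria = {'correctness': np.zeros((2,), dtype=bool),
            'confidence': np.ones((2,), dtype=np.float32)}
save(criteria, {'bundled': criteria}, "report.joblib", np.zeros((2, 28, 28, 1)))
# writes report.joblib plus report_adv.npy alongside it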
Example #5
def test_save_load_confidence_report():
  """
  Test that a confidence report can be saved and loaded.
  """
  report = ConfidenceReport()
  num_examples = 2
  clean_correctness = np.zeros((num_examples,), dtype=bool)
  clean_confidence = np.zeros((num_examples,), dtype=np.float32)
  adv_correctness = clean_correctness.copy()
  adv_confidence = clean_confidence.copy()
  report['clean'] = ConfidenceReportEntry(clean_correctness, clean_confidence)
  report['adv'] = ConfidenceReportEntry(adv_correctness, adv_confidence)
  report.completed = True
  filepath = ".test_confidence_report.joblib"
  serial.save(filepath, report)
  report = serial.load(filepath)
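
After the round trip, the loaded report could be verified along these lines
(a sketch, assuming ConfidenceReportEntry exposes its fields as attributes):

assert report.completed
np.testing.assert_array_equal(report['clean'].correctness, clean_correctness)
np.testing.assert_array_equal(report['adv'].confidence, adv_confidence)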
Example #6
 def evaluate():
     global epoch
     global last_test_print
     global last_train_print
     global best_result
     global best_epoch
     with sess.as_default():
         print("Saving to ", FLAGS.save_path)
         save(FLAGS.save_path, model)
     if epoch % print_test_period == 0 or time.time() - last_test_print > 300:
         t1 = time.time()
         result = do_eval(dataset.x_test, dataset.y_test, False)
         t2 = time.time()
         if result >= best_result:
             if result > best_result:
                 best_epoch = epoch
             else:
                 # Keep track of ties
                 assert result == best_result
                 if not isinstance(best_epoch, list):
                     if best_epoch == -1:
                         best_epoch = []
                     else:
                         best_epoch = [best_epoch]
                 best_epoch.append(epoch)
             best_result = result
         print("Best so far: ", best_result)
         print("Best epoch: ", best_epoch)
         last_test_print = t2
         print("Test eval time: ", t2 - t1)
     if (epoch % print_train_period == 0
             or time.time() - last_train_print > 3000):
         t1 = time.time()
         print("Training set: ")
         do_eval(dataset.x_train, dataset.y_train, False)
         t2 = time.time()
         print("Train eval time: ", t2 - t1)
         last_train_print = t2
     epoch += 1
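
How best_epoch evolves under this tie-tracking (a worked sketch):

result 0.91 at epoch 3  ->  best_epoch = 3
result 0.91 at epoch 7  ->  best_epoch = [3, 7]   (tie recorded as a list)
result 0.95 at epoch 9  ->  best_epoch = 9        (strict improvement resets to a scalar)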
Example #7
def make_confidence_report(filepath,
                           train_start=TRAIN_START,
                           train_end=TRAIN_END,
                           test_start=TEST_START,
                           test_end=TEST_END,
                           batch_size=BATCH_SIZE,
                           which_set=WHICH_SET,
                           mc_batch_size=MC_BATCH_SIZE,
                           report_path=REPORT_PATH,
                           base_eps_iter=BASE_EPS_ITER,
                           nb_iter=NB_ITER):
    """
  Load a saved model, gather its predictions, and save a confidence report.


  This function works by running a single MaxConfidence attack on each example.
  This provides a reasonable estimate of the true failure rate quickly, so
  long as the model does not suffer from gradient masking.
  However, this estimate is mostly intended for development work and not
  for publication. A more accurate estimate may be obtained by running
  make_confidence_report_bundled.py instead.

  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param batch_size: size of evaluation batches
  :param which_set: 'train' or 'test'
  :param mc_batch_size: batch size for MaxConfidence attack
  :param base_eps_iter: step size if the data were in [0,1]
    (Step size will be rescaled proportional to the actual data range)
  :param nb_iter: Number of iterations of PGD to run per class
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith('.joblib')
        report_path = filepath[:-len('.joblib')] + "_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val

    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
    else:
        raise NotImplementedError(str(factory.cls))

    mc_params = {
        'eps': base_eps * value_range,
        'eps_iter': base_eps_iter * value_range,
        'nb_iter': nb_iter,
        'clip_min': min_value,
        'clip_max': max_val
    }

    x_data, y_data = dataset.get_set(which_set)

    report = {}

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    jobs = [('clean', None, None, None), ('Semantic', semantic, None, None),
            ('mc', mc, mc_params, mc_batch_size)]

    for job in jobs:
        name, attack, attack_params, job_batch_size = job
        if job_batch_size is None:
            job_batch_size = batch_size
        t1 = time.time()
        packed = correctness_and_confidence(sess,
                                            model,
                                            x_data,
                                            y_data,
                                            batch_size=job_batch_size,
                                            devices=devices,
                                            attack=attack,
                                            attack_params=attack_params)
        t2 = time.time()
        print("Evaluation took", t2 - t1, "seconds")
        correctness, confidence = packed

        report[name] = {'correctness': correctness, 'confidence': confidence}

        print_stats(correctness, confidence, name)

    save(report_path, report)
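
The saved report can be inspected later with the same serialization helpers
(a sketch; `load` is the cleverhans loader already used above):

loaded = load(report_path)
for name, entry in loaded.items():
    print(name, entry['correctness'].mean(), entry['confidence'].mean())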
Example #8
def model_training(model,
                   file_name,
                   x_train,
                   y_train,
                   x_test,
                   y_test,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    Trains the model with the specified parameters.

    Parameters
    ----------
    model: cleverhans.model.Model
        The cleverhans picklable model
    file_name: str
        The name of the joblib file.
    x_train: numpy.ndarray
        The input array of the train dataset.
    y_train: numpy.ndarray
        The output array of the train dataset.
    x_test: numpy.ndarray
        The input array of the test dataset.
    y_test: numpy.ndarray
        The output array of the test dataset.
    nb_epochs: int, optional
        The number of epochs.
    batch_size: int, optional
        The batch size.
    learning_rate: float, optional
        The learning rate.
    num_threads: int, optional
        The number of threads used.
    label_smoothing: float, optional
        The amount of label smoothing used.
    """

    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}

    session = tf.Session(config=tf.ConfigProto(**config_args))

    img_rows, img_cols, channels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate
    }

    eval_params = {"batch_size": batch_size}

    predictions = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def train_evaluation():
        """
        Prints the performances of the models after each epoch.
        """

        evaluate(session, x, y, predictions, x_train, y_train, x_test, y_test,
                 eval_params)

    train(session,
          loss,
          x_train,
          y_train,
          evaluate=train_evaluation,
          args=train_params,
          var_list=model.get_params())

    with session.as_default():
        save("models/joblibs/" + file_name, model)
Example #9
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   preprocess='',
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    ### CHANGE DATASET ###
    # Get MNIST data
    # mnist = MNIST_67(train_start=train_start, train_end=train_end,
    #               test_start=test_start, test_end=test_end)
    # x_train, y_train = mnist.get_set('train')
    # x_test, y_test = mnist.get_set('test')
    x_train, y_train, x_test, y_test = get_MNIST_67_preprocess(
        preprocess=preprocess)
    with open('../pickle/{}_y_train.pickle'.format(FILENAME), 'wb') as handle:
        pickle.dump(y_train, handle)
    with open('../pickle/{}_y_test.pickle'.format(FILENAME), 'wb') as handle:
        pickle.dump(y_test, handle)
    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    ### ADD PARAMETERS ###
    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_set,
                         y_set,
                         save_logit=True,
                         filename=FLAGS.filename + "_" + report_key,
                         args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        ### picklable ###
        #model = ModelBasicCNN('model1', nb_classes, nb_filters)
        model = make_basic_picklable_cnn(nb_filters=nb_filters,
                                         nb_classes=nb_classes)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        #Now, save the graph
        with sess.as_default():
            save("../models/CNN_{}.joblib".format(preprocess), model)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Deliberate early exit: this run only trains and saves the clean
        # model, so the FGSM evaluation below never executes.
        exit()
        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    ### picklable ###
    #model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    model2 = make_basic_picklable_cnn(nb_filters=nb_filters,
                                      nb_classes=nb_classes)

    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          x_train,
          y_train,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    #Now, save the graph
    with sess.as_default():
        save("../models/{}_{}.joblib".format(FILENAME, preprocess), model2)

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
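
The returned AccuracyReport simply carries the accuracies recorded via setattr
in do_eval, e.g. (a sketch; assumes the pickle and model directories exist):

report = mnist_tutorial(nb_epochs=2, clean_train=False)
print(report.adv_train_clean_eval, report.adv_train_adv_eval)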
Example #10
def bundle_examples_with_goal(sess, model, adv_x_list, y, goal, report_path):
    """
  A post-processor version of attack bundling, that chooses the strongest
  example from the output of multiple earlier bundling strategies.

  :param sess: tf.session.Session
  :param model: cleverhans.model.Model
  :param adv_x_list: list of numpy arrays
    Each entry in the list is the output of a previous bundler; it is an
      adversarial version of the whole dataset.
  :param y: numpy array containing true labels
  :param goal: AttackGoal to use to choose the best version of each adversarial
    example
  :param report_path: str, the path the report will be saved to
  """

    # Check the input
    num_attacks = len(adv_x_list)
    assert num_attacks > 0
    adv_x_0 = adv_x_list[0]
    assert isinstance(adv_x_0, np.ndarray)
    assert all(adv_x.shape == adv_x_0.shape for adv_x in adv_x_list)

    # Allocate the output
    out = np.zeros_like(adv_x_0)
    m = adv_x_0.shape[0]
    # Initialize with negative sentinel values to make sure everything is
    # written to
    correctness = -np.ones(m, dtype='int32')
    confidence = -np.ones(m, dtype='float32')

    # Gather criteria
    criteria = [
        goal.get_criteria(sess, model, adv_x, y) for adv_x in adv_x_list
    ]
    assert all('correctness' in c for c in criteria)
    assert all('confidence' in c for c in criteria)
    _logger.info("Accuracy on each advx dataset: ")
    for c in criteria:
        _logger.info("\t" + str(c['correctness'].mean()))

    for example_idx in range(m):
        # Index of the best attack for this example
        attack_idx = 0
        # Find the winner
        for candidate_idx in range(1, num_attacks):
            if goal.new_wins(criteria[attack_idx], example_idx,
                             criteria[candidate_idx], example_idx):
                attack_idx = candidate_idx
        # Copy the winner into the output
        out[example_idx] = adv_x_list[attack_idx][example_idx]
        correctness[example_idx] = criteria[attack_idx]['correctness'][
            example_idx]
        confidence[example_idx] = criteria[attack_idx]['confidence'][
            example_idx]

    assert correctness.min() >= 0
    assert correctness.max() <= 1
    assert confidence.min() >= 0.
    assert confidence.max() <= 1.

    report = {
        'bundled': {
            'correctness': correctness,
            'confidence': confidence
        }
    }
    serial.save(report_path, report)
    assert report_path.endswith('.joblib')
    adv_x_path = report_path[:-len('.joblib')] + "_adv_x.npy"
    np.save(adv_x_path, out)
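
The two artifacts written here pair up by construction (sketch):

report = serial.load(report_path)
adv_x = np.load(report_path[:-len('.joblib')] + "_adv_x.npy")
assert report['bundled']['correctness'].shape[0] == adv_x.shape[0]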
Example #11
def do_train(train_start=TRAIN_START,
             train_end=60000,
             test_start=0,
             test_end=10000,
             nb_epochs=NB_EPOCHS,
             batch_size=BATCH_SIZE,
             learning_rate=LEARNING_RATE,
             backprop_through_attack=False,
             nb_filters=NB_FILTERS,
             num_threads=None,
             use_ema=USE_EMA,
             ema_decay=EMA_DECAY):
    print('Parameters')
    print('-' * 79)
    for x, y in sorted(locals().items()):
        print('%-32s %s' % (x, y))
    print('-' * 79)

    if os.path.exists(FLAGS.save_path):
        print("Model " + FLAGS.save_path +
              " already exists. Refusing to overwrite.")
        quit()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    dataset = MNIST(train_start=train_start,
                    train_end=train_end,
                    test_start=test_start,
                    test_end=test_end,
                    center=True)

    # Use Image Parameters
    img_rows, img_cols, nchannels = dataset.x_train.shape[1:4]
    nb_classes = dataset.NB_CLASSES

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        'nb_epochs': nb_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(x_set, y_set, is_adv=None):
        acc = accuracy(sess, model, x_set, y_set)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'clean'
        if report_text:
            print('Accuracy on %s examples: %0.4f' % (report_text, acc))
        return acc

    model = Model(filters=nb_filters)
    model.dataset_factory = dataset.get_factory()

    pgd = ProjectedGradientDescent(model=model, sess=sess)

    center = dataset.kwargs['center']
    value_range = 1. + center
    base_eps = 8. / 255.

    attack_params = {
        'eps': base_eps * value_range,
        'clip_min': -float(center),
        'clip_max': float(center),
        'eps_iter': (2. / 255.) * value_range,
        'nb_iter': 40,
    }

    loss = CrossEntropy(
        model,
        attack=pgd,
        adv_coeff=1.,
        attack_params=attack_params,
    )

    print_test_period = 10
    print_train_period = 50

    def evaluate():
        global epoch
        global last_test_print
        global last_train_print
        global best_result
        global best_epoch
        with sess.as_default():
            print("Saving to ", FLAGS.save_path)
            save(FLAGS.save_path, model)
        if epoch % print_test_period == 0 or time.time() - last_test_print > 300:
            t1 = time.time()
            result = do_eval(dataset.x_test, dataset.y_test, False)
            t2 = time.time()
            if result >= best_result:
                if result > best_result:
                    best_epoch = epoch
                else:
                    # Keep track of ties
                    assert result == best_result
                    if not isinstance(best_epoch, list):
                        if best_epoch == -1:
                            best_epoch = []
                        else:
                            best_epoch = [best_epoch]
                    best_epoch.append(epoch)
                best_result = result
            print("Best so far: ", best_result)
            print("Best epoch: ", best_epoch)
            last_test_print = t2
            print("Test eval time: ", t2 - t1)
        if (epoch % print_train_period == 0
                or time.time() - last_train_print > 3000):
            t1 = time.time()
            print("Training set: ")
            do_eval(dataset.x_train, dataset.y_train, False)
            t2 = time.time()
            print("Train eval time: ", t2 - t1)
            last_train_print = t2
        epoch += 1

    optimizer = None

    ema_decay = globals()[ema_decay]
    assert callable(ema_decay)

    train(sess,
          loss,
          dataset.x_train,
          dataset.y_train,
          evaluate=evaluate,
          optimizer=optimizer,
          args=train_params,
          rng=rng,
          var_list=model.get_params(),
          use_ema=use_ema,
          ema_decay=ema_decay)
    # Make sure we always evaluate on the last epoch, so pickling bugs are more
    # obvious
    if (epoch - 1) % print_test_period != 0:
        do_eval(dataset.x_test, dataset.y_test, False)
    if (epoch - 1) % print_train_period != 0:
        print("Training set: ")
        do_eval(dataset.x_train, dataset.y_train, False)

    with sess.as_default():
        save(FLAGS.save_path, model)
Example #12
def run_batch_with_goal(sess, model, x, y, adv_x_val, criteria, attack_configs,
                        run_counts, goal, report, report_path):
    """
  Runs attack bundling on one batch of data.
  This function is mostly intended to be called by
  `bundle_attacks_with_goal`.

  :param sess: tf.session.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param adv_x_val: numpy array containing the adversarial examples made so far
    by earlier work in the bundling process
  :param criteria: dict mapping string names of criteria to numpy arrays with
    their values for each example
    (Different AttackGoals track different criteria)
  :param run_counts: dict mapping AttackConfigs to numpy arrays reporting how
    many times they have been run on each example
  :param goal: the AttackGoal to work on
  :param report: dict, see `bundle_attacks_with_goal`
  :param report_path: str, path to save the report to
  """
    attack_config = goal.get_attack_config(attack_configs, run_counts,
                                           criteria)
    idxs = goal.request_examples(attack_config, criteria, run_counts,
                                 BATCH_SIZE)
    x_batch = x[idxs]
    assert x_batch.shape[0] == BATCH_SIZE
    y_batch = y[idxs]
    assert y_batch.shape[0] == BATCH_SIZE
    adv_x_batch = run_attack(sess, model, x_batch, y_batch,
                             attack_config.attack, attack_config.params,
                             BATCH_SIZE, devices)
    criteria_batch = goal.get_criteria(sess, model, adv_x_batch, y_batch)
    # This can't be parallelized because some orig examples are copied more
    # than once into the batch
    cur_run_counts = run_counts[attack_config]
    for batch_idx, orig_idx in enumerate(idxs):
        cur_run_counts[orig_idx] += 1
        should_copy = goal.new_wins(criteria, orig_idx, criteria_batch,
                                    batch_idx)
        if should_copy:
            adv_x_val[orig_idx] = adv_x_batch[batch_idx]
            for key in criteria:
                criteria[key][orig_idx] = criteria_batch[key][batch_idx]
            assert np.allclose(y[orig_idx], y_batch[batch_idx])
    report['bundled'] = {
        'correctness': criteria['correctness'],
        'confidence': criteria['confidence']
    }

    should_save = False
    new_time = time.time()
    if 'time' in report:
        if new_time - report['time'] > REPORT_TIME_INTERVAL:
            should_save = True
    else:
        should_save = True
    if should_save:
        report['time'] = new_time
        goal.print_progress(criteria, run_counts)
        print_stats(criteria['correctness'], criteria['confidence'], 'bundled')

        serial.save(report_path, report)

        assert report_path.endswith(".joblib")
        adv_x_path = report_path[:-len(".joblib")] + "_adv.npy"
        np.save(adv_x_path, adv_x_val)
Example #13
def make_confidence_report(
    filepath,
    train_start=TRAIN_START,
    train_end=TRAIN_END,
    test_start=TEST_START,
    test_end=TEST_END,
    batch_size=BATCH_SIZE,
    which_set=WHICH_SET,
    mc_batch_size=MC_BATCH_SIZE,
    report_path=REPORT_PATH,
    base_eps_iter=BASE_EPS_ITER,
    nb_iter=NB_ITER,
    save_advx=SAVE_ADVX,
):
    """
    Load a saved model, gather its predictions, and save a confidence report.


    This function works by running a single MaxConfidence attack on each example.
    This provides a reasonable estimate of the true failure rate quickly, so
    long as the model does not suffer from gradient masking.
    However, this estimate is mostly intended for development work and not
    for publication. A more accurate estimate may be obtained by running
    make_confidence_report_bundled.py instead.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param mc_batch_size: batch size for MaxConfidence attack
    :param base_eps_iter: step size if the data were in [0,1]
      (Step size will be rescaled proportional to the actual data range)
    :param nb_iter: Number of iterations of PGD to run per class
    :param save_advx: bool. If True, saves the adversarial examples to disk.
      On by default, but can be turned off to save memory, etc.
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith(".joblib")
        report_path = filepath[: -len(".joblib")] + "_report.joblib"

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs["train_start"] = train_start
    factory.kwargs["train_end"] = train_end
    factory.kwargs["test_start"] = test_start
    factory.kwargs["test_end"] = test_end
    dataset = factory()

    center = dataset.kwargs["center"]
    max_val = dataset.kwargs["max_val"]
    value_range = max_val * (1.0 + center)
    min_value = 0.0 - center * max_val

    if "CIFAR" in str(factory.cls):
        base_eps = 8.0 / 255.0
        if base_eps_iter is None:
            base_eps_iter = 2.0 / 255.0
    elif "MNIST" in str(factory.cls):
        base_eps = 0.3
        if base_eps_iter is None:
            base_eps_iter = 0.1
    else:
        raise NotImplementedError(str(factory.cls))

    mc_params = {
        "eps": base_eps * value_range,
        "eps_iter": base_eps_iter * value_range,
        "nb_iter": nb_iter,
        "clip_min": min_value,
        "clip_max": max_val,
    }

    x_data, y_data = dataset.get_set(which_set)

    report = ConfidenceReport()

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    jobs = [
        ("clean", None, None, None, False),
        ("Semantic", semantic, None, None, False),
        ("mc", mc, mc_params, mc_batch_size, True),
    ]

    for job in jobs:
        name, attack, attack_params, job_batch_size, save_this_job = job
        if job_batch_size is None:
            job_batch_size = batch_size
        t1 = time.time()
        if save_advx and save_this_job:
            # If we want to save the adversarial examples to the filesystem, we need
            # to fetch all of them. Otherwise they're just computed one batch at a
            # time and discarded

            # The path to save to
            assert report_path.endswith(".joblib")
            advx_path = report_path[: -len(".joblib")] + "_advx_" + name + ".npy"

            # Fetch the adversarial examples
            x_data = run_attack(
                sess,
                model,
                x_data,
                y_data,
                attack,
                attack_params,
                batch_size=job_batch_size,
                devices=devices,
            )

            # Turn off the attack so `correctness_and_confidence` won't run it a
            # second time.
            attack = None
            attack_params = None

            # Save the adversarial examples
            np.save(advx_path, x_data)

        # Run correctness and confidence evaluation on adversarial examples
        packed = correctness_and_confidence(
            sess,
            model,
            x_data,
            y_data,
            batch_size=job_batch_size,
            devices=devices,
            attack=attack,
            attack_params=attack_params,
        )
        t2 = time.time()
        print("Evaluation took", t2 - t1, "seconds")
        correctness, confidence = packed

        report[name] = ConfidenceReportEntry(
            correctness=correctness, confidence=confidence
        )

        print_stats(correctness, confidence, name)

    save(report_path, report)
Example #14
def mnist_tutorial_jsma(train_start=0,
                        train_end=60000,
                        test_start=0,
                        test_end=10000,
                        viz_enabled=VIZ_ENABLED,
                        nb_epochs=NB_EPOCHS,
                        batch_size=BATCH_SIZE,
                        source_samples=SOURCE_SAMPLES,
                        learning_rate=LEARNING_RATE):
    """
  MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param viz_enabled: (boolean) activate plots of adversarial examples
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param nb_classes: number of output classes
  :param source_samples: number of test inputs to attack
  :param learning_rate: learning rate for training
  :return: an AccuracyReport object
  """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    num_threads = None
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64

    # Define TF model graph
    model = make_basic_picklable_cnn()

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_train, [60000, 28, 28]), y_train))
    dataset = dataset.batch(32)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.reshape(x_test, [10000, 28, 28]), y_test))
    val_dataset = val_dataset.batch(32)

    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    if TRAIN_NEW == 1:
        with sess.as_default():
            train(sess, loss, x_train, y_train, args=train_params, rng=rng)
            save("test.joblib", model)
    else:
        with sess.as_default():
            model = load("test.joblib")  #changed
        assert len(model.get_params()) > 0
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    seed(SEED)
    for sample_ind in xrange(0, source_samples):
        img = randint(0, x_test.shape[0] - 1)  # stay within the test set bounds
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[img:(img + 1)]  # sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_test[img]))  # current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, nchannels))
        tn = 0
        totc = 0
        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[img].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]
            diff = np.array(adv_x - sample)
            #print(np.sum(diff))
            diff = np.reshape(diff, (28, 28))
            diff = diff * 255
            cv2.imwrite("test.png", diff)
            diff = cv2.imread("test.png")
            diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
            neighbors = 0
            tc = 0
            # For each modified pixel, count how many of its 8 neighbors were
            # also modified; bounds checks cover edges and corners.
            for i in range(28):
                for j in range(28):
                    if diff[i, j] > 0:
                        tc = tc + 1
                        totc = totc + 1
                        for di in (-1, 0, 1):
                            for dj in (-1, 0, 1):
                                if di == 0 and dj == 0:
                                    continue
                                ni, nj = i + di, j + dj
                                if 0 <= ni < 28 and 0 <= nj < 28 and diff[ni, nj] > 0:
                                    neighbors = neighbors + 1

            tn = tn + neighbors
            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols, nchannels)),
                    np.reshape(adv_x, (img_rows, img_cols, nchannels)), figure)
            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, nchannels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb
            #print(perturbations[target, sample_ind])

    print('--------------------------------------')

    print("average neighbors per modified pixel ", tn / totc)
    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.8f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)

    s = perturbations.shape
    myPert = np.empty(0)
    myResults = np.empty(0)
    for i in range(s[0]):
        for j in range(s[1]):
            if perturbations[i][j] > 0:
                myPert = np.append(myPert, perturbations[i][j])
                myResults = np.append(myResults, results[i][j])
    min_perturbed = np.min(myPert)
    max_perturbed = np.max(myPert)

    s2 = myResults.shape
    final = np.empty(0)
    for i in range(s2[0]):
        if myResults[i] > 0:
            final = np.append(final, myPert[i])

    print('Avg. rate of perturbed features {0:.8f}'.format(percent_perturbed))
    print('MIN of perturbed features {0:.8f}'.format(min_perturbed))
    print('MAX of perturbed features {0:.8f}'.format(max_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    min_perturb_succ = np.min(final)
    max_perturb_succ = np.max(final)
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(percent_perturb_succ))
    print('Min of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(min_perturb_succ))
    print('Max of perturbed features for successful '
          'adversarial examples {0:.8f}'.format(max_perturb_succ))

    #Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report
Example #15
def train_sub(sess,
              x,
              y,
              bbox_preds,
              x_sub,
              y_sub,
              nb_classes,
              nb_epochs_s,
              batch_size,
              learning_rate,
              data_aug,
              lmbda,
              aug_batch_size,
              rng,
              img_rows=28,
              img_cols=28,
              nchannels=1,
              preprocess=''):
    """
  This function creates the substitute by alternatively
  augmenting the training data and training the substitute.
  :param sess: TF session
  :param x: input TF placeholder
  :param y: output TF placeholder
  :param bbox_preds: output of black-box model predictions
  :param x_sub: initial substitute training data
  :param y_sub: initial substitute training labels
  :param nb_classes: number of output classes
  :param nb_epochs_s: number of epochs to train substitute model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param data_aug: number of times substitute training data is augmented
  :param lmbda: lambda from arxiv.org/abs/1602.02697
  :param rng: numpy.random.RandomState instance
  :return:
  """
    # Define TF model graph (for the black-box model)
    #model_sub = ModelSubstitute('model_s', nb_classes)
    model_sub = make_basic_picklable_substitute(nb_classes=nb_classes)
    preds_sub = model_sub.get_logits(x)
    loss_sub = CrossEntropy(model_sub, smoothing=0)

    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            train(sess,
                  loss_sub,
                  x_sub,
                  to_categorical(y_sub, nb_classes),
                  init_all=False,
                  args=train_params,
                  rng=rng,
                  var_list=model_sub.get_params())

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            x_sub = jacobian_augmentation(sess, x, x_sub, y_sub, grads,
                                          lmbda_coef * lmbda, aug_batch_size)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            y_sub = np.hstack([y_sub, y_sub])
            x_sub_prev = x_sub[int(len(x_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [x_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            y_sub[int(len(x_sub) / 2):] = np.argmax(bbox_val, axis=1)

    #Now, save the graph
    print("save model")
    with sess.as_default():
        save("../models/{}_{}.joblib".format(FILENAME, preprocess), model_sub)
    return model_sub, preds_sub
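
Shape bookkeeping for one augmentation round (illustrative sizes):

# before: x_sub.shape == (n, 28, 28, 1) and y_sub.shape == (n,)
# jacobian_augmentation doubles the inputs -> x_sub.shape == (2n, 28, 28, 1)
# np.hstack doubles y_sub, then its second half is overwritten with the
# black-box model's argmax labels for the newly synthesized points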
Example #16
File: train.py Project: ATPGN/ATPGN
def train_with_PGN(sess, model, loss, train_type='naive', evaluate=None, args=None,
          rng=None, classifier_var_list=None, generator_var_list=None, save_dir=None,
          fprop_args=None, optimizer=None, use_ema=False, ema_decay=.998,
          loss_threshold=1e10, dataset_train=None, dataset_size=None):
  """
  Run (optionally multi-replica, synchronous) training to minimize `loss`
  :param sess: TF session to use when training the graph
  :param loss: tensor, the loss to minimize
  :param evaluate: function that is run after each training iteration
                   (typically to display the test/validation accuracy).
  :param args: dict or argparse `Namespace` object.
               Should contain `nb_epochs`, `learning_rate`,
               `batch_size`
  :param rng: Instance of numpy.random.RandomState
  :param train_type: str, 'naive' or 'PGN'
  :param classifier_var_list: Optional list of classifier parameters to train.
  :param generator_var_list: Optional list of generator parameters to train
      (used when train_type is 'PGN').
  :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
  :param optimizer: Optimizer to be used for training
  :param use_ema: bool
      If true, uses an exponential moving average of the model parameters
  :param ema_decay: float or callable
      The decay parameter for EMA, if EMA is used
      If a callable rather than a float, this is a callable that takes
      the epoch and batch as arguments and returns the ema_decay for
      the current batch.
  :param loss_threshold: float
      Raise an exception if the loss exceeds this value.
      This is intended to rapidly detect numerical problems.
      Sometimes the loss may legitimately be higher than this value. In
      such cases, raise the value. If needed it can be np.inf.
  :param dataset_train: tf Dataset instance.
      Used as a replacement for x_train, y_train for faster performance.
  :param dataset_size: integer, the size of the dataset_train.
  :return: True if model trained
  """

  # Check whether the hardware is working correctly
  canary.run_canary()
  args = _ArgsWrapper(args or {})
  fprop_args = fprop_args or {}

  # Check that necessary arguments were given (see doc above)
  # Be sure to support 0 epochs for debugging purposes
  if args.nb_epochs is None:
    raise ValueError("`args` must specify number of epochs")
  if optimizer is None:
    if args.learning_rate is None:
      raise ValueError("Learning rate was not given in args dict")
  assert args.batch_size, "Batch size was not given in args dict"
  assert dataset_train and dataset_size, "dataset_train or dataset_size was not given"

  if rng is None:
    rng = np.random.RandomState()

  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
  else:
    if not isinstance(optimizer, tf.train.Optimizer):
      raise ValueError("optimizer object must be from a child class of "
                       "tf.train.Optimizer")

  grads_classifier = []
  if train_type == 'PGN':
    grads_generator = []
  xs = []
  ys = []
  data_iterator = dataset_train.make_one_shot_iterator().get_next()
  # Pull one batch to infer the dtypes and shapes for the placeholders
  # (this consumes the first batch of the one-shot iterator).
  x_train, y_train = sess.run(data_iterator)

  devices = infer_devices()
  for device in devices:
    with tf.device(device):
      x = tf.placeholder(x_train.dtype, (None,) + x_train.shape[1:])
      y = tf.placeholder(y_train.dtype, (None,) + y_train.shape[1:])
      xs.append(x)
      ys.append(y)
      if train_type == 'PGN':
        loss_classifier, loss_generator = loss.fprop(x, y, **fprop_args)
      else:
        loss_classifier = loss.fprop(x, y, **fprop_args)
      grads_classifier.append(optimizer.compute_gradients(loss_classifier, var_list=classifier_var_list))
      if train_type == 'PGN':
        grads_generator.append(optimizer.compute_gradients(loss_generator, var_list=generator_var_list))

  num_devices = len(devices)
  print("num_devices: ", num_devices)

  # Average the per-device gradients into a single list of
  # (gradient, variable) pairs that apply_gradients can consume
  grad_classifier = avg_grads(grads_classifier)
  if train_type == 'PGN':
    grad_generator = avg_grads(grads_generator)
  # Trigger update operations within the default graph (such as batch_norm).
  with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    train_step = optimizer.apply_gradients(grad_classifier)
    if train_type == 'PGN':
      with tf.control_dependencies([train_step]):
        train_step = optimizer.apply_gradients(grad_generator)

  # Copy so we don't mutate the caller's classifier_var_list in place
  var_list = list(classifier_var_list)
  if train_type == 'PGN':
    var_list += generator_var_list
  if use_ema:
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    with tf.control_dependencies([train_step]):
      train_step = ema.apply(var_list)
    # Get pointers to the EMA's running average variables
    avg_params = [ema.average(param) for param in var_list]
    # Make temporary buffers used for swapping the live and running average
    # parameters
    tmp_params = [tf.Variable(param, trainable=False)
                  for param in var_list]
    # Define the swapping operation
    param_to_tmp = [tf.assign(tmp, param)
                    for tmp, param in safe_zip(tmp_params, var_list)]
    with tf.control_dependencies(param_to_tmp):
      avg_to_param = [tf.assign(param, avg)
                      for param, avg in safe_zip(var_list, avg_params)]
    with tf.control_dependencies(avg_to_param):
      tmp_to_avg = [tf.assign(avg, tmp)
                    for avg, tmp in safe_zip(avg_params, tmp_params)]
    swap = tmp_to_avg

  batch_size = args.batch_size

  assert batch_size % num_devices == 0
  device_batch_size = batch_size // num_devices

  sess.run(tf.global_variables_initializer())

  for epoch in range(args.nb_epochs):
    nb_batches = int(math.ceil(float(dataset_size) / batch_size))
    prev = time.time()
    for batch in range(nb_batches):
      x_train_shuffled, y_train_shuffled = sess.run(data_iterator)
      feed_dict = dict()
      for dev_idx in range(num_devices):
        cur_start = dev_idx * device_batch_size
        cur_end = (dev_idx + 1) * device_batch_size
        feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
        feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]

      _, loss_classifier_numpy = sess.run([train_step, loss_classifier],
                                          feed_dict=feed_dict)

      if np.abs(loss_classifier_numpy) > loss_threshold:
        raise ValueError("Extreme loss_classifier during training: " +
                         str(loss_classifier_numpy))
      if np.isnan(loss_classifier_numpy) or np.isinf(loss_classifier_numpy):
        raise ValueError("NaN/Inf loss_classifier during training")
    cur = time.time()
    _logger.info("Epoch " + str(epoch) + " took " +
                 str(cur - prev) + " seconds")
    if evaluate is not None:
      if use_ema:
        sess.run(swap)
      evaluate(epoch)

      if use_ema:
        sess.run(swap)
  if use_ema:
    sess.run(swap)

  with sess.as_default():
    save_path = os.path.join(save_dir,'model.joblib')
    save(save_path, model)

  return True
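The EMA bookkeeping above deserves a closer look: evaluation should see the averaged weights, but training must resume from the live ones, so the loop swaps the two sets in place around each `evaluate` call using a temporary buffer and chained control dependencies. A self-contained TF1 sketch of the same three-step swap, using two scalar "parameters" in place of a real model:

import tensorflow as tf

v = tf.Variable(1.0)                       # "live" parameter
avg = tf.Variable(0.5)                     # its EMA shadow
tmp = tf.Variable(0.0, trainable=False)    # scratch buffer for the swap

# tmp <- live, then live <- avg, then avg <- tmp: an in-place swap.
to_tmp = tf.assign(tmp, v)
with tf.control_dependencies([to_tmp]):
    to_live = tf.assign(v, avg)
with tf.control_dependencies([to_live]):
    swap = tf.assign(avg, tmp)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(swap)                # evaluate with the averaged weights
    print(sess.run([v, avg]))     # [0.5, 1.0]
    sess.run(swap)                # swap back before training resumes
    print(sess.run([v, avg]))     # [1.0, 0.5]

Running `swap` twice restores the original pairing, which is exactly how the training loop brackets each `evaluate` call.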
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
  MNIST cleverhans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = make_basic_picklable_cnn()
        # Tag the model so that when it is saved to disk, future scripts will
        # be able to tell what data it was trained on
        model.dataset_factory = mnist.get_factory()
        preds = model.get_logits(x)
        assert len(model.get_params()) > 0
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        with sess.as_default():
            save("clean_model.joblib", model)

            print("Now that the model has been saved, you can evaluate it in a"
                  " separate process using `evaluate_pickled_model.py`. "
                  "You should get exactly the same result for both clean and "
                  "adversarial accuracy as you get within this program.")

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = make_basic_picklable_cnn()
    # Tag the model so that when it is saved to disk, future scripts will
    # be able to tell what data it was trained on
    model2.dataset_factory = mnist.get_factory()
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          x_train,
          y_train,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    with sess.as_default():
        save("adv_model.joblib", model2)
        print(
            "Now that the model has been saved, you can evaluate it in a "
            "separate process using "
            "`python evaluate_pickled_model.py adv_model.joblib`. "
            "You should get exactly the same result for both clean and "
            "adversarial accuracy as you get within this program. "
            "You can also move beyond the tutorials directory and run the "
            "real `compute_accuracy.py` script (make sure cleverhans/scripts "
            "is in your PATH) to see that this FGSM-trained model is "
            "actually not very robust; it's just a model that trains "
            "quickly so the tutorial does not take a long time.")

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
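For intuition about the attack used throughout this tutorial: FGSM is a single signed-gradient step on the loss, clipped back into the valid input range. A minimal TF1-style sketch of the perturbation that the `FastGradientMethod` object builds, assuming a `logits` tensor computed from input `x` and one-hot labels `y` (the real attack also supports targeted variants and other norms):

import tensorflow as tf

def fgsm_sketch(x, y, logits, eps=0.3, clip_min=0., clip_max=1.):
    # One signed-gradient ascent step on the cross-entropy loss.
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y,
                                                      logits=logits)
    grad, = tf.gradients(loss, x)
    adv_x = x + eps * tf.sign(grad)
    return tf.clip_by_value(adv_x, clip_min, clip_max)

Because tf.sign has zero gradient almost everywhere, backpropagating through this construction adds nothing, which is what the comment next to tf.stop_gradient above points out.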
Example #18
def compute_geodesic_matrices():
    mnist = MNIST(train_start=0,
                  train_end=FLAGS.nb_train,
                  test_start=0,
                  test_end=1000)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters.
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    with get_tensorflow_session() as sess:
        with tf.variable_scope('dknn'):
            tf.set_random_seed(FLAGS.seed)
            np.random.seed(int(FLAGS.seed))

            # Define input TF placeholder.
            x = tf.placeholder(tf.float32,
                               shape=(None, img_rows, img_cols, nchannels))
            y = tf.placeholder(tf.float32, shape=(None, nb_classes))

            # Define a model.
            model = make_basic_picklable_cnn()
            preds = model.get_logits(x)
            loss = CrossEntropy(model, smoothing=0.)

            # Define the test set accuracy evaluation.
            def evaluate():
                acc = model_eval(sess,
                                 x,
                                 y,
                                 preds,
                                 x_test,
                                 y_test,
                                 args={'batch_size': FLAGS.batch_size})
                print('Test accuracy on test examples: %0.4f' % acc)

            # Train the model
            train_params = {
                'nb_epochs': FLAGS.nb_epochs,
                'batch_size': FLAGS.batch_size,
                'learning_rate': FLAGS.lr
            }

            model_filepath = "../data/model.joblib"
            path = Path(model_filepath)

            if path.is_file():
                model = serial.load(model_filepath)
            else:
                train(sess,
                      loss,
                      x_train,
                      y_train,
                      evaluate=evaluate,
                      args=train_params,
                      var_list=model.get_params())
                serial.save(model_filepath, model)

            # Define a callable that returns a dictionary of all activations
            # for a dataset. Note that `layers` is defined further below; the
            # closure looks it up at call time, so this is valid.
            def get_activations(data):
                data_activations = {}
                for layer in layers:
                    layer_sym = tf.layers.flatten(model.get_layer(x, layer))
                    data_activations[layer] = batch_eval(
                        sess, [x], [layer_sym], [data],
                        args={'batch_size': FLAGS.batch_size})[0]
                return data_activations

            # Use a holdout of the test set to simulate calibration data for the DkNN.
            train_data = x_train
            train_labels = np.argmax(y_train, axis=1)
            cali_data = x_test[:FLAGS.nb_cali]
            y_cali = y_test[:FLAGS.nb_cali]
            cali_labels = np.argmax(y_cali, axis=1)
            test_data = x_test[FLAGS.nb_cali:]
            y_test = y_test[FLAGS.nb_cali:]

            # Layers whose representations are of interest to the DkNN, for
            # both the training and calibration data.
            layers = ['ReLU1', 'ReLU3', 'ReLU5', 'logits']

            # Wrap the model into a DkNNModel
            dknn = DkNNModel(FLAGS.neighbors,
                             layers,
                             get_activations,
                             train_data,
                             train_labels,
                             nb_classes,
                             scope='dknn')

    # Compute matrix for each layer
    geodesic_matrices = []
    for layer in layers:
        print(layer)
        activations = dknn.train_activations[layer]
        geodesic_matrix = hard_geodesics_euclidean_kernel(
            activations, FLAGS.proto_neighbors)
        geodesic_matrices.append(geodesic_matrix)

    matrix_path = '../results/geodesic_matrices_' + str(
        FLAGS.nb_train) + '_' + str(FLAGS.proto_neighbors) + '.pkl'
    with open(matrix_path, 'wb') as f:
        pickle.dump(geodesic_matrices, f)

    return True
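hard_geodesics_euclidean_kernel is specific to this project, but the name suggests the standard Isomap-style construction: geodesic distance approximated by shortest paths over a Euclidean k-nearest-neighbor graph. A sketch under that assumption (not the project's actual implementation):

import numpy as np
from sklearn.neighbors import kneighbors_graph
from scipy.sparse.csgraph import shortest_path

def geodesic_matrix_sketch(activations, proto_neighbors):
    # Flatten activations to vectors, connect each point to its
    # proto_neighbors nearest neighbors with Euclidean edge weights,
    # then take all-pairs shortest paths over the resulting graph.
    flat = np.asarray(activations).reshape(len(activations), -1)
    knn = kneighbors_graph(flat, proto_neighbors, mode='distance')
    # Treat edges as undirected so the graph is more likely connected.
    return shortest_path(knn, method='D', directed=False)

The result is a dense n-by-n matrix and the computation is expensive for large nb_train, which is presumably why the matrices are pickled to disk above rather than recomputed.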