Beispiel #1
0
def main(_):
    """Generate FGSM adversarial images for every batch in the input directory.

    Builds the attack graph around an ``InceptionModel``, restores the
    checkpoint named by ``FLAGS.checkpoint_path`` and, for each batch yielded
    by ``load_images``, saves the perturbed images to ``FLAGS.output_dir``.

    :param _: unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    num_classes = 1001
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    # The inception classifier works on pixels normalized to [-1, 1]. Epsilon
    # is a difference between pixels, so the [0, 255] budget is rescaled to
    # the [0, 2] interval.
    eps = (2.0 * FLAGS.max_epsilon) / 255.0

    with tf.Graph().as_default():
        # Graph definition: input placeholder -> model -> FGSM output.
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        attack = FastGradientMethod(InceptionModel(num_classes))
        x_adv = attack.generate(x_input,
                                eps=eps,
                                clip_min=-1.,
                                clip_max=1.)

        # Session setup: restore the model variables from the checkpoint.
        scaffold = tf.train.Scaffold(
            saver=tf.train.Saver(slim.get_model_variables()))
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            batches = load_images(FLAGS.input_dir, batch_shape)
            for filenames, images in batches:
                perturbed = sess.run(x_adv, feed_dict={x_input: images})
                save_images(perturbed, filenames, FLAGS.output_dir)
Beispiel #2
0
 def test_generate_respects_dtype(self):
     """An attack built with dtypestr='float64' must emit a float64 tensor."""
     float64_attack = FastGradientMethod(self.model,
                                         sess=self.sess,
                                         dtypestr='float64')
     # The attack stored on the test case is replaced, as before.
     self.attack = float64_attack
     placeholder = tf.placeholder(dtype=tf.float64, shape=(100, 2))
     adversarial = self.attack.generate(placeholder)
     self.assertEqual(adversarial.dtype, tf.float64)
Beispiel #3
0
def get_logits_over_interval(sess,
                             model,
                             x_data,
                             fgsm_params,
                             min_epsilon=-10.,
                             max_epsilon=10.,
                             num_points=21):
    """Get logits when the input is perturbed in an interval in adv direction.

    Args:
        sess: Tf session
        model: Model for which we wish to get logits.
        x_data: Numpy array corresponding to a single data point of shape
            [height, width, channels].
        fgsm_params: Parameters for generating adversarial examples; passed
            as keyword arguments to ``FastGradientMethod.generate``.
        min_epsilon: Minimum value of epsilon over the interval.
        max_epsilon: Maximum value of epsilon over the interval.
        num_points: Number of points used to interpolate.

    Returns:
        Numpy array containing logits, one row per interpolation point.

    Raises:
        ValueError: if min_epsilon is larger than max_epsilon.
    """
    # Validate the interval before touching the data or adding ops to the
    # graph, so an invalid call fails fast and leaves no side effects.
    if min_epsilon > max_epsilon:
        raise ValueError('Minimum epsilon is larger than maximum epsilon')

    # Get the height, width and number of channels
    height = x_data.shape[0]
    width = x_data.shape[1]
    channels = x_data.shape[2]

    # Add a leading batch axis of length 1 to match the placeholder below.
    x_data = np.expand_dims(x_data, axis=0)
    import tensorflow as tf
    from cleverhans_copy.attacks import FastGradientMethod

    # Define the data placeholder
    x = tf.placeholder(dtype=tf.float32,
                       shape=[1, height, width, channels],
                       name='x')
    # Define adv_x
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)

    # Direction of the adversarial perturbation. The normalization runs
    # along axis 0, which is the length-1 batch axis of the placeholder.
    eta = tf.nn.l2_normalize(adv_x - x, dim=0)
    # Shape (num_points, 1, 1, 1) so that it broadcasts against eta and
    # produces one perturbed copy of the input per epsilon value.
    epsilon = tf.reshape(
        tf.lin_space(float(min_epsilon), float(max_epsilon), num_points),
        (num_points, 1, 1, 1))
    lin_batch = x + epsilon * eta
    logits = model.get_logits(lin_batch)
    with sess.as_default():
        log_prob_adv_array = sess.run(logits, feed_dict={x: x_data})
    return log_prob_adv_array
Beispiel #4
0
    def test_feature_pairing(self):
        """Check FeaturePairing loss values for two pairing weights.

        For each weight the loss is evaluated twice in a fresh session and
        both evaluations must match the same reference value.
        """
        fgsm = FastGradientMethod(self.model)

        def attack(x):
            return fgsm.generate(x)

        cases = (
            (0.1, [4.296023369, 2.963884830]),
            (10., [4.333082676, 3.00094414]),
        )
        for weight, reference_terms in cases:
            pairing_loss = FeaturePairing(self.model,
                                          weight=weight,
                                          attack=attack)
            loss_op = pairing_loss.fprop(self.x, self.y)
            feed = {self.x: self.vx, self.y: self.vy}
            with tf.Session() as sess:
                first = sess.run(loss_op, feed_dict=feed)
                second = sess.run(loss_op, feed_dict=feed)
            self.assertClose(first, sum(reference_terms) / 2., atol=1e-6)
            self.assertClose(second, sum(reference_terms) / 2., atol=1e-6)
def evaluate_model(filepath,
                   train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   batch_size=128,
                   testing=False,
                   num_threads=None):
    """
    Run evaluation on a saved model.

    Loads the model stored at `filepath`, builds an FGSM attack graph on top
    of it and prints the accuracy on the MNIST test set for both clean and
    adversarial inputs. Nothing is returned.

    :param filepath: path to model to evaluate
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param batch_size: size of evaluation batches
    :param testing: not referenced in the body of this function
    :param num_threads: when truthy, the session is created with
                        intra_op_parallelism_threads=1
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    # NOTE(review): only the truthiness of num_threads is used; the thread
    # count is hard-coded to 1 -- confirm whether this is intentional.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Evaluate `preds` on (x_set, y_set) and print the accuracy.
        # `report_key` is accepted but not used in this function.
        # `is_adv` only selects the wording of the printed message; with
        # None nothing is printed.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    # Load the saved model with `sess` installed as the default session
    with sess.as_default():
        model = load(filepath)
    # Sanity check: the loaded model must expose at least one parameter
    assert len(model.get_params()) > 0

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)
    preds = model.get_logits(x)

    # Evaluate the accuracy of the MNIST model on clean test examples, then
    # on adversarial examples
    do_eval(preds, x_test, y_test, 'train_clean_train_clean_eval', False)
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
Beispiel #6
0
def mnist_blackbox(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_classes=NB_CLASSES,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   nb_epochs=NB_EPOCHS,
                   holdout=HOLDOUT,
                   data_aug=DATA_AUG,
                   nb_epochs_s=NB_EPOCHS_S,
                   lmbda=LMBDA,
                   aug_batch_size=AUG_BATCH_SIZE):
    """
    MNIST tutorial for the black-box attack from arxiv.org/abs/1602.02697

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_classes: number of classes; note that this value is replaced
                       by the label width of the loaded training data
    :param batch_size: batch size, forwarded to `prep_bbox`, `train_sub`
                       and used for evaluation
    :param learning_rate: learning rate, forwarded to `prep_bbox` and
                          `train_sub`
    :param nb_epochs: number of epochs, forwarded to `prep_bbox`
    :param holdout: number of leading test examples handed to the adversary
                    as the initial substitute training set; they are removed
                    from the test set
    :param data_aug: forwarded to `train_sub`
    :param nb_epochs_s: forwarded to `train_sub`
    :param lmbda: forwarded to `train_sub`
    :param aug_batch_size: forwarded to `train_sub`
    :return: a dictionary with:
             * 'bbox': black-box model accuracy on test set
             * 'sub': substitute model accuracy on test set
             * 'bbox_on_sub_adv_ex': black-box model accuracy on adversarial
               examples transferred from the substitute model
    """

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Dictionary used to keep track and return key accuracies
    accuracies = {}

    # Perform tutorial setup. The call is made outside the assert statement
    # so that it still runs when assertions are stripped (python -O).
    setup_ok = setup_tutorial()
    assert setup_ok

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    # Initialize substitute training set reserved for adversary
    # (labels are converted from one-hot rows to class indices)
    x_sub = x_test[:holdout]
    y_sub = np.argmax(y_test[:holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    x_test = x_test[holdout:]
    y_test = y_test[holdout:]

    # Obtain Image parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Seed random number generator so tutorial is reproducible
    rng = np.random.RandomState([2017, 8, 30])

    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    prep_bbox_out = prep_bbox(sess, x, y, x_train, y_train, x_test, y_test,
                              nb_epochs, batch_size, learning_rate, rng,
                              nb_classes, img_rows, img_cols, nchannels)
    model, bbox_preds, accuracies['bbox'] = prep_bbox_out

    # Train substitute using method from https://arxiv.org/abs/1602.02697
    print("Training the substitute model.")
    train_sub_out = train_sub(sess, x, y, bbox_preds, x_sub, y_sub, nb_classes,
                              nb_epochs_s, batch_size, learning_rate, data_aug,
                              lmbda, aug_batch_size, rng, img_rows, img_cols,
                              nchannels)
    model_sub, preds_sub = train_sub_out

    # Evaluate the substitute model on clean test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_sub, x_test, y_test, args=eval_params)
    accuracies['sub'] = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    fgsm_par = {'eps': 0.3, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_sub, sess=sess)

    # Craft adversarial examples using the substitute
    x_adv_sub = fgsm.generate(x, **fgsm_par)

    # Evaluate the accuracy of the "black-box" model on adversarial examples
    accuracy = model_eval(sess,
                          x,
                          y,
                          model.get_logits(x_adv_sub),
                          x_test,
                          y_test,
                          args=eval_params)
    print('Test accuracy of oracle on adversarial examples generated '
          'using the substitute: ' + str(accuracy))
    accuracies['bbox_on_sub_adv_ex'] = accuracy

    return accuracies
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
  """
  MNIST cleverhans tutorial

  Optionally trains a model on clean data and evaluates it against FGSM,
  then trains a second model with FGSM adversarial training and evaluates
  it on clean and adversarial inputs.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: perform normal training on clean examples only
                      before performing adversarial training.
  :param testing: if true, complete an AccuracyReport for unit tests
                  to verify that performance is adequate
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param nb_filters: passed to the ModelBasicCNN constructor
  :param num_threads: when truthy, the session is created with
                      intra_op_parallelism_threads=1
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  # NOTE(review): only the truthiness of num_threads is used; the thread
  # count is hard-coded to 1 -- confirm whether this is intentional.
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Get MNIST test data
  x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                train_end=train_end,
                                                test_start=test_start,
                                                test_end=test_end)
  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': batch_size}
  fgsm_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }
  rng = np.random.RandomState([2017, 8, 30])

  def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    # Evaluate `preds` on (x_set, y_set), store the accuracy on the report
    # under `report_key`, and print it unless `is_adv` is None.
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  if clean_train:
    # First phase: train 'model1' on clean examples only
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
      # Callback handed to train(): clean test accuracy of the clean model
      do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
          args=train_params, rng=rng, var_list=model.get_params())

    # Calculate training error
    if testing:
      do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

    # Calculate training error
    if testing:
      do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

    print('Repeating the process, using adversarial training')

  # Create a new model and train it to be robust to FastGradientMethod
  model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
  fgsm2 = FastGradientMethod(model2, sess=sess)

  def attack(x):
    # Build FGSM adversarial examples for `x` against model2
    return fgsm2.generate(x, **fgsm_params)

  loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
  preds2 = model2.get_logits(x)
  adv_x2 = attack(x)

  if not backprop_through_attack:
    # For the fgsm attack used in this tutorial, the attack has zero
    # gradient so enabling this flag does not change the gradient.
    # For some other attacks, enabling this flag increases the cost of
    # training, but gives the defender the ability to anticipate how
    # the attacker will change their strategy in response to updates to
    # the defender's parameters.
    adv_x2 = tf.stop_gradient(adv_x2)
  preds2_adv = model2.get_logits(adv_x2)

  def evaluate2():
    # Accuracy of adversarially trained model on legitimate test inputs
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    # Accuracy of the adversarially trained model on adversarial examples
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

  # Perform and evaluate adversarial training
  train(sess, loss2, x, y, x_train, y_train, evaluate=evaluate2,
        args=train_params, rng=rng, var_list=model2.get_params())

  # Calculate training errors
  if testing:
    do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
    do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

  return report
Beispiel #8
0
def main(argv):
  """Evaluate a checkpointed CIFAR-10 model against a cleverhans attack.

  Restores the latest checkpoint from ``FLAGS.checkpoint_dir``, builds the
  attack selected by ``FLAGS.attack_type`` ('cwl2', 'fgsm' or 'pgd') and
  prints the accuracy on the first ``FLAGS.nb_samples`` evaluation examples.
  With ``FLAGS.sweep`` set, the evaluation is repeated for the epsilon
  values 1..16. The elapsed wall-clock time is printed at the end.

  :param argv: command-line arguments; not used by this function.
  :raises ValueError: if ``FLAGS.attack_type`` is not a supported attack.
  """

  model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

  if model_file is None:
    print('No model found')
    sys.exit()

  cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir)

  nb_classes = 10
  X_test = cifar.eval_data.xs
  Y_test = to_categorical(cifar.eval_data.ys, nb_classes)
  assert Y_test.shape[1] == 10.

  set_log_level(logging.DEBUG)

  with tf.Session() as sess:

    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    from madry_cifar10_model import make_madry_wresnet
    model = make_madry_wresnet()

    saver = tf.train.Saver()

    # Restore the checkpoint
    saver.restore(sess, model_file)

    nb_samples = FLAGS.nb_samples

    # Pixel values are on the 0-255 scale for this model.
    attack_params = {'batch_size': FLAGS.batch_size,
                     'clip_min': 0., 'clip_max': 255.}

    if FLAGS.attack_type == 'cwl2':
      from cleverhans_copy.attacks import CarliniWagnerL2
      attacker = CarliniWagnerL2(model, sess=sess)
      attack_params.update({'binary_search_steps': 1,
                            'max_iterations': 100,
                            'learning_rate': 0.1,
                            'initial_const': 10,
                            'batch_size': 10
                            })

    else:  # eps and eps_iter in range 0-255
      attack_params.update({'eps': 8, 'ord': np.inf})
      if FLAGS.attack_type == 'fgsm':
        from cleverhans_copy.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, sess=sess)

      elif FLAGS.attack_type == 'pgd':
        attack_params.update({'eps_iter': 2, 'nb_iter': 20})
        from cleverhans_copy.attacks import MadryEtAl
        attacker = MadryEtAl(model, sess=sess)

      else:
        # Fail with a clear message instead of a NameError on `attacker`
        # further down.
        raise ValueError('Unknown attack_type: %r' % (FLAGS.attack_type,))

    eval_par = {'batch_size': FLAGS.batch_size}

    if FLAGS.sweep:
      max_eps = 16
      epsilons = np.linspace(1, max_eps, max_eps)
      # Start the clock once so the reported time covers the whole sweep,
      # not just its last iteration.
      t1 = time.time()
      for e in epsilons:
        attack_params.update({'eps': e})
        x_adv = attacker.generate(x, **attack_params)
        preds_adv = model.get_probs(x_adv)
        acc = model_eval(sess, x, y, preds_adv, X_test[
            :nb_samples], Y_test[:nb_samples], args=eval_par)
        print('Epsilon %.2f, accuracy on adversarial' % e,
              'examples %0.4f\n' % acc)
      t2 = time.time()
    else:
      t1 = time.time()
      x_adv = attacker.generate(x, **attack_params)
      preds_adv = model.get_probs(x_adv)
      acc = model_eval(sess, x, y, preds_adv, X_test[
          :nb_samples], Y_test[:nb_samples], args=eval_par)
      t2 = time.time()
      print('Test accuracy on adversarial examples %0.4f\n' % acc)
    print("Took", t2 - t1, "seconds")
Beispiel #9
0
def mnist_tutorial(nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   train_end=-1,
                   test_end=-1,
                   learning_rate=LEARNING_RATE):
    """
    MNIST cleverhans tutorial

    Trains a PyTorch MNIST model, measures its clean test accuracy, then
    converts it to a TensorFlow callable and measures its accuracy on FGSM
    adversarial examples.

    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param train_end: the training data is truncated with ``[:train_end]``
    :param test_end: the test data is truncated with ``[:test_end]``
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Train a pytorch MNIST model
    torch_model = PytorchMnistModel()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        'data', train=True, download=True, transform=transforms.ToTensor()),
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        'data', train=False, transform=transforms.ToTensor()),
                                              batch_size=batch_size)

    # Truncate the datasets so that our test run more quickly
    train_loader.dataset.train_data = train_loader.dataset.train_data[:
                                                                      train_end]
    test_loader.dataset.test_data = test_loader.dataset.test_data[:test_end]

    # Train our model
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []

    total = 0
    correct = 0
    step = 0
    for epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # calc gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # apply the parameter update

            # Bring both predictions and labels to NumPy on the CPU before
            # comparing, so the count never mixes an ndarray with a
            # (possibly GPU-resident) tensor.
            preds_np = preds.data.cpu().numpy()
            ys_np = ys.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys_np).sum()
            total += len(xs)
            step += 1
            # Report and reset the running counters whenever the number of
            # examples seen since the last report is a multiple of 1000.
            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # Evaluate on clean data
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()

        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()
        ys_np = ys.data.cpu().numpy()

        correct += (np.argmax(preds_np, axis=1) == ys_np).sum()
        total += len(xs)

    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        1,
        28,
        28,
    ))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        ys_np = ys.data.cpu().numpy()
        correct += (np.argmax(adv_preds, axis=1) == ys_np).sum()
        total += len(xs)

    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc
    return report
Beispiel #10
0
    # Load pairs of faces and their labels in one-hot encoding
    faces1, faces2, labels = set_loader.load_testset(1000)

    # Create victims' embeddings using Facenet itself
    # (the second face of each pair is run through the model with the
    # "phase_train" placeholder set to False)
    graph = tf.get_default_graph()
    phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
    feed_dict = {model.face_input: faces2,
                 phase_train_placeholder: False}
    victims_embeddings = sess.run(
        model.embedding_output, feed_dict=feed_dict)

    # Define FGSM for the model
    # The total budget `eps` is split evenly over `steps` applications of
    # the attack; with steps = 1 the per-step size equals eps.
    steps = 1
    eps = 0.01
    alpha = eps / steps
    fgsm = FastGradientMethod(model)
    fgsm_params = {'eps': alpha,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x = fgsm.generate(model.face_input, **fgsm_params)

    # Run FGSM
    # Each step feeds the previous step's output back in as the input.
    adv = faces1
    for i in range(steps):
      print("FGSM step " + str(i + 1))
      feed_dict = {model.face_input: adv,
                   model.victim_embedding_input: victims_embeddings,
                   phase_train_placeholder: False}
      adv = sess.run(adv_x, feed_dict=feed_dict)

    # Test accuracy of the model
def tsc_tutorial(attack_method='fgsm', batch_size=BATCH_SIZE,
                 dataset_name='Adiac', eps=0.1, attack_on='train'):
    """Attack a saved time-series classifier and store the perturbed series.

    Loads the Keras model saved for `dataset_name`, perturbs the chosen
    split batch by batch with FGSM or BIM, writes the adversarial series to
    disk and appends the clean/adversarial accuracies to a CSV file.

    :param attack_method: 'fgsm' or 'bim'
    :param batch_size: number of series processed per batch
    :param dataset_name: name of the dataset to attack
    :param eps: perturbation budget passed to the attack
    :param attack_on: 'train' or 'test', the split to perturb
    :return: an AccuracyReport holding the accuracies of the last processed
             batch, or None when the output already exists or
             `attack_method` is not supported
    """

    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    root_dir = '/b/home/uha/hfawaz-datas/dl-tsc/'

    # dataset_name = 'Adiac'
    archive_name = 'TSC'
    classifier_name = 'resnet'
    out_dir = 'ucr-attack/'
    file_path = root_dir + 'results/' + classifier_name + '/' + archive_name +\
                '/' + dataset_name + '/best_model.hdf5'

    adv_data_dir = out_dir+attack_method+'/'+archive_name+'/'+attack_on+\
                   '/eps-'+str(eps)+'/'

    # Skip datasets whose adversarial file has already been written.
    if os.path.exists(adv_data_dir+dataset_name+'-adv'):
        print('Already_done:',dataset_name)
        return
    else:
        print('Doing:',dataset_name)

    dataset_dict = read_dataset(root_dir, archive_name, dataset_name)

    x_train, y_train, x_test, y_test, _, nb_classes = prepare_data(dataset_dict,dataset_name)

    if attack_on == 'train':
        X = x_train
        Y = y_train
        original_y = dataset_dict[dataset_name][1]
    elif attack_on =='test':
        X = x_test
        Y = y_test
        original_y = dataset_dict[dataset_name][3]
    else:
        print('Error either train or test options for attack_on param')
        exit()

    # for big datasets we should decompose in batches the evaluation of the attack
    # loop through the batches
    ori_acc = 0
    adv_acc = 0

    res_dir = out_dir + 'results'+attack_method+'.csv'
    if os.path.exists(res_dir):
        res_ori = pd.read_csv(res_dir, index_col=False)
    else:
        # np.float64 replaces the removed `np.float` alias and matches the
        # dtype used for `test_set` below.
        res_ori = pd.DataFrame(data=np.zeros((0, 3), dtype=np.float64), index=[],
                               columns=['dataset_name', 'ori_acc', 'adv_acc'])

    # Column 0 holds the original label, the remaining columns the
    # adversarial series.
    test_set = np.zeros((Y.shape[0], x_train.shape[1] + 1), dtype=np.float64)

    # NOTE(review): placeholders, the Keras model and the attack graph are
    # rebuilt on every batch -- consider hoisting them out of the loop.
    for i in range(0,len(X),batch_size):
        curr_X = X[i:i+batch_size]
        curr_Y = Y[i:i+batch_size]

        # Obtain series Parameters
        img_rows, nchannels = x_train.shape[1:3]

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=(None, img_rows, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        # Define TF model graph
        model = keras.models.load_model(file_path)
        preds = model(x)
        print("Defined TensorFlow model graph.")

        def evaluate():
            # Evaluate the accuracy of the model on legitimate test examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds, curr_X, curr_Y, args=eval_params)
            report.clean_train_clean_eval = acc
            print('Test accuracy on legitimate examples: %0.4f' % acc)
            return acc

        wrap = KerasModelWrapper(model)

        # Accumulate the accuracy weighted by the batch's share of the data.
        ori_acc += evaluate() * len(curr_X)/len(X)

        if attack_method == 'fgsm':
            # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
            fgsm = FastGradientMethod(wrap, sess=sess)
            fgsm_params = {'eps': eps }
            adv_x = fgsm.generate(x, **fgsm_params)
        elif attack_method == 'bim':
            # BasicIterativeMethod
            bim = BasicIterativeMethod(wrap,sess=sess)
            bim_params = {'eps':eps, 'eps_iter':0.05, 'nb_iter':10}
            adv_x = bim.generate(x,**bim_params)
        else:
            print('Either bim or fgsm are acceptable as attack methods')
            return

        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)

        adv = adv_x.eval({x: curr_X}, session=sess)
        # Drop the trailing axis; presumably nchannels == 1 -- TODO confirm.
        adv = adv.reshape(adv.shape[0],adv.shape[1])

        preds_adv = model(adv_x)

        # Evaluate the accuracy of the model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, curr_X, curr_Y, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc
        adv_acc += acc * len(curr_X)/len(X)

        test_set[i:i+batch_size,0] = original_y[i:i+batch_size]
        test_set[i:i+batch_size,1:] = adv


    create_directory(adv_data_dir)

    np.savetxt(adv_data_dir+dataset_name+'-adv',test_set, delimiter=',')

    add_labels_to_adv_test_set(dataset_dict, dataset_name, adv_data_dir,original_y)

    # Append this run's accuracies to the cumulative results file.
    res = pd.DataFrame(data = np.zeros((1,3),dtype=np.float64), index=[0],
            columns=['dataset_name','ori_acc','adv_acc'])
    res['dataset_name'] = dataset_name+str(eps)
    res['ori_acc'] = ori_acc
    res['adv_acc'] = adv_acc
    res_ori = pd.concat((res_ori,res),sort=False)
    res_ori.to_csv(res_dir,index=False)

    return report
Beispiel #12
0
    def setUp(self):
        """Create the session, the model and the FGSM attack under test."""
        super(TestFastGradientMethod, self).setUp()

        session = tf.Session()
        model = SimpleModel()
        self.sess = session
        self.model = model
        self.attack = FastGradientMethod(model, sess=session)
Beispiel #13
0
class TestFastGradientMethod(CleverHansTest):
    """Tests for ``FastGradientMethod`` run against a ``SimpleModel``."""

    def setUp(self):
        """Create the session, model and attack object used by the tests."""
        super(TestFastGradientMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = FastGradientMethod(self.model, sess=self.sess)

    def generate_adversarial_examples_np(self, ord, eps, **kwargs):
        """Attack a random ``(100, 2)`` float32 batch and measure the change.

        :param ord: norm passed to the attack; one of ``np.inf``, ``1``, ``2``.
        :param eps: perturbation size passed to the attack.
        :param kwargs: extra keyword arguments forwarded to ``generate_np``.
        :returns: tuple ``(x_val, x_adv, delta)`` where ``delta`` holds the
            per-row norm (of order ``ord``) of ``x_adv - x_val``.
        :raises ValueError: if ``ord`` is not one of the supported norms.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=eps,
                                        ord=ord,
                                        clip_min=-5,
                                        clip_max=5,
                                        **kwargs)
        if ord == np.inf:
            delta = np.max(np.abs(x_adv - x_val), axis=1)
        elif ord == 1:
            delta = np.sum(np.abs(x_adv - x_val), axis=1)
        elif ord == 2:
            delta = np.sum(np.square(x_adv - x_val), axis=1)**.5
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `delta` at the return statement below.
            raise ValueError(
                "ord must be np.inf, 1 or 2, got %r" % (ord,))

        return x_val, x_adv, delta

    def help_generate_np_gives_adversarial_example(self,
                                                   ord,
                                                   eps=.5,
                                                   **kwargs):
        """Check perturbation size and that most predicted labels change.

        Asserts that every per-row perturbation norm is close to ``eps`` and
        that fewer than half of the argmax predictions of ``self.model`` are
        the same before and after the attack.

        :param ord: norm passed to the attack.
        :param eps: perturbation size passed to the attack.
        :param kwargs: extra keyword arguments forwarded to ``generate_np``.
        """
        x_val, x_adv, delta = self.generate_adversarial_examples_np(
            ord, eps, **kwargs)
        self.assertClose(delta, eps)
        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.5)

    def test_generate_np_gives_adversarial_example_linfinity(self):
        """Run the adversarial-example check with the infinity norm."""
        # np.inf rather than the np.infty alias, which NumPy 2.0 removed.
        self.help_generate_np_gives_adversarial_example(np.inf)

    def test_generate_np_gives_adversarial_example_l1(self):
        """Run the adversarial-example check with the L1 norm."""
        self.help_generate_np_gives_adversarial_example(1)

    def test_generate_np_gives_adversarial_example_l2(self):
        """Run the adversarial-example check with the L2 norm."""
        self.help_generate_np_gives_adversarial_example(2)

    def test_generate_respects_dtype(self):
        """A float64 attack on a float64 placeholder yields float64 output."""
        self.attack = FastGradientMethod(self.model,
                                         sess=self.sess,
                                         dtypestr='float64')
        x = tf.placeholder(dtype=tf.float64, shape=(100, 2))
        x_adv = self.attack.generate(x)
        self.assertEqual(x_adv.dtype, tf.float64)

    def test_targeted_generate_np_gives_adversarial_example(self):
        """A targeted attack moves most predictions to the random targets."""
        # randint's upper bound is exclusive, so (0, 2) draws labels in
        # {0, 1}, matching the deprecated random_integers(0, 1, 100).
        random_labs = np.random.randint(0, 2, 100)
        random_labs_one_hot = np.zeros((100, 2))
        random_labs_one_hot[np.arange(100), random_labs] = 1

        _, x_adv, delta = self.generate_adversarial_examples_np(
            eps=.5, ord=np.inf, y_target=random_labs_one_hot)

        self.assertClose(delta, 0.5)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(random_labs == new_labs) > 0.7)

    def test_generate_np_can_be_called_with_different_eps(self):
        """Each eps value yields an infinity-norm perturbation of that size."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        for eps in [0.1, 0.2, 0.3, 0.4]:
            x_adv = self.attack.generate_np(x_val,
                                            eps=eps,
                                            ord=np.inf,
                                            clip_min=-5.0,
                                            clip_max=5.0)

            delta = np.max(np.abs(x_adv - x_val), axis=1)
            self.assertClose(delta, eps)

    def test_generate_np_clip_works_as_expected(self):
        """The output's min and max match the requested clip bounds."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=0.5,
                                        ord=np.inf,
                                        clip_min=-0.2,
                                        clip_max=0.1)

        self.assertClose(np.min(x_adv), -0.2)
        self.assertClose(np.max(x_adv), 0.1)

    def test_generate_np_caches_graph_computation_for_eps_clip_or_xi(self):
        """A second ``generate_np`` call must not invoke ``tf.gradients``.

        After a first call, ``tf.gradients`` is replaced by a function that
        raises ``RuntimeError``; the second call, made with different
        numeric arguments, therefore only succeeds if it does not call
        ``tf.gradients`` again.
        """

        x_val = np.random.rand(1, 2)
        x_val = np.array(x_val, dtype=np.float32)

        # NOTE(review): clip_max equals clip_min in both calls; the values
        # are kept as-is because this test makes no assertion on the output.
        self.attack.generate_np(x_val,
                                eps=.3,
                                num_iterations=10,
                                clip_max=-5.0,
                                clip_min=-5.0,
                                xi=1e-6)

        old_grads = tf.gradients

        def fn(*x, **y):
            raise RuntimeError()

        tf.gradients = fn
        try:
            self.attack.generate_np(x_val,
                                    eps=.2,
                                    num_iterations=10,
                                    clip_max=-4.0,
                                    clip_min=-4.0,
                                    xi=1e-5)
        finally:
            # Always undo the monkey-patch so a failure here cannot leave
            # tf.gradients broken for the tests that run afterwards.
            tf.gradients = old_grads
Beispiel #14
0
def main(argv):
    """Evaluate a restored MadryMNIST checkpoint on adversarial test data.

    Looks up the latest checkpoint in ``FLAGS.checkpoint_dir``, builds the
    attack graph selected by ``FLAGS.attack_type`` (``'fgsm'`` or ``'bim'``),
    restores the checkpoint and prints the evaluation time and the accuracy
    on the adversarial examples.

    :param argv: not used by this function.
    :raises ValueError: if no checkpoint is found, or if the attack type is
        neither ``'fgsm'`` nor ``'bim'``.
    """
    ckpt_dir = FLAGS.checkpoint_dir
    checkpoint = tf.train.latest_checkpoint(ckpt_dir)
    if checkpoint is None:
        raise ValueError("Couldn't find latest checkpoint in " + ckpt_dir)

    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                  train_end=60000,
                                                  test_start=0,
                                                  test_end=10000)
    assert Y_train.shape[1] == 10

    # NOTE: for compatibility with Madry Lab downloadable checkpoints,
    # we cannot enclose this in a scope or do anything else that would
    # change the automatic naming of the variables.
    model = MadryMNIST()

    # x_input is never fed below; it is still created so the graph holds
    # exactly the same ops as before.
    x_input = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10])

    # Pick the attack and its parameters, then build the adversarial graph.
    attack_type = FLAGS.attack_type
    if attack_type == 'fgsm':
        attacker = FastGradientMethod(model)
        attack_kwargs = dict(eps=0.3, clip_min=0., clip_max=1.)
    elif attack_type == 'bim':
        attacker = BasicIterativeMethod(model)
        attack_kwargs = dict(eps=0.3,
                             clip_min=0.,
                             clip_max=1.,
                             nb_iter=50,
                             eps_iter=.01)
    else:
        raise ValueError(attack_type)
    adv_x = attacker.generate(x_image, **attack_kwargs)
    preds_adv = model.get_probs(adv_x)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Load the trained weights before evaluating.
        saver.restore(sess, checkpoint)

        # Time the accuracy evaluation on the adversarial examples.
        eval_args = {'batch_size': FLAGS.batch_size}
        started = time.time()
        acc = model_eval(sess, x_image, y, preds_adv, X_test, Y_test,
                         args=eval_args)
        elapsed = time.time() - started
        print("Took", elapsed, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
Beispiel #15
0
def attack(model, session, a):
    """Run FastGradientMethod with default parameters on one image.

    :param model: model handed to ``FastGradientMethod``.
    :param session: session handed to ``FastGradientMethod`` as ``sess``.
    :param a: object exposing an ``original_image`` attribute.
    :returns: whatever ``generate_np`` returns for the image with a new
        leading axis added.
    """
    attacker = FastGradientMethod(model, sess=session)
    # Index with np.newaxis to prepend an axis before calling the attack.
    batched_image = a.original_image[np.newaxis]
    adversarial = attacker.generate_np(batched_image)
    return adversarial