def get_ensemble_diversity_values(sess, x, y, predictions, number_model, X_test=None, Y_test=None,
               feed=None, args=None):
  """
  Evaluate the ensemble-diversity tensor on some data, batch by batch.

  The tensor returned by `ensemble_diversity(y, predictions, number_model)`
  is evaluated on consecutive batches of `X_test` / `Y_test`, and the
  per-batch results are concatenated along axis 0.

  :param sess: TF session to use
  :param x: input placeholder
  :param y: output placeholder (for labels)
  :param predictions: model output predictions
  :param number_model: forwarded unchanged to `ensemble_diversity`
                       (presumably the number of ensemble members --
                       confirm against that helper)
  :param X_test: numpy array with test inputs
  :param Y_test: numpy array with test labels
  :param feed: An optional dictionary that is appended to the feeding
           dictionary before the session runs. Can be used to feed
           the learning phase of a Keras model for instance.
  :param args: dict or argparse `Namespace` object.
               Should contain `batch_size`
  :return: numpy array holding the concatenated per-batch values, with
           the padding rows of the last batch removed
  :raises ValueError: if `X_test` or `Y_test` is None
  """
  args = _ArgsWrapper(args or {})

  assert args.batch_size, "Batch size was not given in args dict"
  if X_test is None or Y_test is None:
    raise ValueError("X_test argument and Y_test argument "
                     "must be supplied.")

  get_batch_ensemble_diversity = ensemble_diversity(y, predictions, number_model)

  # Collect the per-batch results and concatenate once at the end. The
  # empty float array seeds the list so an empty `X_test` still yields an
  # (empty) array, exactly like the previous incremental concatenation.
  batch_records = [np.array([])]
  with sess.as_default():
    # Compute number of batches
    nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
    assert nb_batches * args.batch_size >= len(X_test)

    # Fixed-size buffers: every batch fed to the graph has `batch_size` rows.
    X_cur = np.zeros((args.batch_size,) + X_test.shape[1:],
                     dtype=X_test.dtype)
    Y_cur = np.zeros((args.batch_size,) + Y_test.shape[1:],
                     dtype=Y_test.dtype)
    end = 0  # keeps the final sanity check well-defined for empty input
    for batch in range(nb_batches):
      if batch % 100 == 0 and batch > 0:
        _logger.debug("Batch " + str(batch))

      # Must not use the `batch_indices` function here, because it
      # repeats some examples.
      # It's acceptable to repeat during training, but not eval.
      start = batch * args.batch_size
      end = min(len(X_test), start + args.batch_size)

      # The last batch may be smaller than all others; the tail of the
      # buffers then still holds rows left over from the previous batch.
      cur_batch_size = end - start
      X_cur[:cur_batch_size] = X_test[start:end]
      Y_cur[:cur_batch_size] = Y_test[start:end]
      feed_dict = {x: X_cur, y: Y_cur}
      if feed is not None:
        feed_dict.update(feed)
      batch_values = get_batch_ensemble_diversity.eval(feed_dict=feed_dict)

      # Keep only the rows that belong to real examples so the padding of
      # the last batch is not reported as extra (duplicated) records.
      # NOTE(review): assumes axis 0 of the tensor indexes examples --
      # confirm against `ensemble_diversity`.
      batch_records.append(batch_values[:cur_batch_size])

    assert end >= len(X_test)

  return np.concatenate(batch_records, axis=0)  # len(X_test) entries
Beispiel #2
0
def batch_eval(sess, tf_inputs, tf_outputs, numpy_inputs, feed=None,
               args=None):
    """
    Evaluate a list of tensors on numpy data, one batch at a time.

    :param sess: TF session used to run the tensors
    :param tf_inputs: list of placeholders, paired positionally with
                      `numpy_inputs`
    :param tf_outputs: list of tensors to evaluate
    :param numpy_inputs: list of numpy arrays; all must have the same
                         length along axis 0
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :return: list with one numpy array per entry of `tf_outputs`, each
             being the axis-0 concatenation of the per-batch results
    """
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"

    num_inputs = len(numpy_inputs)
    assert num_inputs > 0
    assert num_inputs == len(tf_inputs)
    total = numpy_inputs[0].shape[0]
    for idx in range(1, num_inputs):
        assert numpy_inputs[idx].shape[0] == total

    # One accumulator list per requested output tensor.
    collected = [[] for _ in tf_outputs]
    with sess.as_default():
        for batch, lo in enumerate(range(0, total, args.batch_size)):
            if batch > 0 and batch % 100 == 0:
                _logger.debug("Batch " + str(batch))

            # Slice the same window out of every input array.
            hi = lo + args.batch_size
            chunks = [array[lo:hi] for array in numpy_inputs]
            cur_batch_size = chunks[0].shape[0]
            assert cur_batch_size <= args.batch_size
            for chunk in chunks:
                assert chunk.shape[0] == cur_batch_size

            feed_dict = dict(zip(tf_inputs, chunks))
            if feed is not None:
                feed_dict.update(feed)
            results = sess.run(tf_outputs, feed_dict=feed_dict)
            for result in results:
                assert result.shape[0] == cur_batch_size, result.shape
            for bucket, result in zip(collected, results):
                bucket.append(result)

    merged = [np.concatenate(parts, axis=0) for parts in collected]
    for array in merged:
        assert array.shape[0] == total, array.shape
    return merged
Beispiel #3
0
def color_shift_attack(sess, x, y, X_test, Y_test, prediction, args=None, num_trials=1000):
  """
  Search for misclassified images by randomly shifting hue and saturation.

  For every batch the clean images are classified first. Images that are
  already misclassified are stored as they are. The remaining images are
  converted to HSV and, for up to `num_trials` trials, get a random hue
  shift and a random saturation shift whose magnitude grows with the trial
  index; every shifted image that becomes misclassified is stored and
  removed from the pool. Images that are never fooled are stored
  unmodified (in RGB) at the end.

  :param sess: TF session to use
  :param x: input placeholder
  :param y: output placeholder (for labels)
  :param X_test: numpy array with input images; indexed as 4-D with the
                 colour channels last (values presumably in [0, 1], as
                 required by the matplotlib colour conversions -- confirm)
  :param Y_test: numpy array with labels; `argmax` over the last axis is
                 compared against the predicted class
  :param prediction: model output tensor
  :param args: dict or argparse `Namespace` object.
               Should contain `batch_size`
  :param num_trials: maximum number of random colour shifts per image
  :return: tuple `(X_adv, Y_adv)` of numpy arrays. Examples are NOT in the
           order of `X_test`, which is why the labels are returned too.
  """
  # Import the submodule explicitly: `import matplotlib` alone does not
  # guarantee that `matplotlib.colors` is loaded.
  import matplotlib.colors
  N = X_test.shape[0]  # number of samples

  args = _ArgsWrapper(args or {})
  assert args.batch_size, "Batch size was not given in args dict"

  pred_label = tf.argmax(prediction, axis=-1)

  succ_rate = 0

  X_adv = []  # accumulator of adversarial examples
  Y_adv = []  # labels, stored alongside because the order is not preserved

  with sess.as_default():
    nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
    assert nb_batches * args.batch_size >= len(X_test)

    for batch in range(nb_batches):
      if batch % 100 == 0 and batch > 0:
        _logger.debug("Batch " + str(batch))

      # Must not use the `batch_indices` function here, because it
      # repeats some examples.
      # It's acceptable to repeat during training, but not eval.
      start = batch * args.batch_size
      end = min(len(X_test), start + args.batch_size)

      # Work on copies so nothing downstream can modify the caller's data.
      X_cur = np.copy(X_test[start:end])
      Y_cur = np.copy(Y_test[start:end])

      # Classify the clean batch. The labels fed for `y` must be the ones
      # of the current batch, not the whole test set.
      feed_dict = {x: convert_uniimage(X_cur), y: Y_cur}
      cur_pred_label = pred_label.eval(feed_dict=feed_dict)

      wrong_labels = cur_pred_label != np.argmax(Y_cur, axis=-1)
      still_right = np.logical_not(wrong_labels)
      X_clean = X_cur[still_right]  # RGB originals still classified correctly
      Y = Y_cur[still_right]

      # Images misclassified without any modification are stored as they are.
      X_adv.append(X_cur[wrong_labels])
      Y_adv.append(Y_cur[wrong_labels])

      # adv_succ_num[i]: number of adversarial samples generated after i trials
      # adv_succ_num[0] is number of clean images misclassified by model
      adv_succ_num = np.zeros((num_trials + 1, 1))
      adv_succ_num[0] = np.sum(wrong_labels)

      # Start from the clean misclassifications so they are counted even
      # when the trial loop below exits before its first trial.
      tmp_succ_rate = np.sum(adv_succ_num) / N

      print('Trial 0' + ', Attack success rate: ' + str(succ_rate + np.sum(adv_succ_num) / N))

      ####################################################################

      # Convert RGB to HSV
      X_hsv = matplotlib.colors.rgb_to_hsv(X_clean)

      for i in range(num_trials):
        if len(X_hsv) <= 0:
          break

        # Randomly shift Hue and Saturation components, one random offset
        # per image, broadcast over both spatial axes.
        n_left = X_hsv.shape[0]
        d_h = np.random.uniform(0, 1, size=(n_left, 1))
        d_s = np.random.uniform(-1, 1, size=(n_left, 1)) * float(i) / num_trials

        X_adv_hsv = np.copy(X_hsv)
        X_adv_hsv[:, :, :, 0] = (X_hsv[:, :, :, 0] + d_h[:, :, np.newaxis]) % 1.0
        X_adv_hsv[:, :, :, 1] = np.clip(X_hsv[:, :, :, 1] + d_s[:, :, np.newaxis], 0., 1.)

        X = matplotlib.colors.hsv_to_rgb(X_adv_hsv)
        X = np.clip(X, 0., 1.)

        # extract out wrongly-classified images
        feed_dict = {x: convert_uniimage(X), y: Y}
        cur_pred_label = pred_label.eval(feed_dict=feed_dict)
        wrong_labels = cur_pred_label != np.argmax(Y, axis=-1)
        still_right = np.logical_not(wrong_labels)

        X_adv.append(X[wrong_labels])  # store wrongly-classified images
        Y_adv.append(Y[wrong_labels])

        # Only the images that are still classified correctly stay in the pool.
        X_hsv = X_hsv[still_right]
        X_clean = X_clean[still_right]
        Y = Y[still_right]

        adv_succ_num[i + 1] = np.sum(wrong_labels)

        tmp_succ_rate = np.sum(adv_succ_num) / N
        if i % 100 == 0:
          print(batch, " X_hsv left: ", len(X_hsv))
          print('Trial ' + str(i + 1) +
                ', Attack success rate: ' + str(succ_rate+tmp_succ_rate))

      # Images that survived every trial are kept unmodified. Store the RGB
      # originals (not the HSV working copy) so every entry of X_adv lives
      # in the same colour space.
      if len(X_clean) > 0:
        X_adv.append(X_clean)
        Y_adv.append(Y)

      succ_rate = succ_rate + tmp_succ_rate
      print('Batch ' + str(batch+1) +
            ', Attack success rate: ' + str(succ_rate))

  X_adv = np.concatenate(X_adv, axis=0)
  Y_adv = np.concatenate(Y_adv, axis=0)
  print("Total X_adv:", len(X_adv))
  return X_adv, Y_adv
Beispiel #4
0
def train_with_PGN(sess, model, loss, train_type='naive', evaluate=None, args=None,
          rng=None, classifier_var_list=None, generator_var_list=None, save_dir=None,
          fprop_args=None, optimizer=None, use_ema=False, ema_decay=.998,
          loss_threshold=1e10, dataset_train=None, dataset_size=None):
  """
  Run (optionally multi-replica, synchronous) training to minimize `loss`,
  reading the training batches from a tf Dataset.

  :param sess: TF session to use when training the graph
  :param model: the model object that is written to
      `<save_dir>/model.joblib` with `save` once training has finished
  :param loss: object whose `fprop(x, y, **fprop_args)` builds the loss.
      With `train_type == 'PGN'` it must return the pair
      `(loss_classifier, loss_generator)`, otherwise a single loss tensor.
  :param train_type: str; `'PGN'` additionally applies the gradients of the
      generator loss after the classifier update. Any other value trains
      the classifier loss only.
  :param evaluate: function that is run after each epoch with the epoch
      index as its only argument (typically to display the
      test/validation accuracy).
  :param args: dict or argparse `Namespace` object.
               Should contain `nb_epochs`, `learning_rate`,
               `batch_size`
  :param rng: accepted for interface compatibility; not used here, because
      the batches come from `dataset_train`.
  :param classifier_var_list: Optional list of variables updated by the
      classifier loss. The list is never modified.
  :param generator_var_list: Optional list of variables updated by the
      generator loss (only used when `train_type == 'PGN'`).
  :param save_dir: directory in which `model.joblib` is written. If None,
      the model is not saved.
  :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
  :param optimizer: Optimizer to be used for training
  :param use_ema: bool
      If true, uses an exponential moving average of the model parameters
  :param ema_decay: float or callable
      The decay parameter for EMA, if EMA is used
      NOTE(review): the value is passed unchanged to
      `tf.train.ExponentialMovingAverage(decay=...)`; nothing in this
      function calls it, so callable support depends on that class.
  :param loss_threshold: float
      Raise an exception if the loss exceeds this value.
      This is intended to rapidly detect numerical problems.
      Sometimes the loss may legitimately be higher than this value. In
      such cases, raise the value. If needed it can be np.inf.
  :param dataset_train: tf Dataset instance yielding `(x, y)` batches.
      One batch is consumed up front to infer dtypes and shapes, and
      `nb_epochs * ceil(dataset_size / batch_size)` more during training
      from the same one-shot iterator, so the dataset presumably has to
      repeat and yield batches of `batch_size` examples -- verify in caller.
  :param dataset_size: integer, the size of the dataset_train.
  :return: True if model trained
  :raises ValueError: if `nb_epochs` is missing, if neither an optimizer
      nor a learning rate is given, if `optimizer` is not a
      `tf.train.Optimizer`, or if the loss becomes extreme, NaN or Inf.
  """

  # Check whether the hardware is working correctly
  canary.run_canary()
  args = _ArgsWrapper(args or {})
  fprop_args = fprop_args or {}
  is_pgn = train_type == 'PGN'

  # Check that necessary arguments were given (see doc above)
  # Be sure to support 0 epochs for debugging purposes
  if args.nb_epochs is None:
    raise ValueError("`args` must specify number of epochs")
  if optimizer is None:
    if args.learning_rate is None:
      raise ValueError("Learning rate was not given in args dict")
  assert args.batch_size, "Batch size was not given in args dict"
  assert dataset_train and dataset_size, "dataset_train or dataset_size was not given"

  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate = args.learning_rate)
  else:
    if not isinstance(optimizer, tf.train.Optimizer):
      raise ValueError("optimizer object must be from a child class of "
                       "tf.train.Optimizer")

  grads_classifier = []
  grads_generator = []
  xs = []
  ys = []
  data_iterator = dataset_train.make_one_shot_iterator().get_next()
  # Pull one batch only to learn the dtypes and per-example shapes needed
  # for the placeholders.
  x_train, y_train = sess.run(data_iterator)

  # Build one replica of the loss (and its gradients) per device.
  devices = infer_devices()
  for device in devices:
    with tf.device(device):
      x = tf.placeholder(x_train.dtype, (None,) + x_train.shape[1:])
      y = tf.placeholder(y_train.dtype, (None,) + y_train.shape[1:])
      xs.append(x)
      ys.append(y)
      if is_pgn:
        loss_classifier, loss_generator = loss.fprop(x, y, **fprop_args)
      else:
        loss_classifier = loss.fprop(x, y, **fprop_args)
      grads_classifier.append(optimizer.compute_gradients(loss_classifier, var_list=classifier_var_list))
      if is_pgn:
        grads_generator.append(optimizer.compute_gradients(loss_generator, var_list=generator_var_list))

  num_devices = len(devices)
  print("num_devices: ", num_devices)

  grad_classifier = avg_grads(grads_classifier)
  if is_pgn:
    grad_generator = avg_grads(grads_generator)
  # Trigger update operations within the default graph (such as batch_norm).
  with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    train_step = optimizer.apply_gradients(grad_classifier)
    if is_pgn:
      # The generator update runs strictly after the classifier update.
      with tf.control_dependencies([train_step]):
        train_step = optimizer.apply_gradients(grad_generator)

  if use_ema:
    # Variables tracked by the moving average. Always build a NEW list: the
    # caller's `classifier_var_list` must not be extended in place. A list
    # left as None makes `compute_gradients` fall back to the trainable
    # variables, so the same fallback is used here.
    if classifier_var_list is None or (is_pgn and generator_var_list is None):
      var_list = tf.trainable_variables()
    else:
      var_list = list(classifier_var_list)
      if is_pgn:
        var_list = var_list + list(generator_var_list)

    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    with tf.control_dependencies([train_step]):
      train_step = ema.apply(var_list)
    # Get pointers to the EMA's running average variables
    avg_params = [ema.average(param) for param in var_list]
    # Make temporary buffers used for swapping the live and running average
    # parameters
    tmp_params = [tf.Variable(param, trainable=False)
                  for param in var_list]
    # Define the swapping operation
    param_to_tmp = [tf.assign(tmp, param)
                    for tmp, param in safe_zip(tmp_params, var_list)]
    with tf.control_dependencies(param_to_tmp):
      avg_to_param = [tf.assign(param, avg)
                      for param, avg in safe_zip(var_list, avg_params)]
    with tf.control_dependencies(avg_to_param):
      tmp_to_avg = [tf.assign(avg, tmp)
                    for avg, tmp in safe_zip(avg_params, tmp_params)]
    swap = tmp_to_avg

  batch_size = args.batch_size

  assert batch_size % num_devices == 0
  device_batch_size = batch_size // num_devices

  sess.run(tf.global_variables_initializer())

  nb_batches = int(math.ceil(float(dataset_size) / batch_size))
  for epoch in range(args.nb_epochs):
    prev = time.time()
    for _ in range(nb_batches):
      x_batch, y_batch = sess.run(data_iterator)
      # Give every device its own contiguous slice of the batch.
      feed_dict = dict()
      for dev_idx in range(num_devices):
        cur_start = dev_idx * device_batch_size
        cur_end = cur_start + device_batch_size
        feed_dict[xs[dev_idx]] = x_batch[cur_start:cur_end]
        feed_dict[ys[dev_idx]] = y_batch[cur_start:cur_end]

      # `loss_classifier` is the loss tensor of the last replica built above.
      _, loss_classifier_numpy = sess.run([train_step, loss_classifier], feed_dict=feed_dict)

      if np.abs(loss_classifier_numpy) > loss_threshold:
        raise ValueError("Extreme loss_classifier during training: ", loss_classifier_numpy)
      if np.isnan(loss_classifier_numpy) or np.isinf(loss_classifier_numpy):
        raise ValueError("NaN/Inf loss_classifier during training")
    cur = time.time()
    _logger.info("Epoch " + str(epoch) + " took " +
                 str(cur - prev) + " seconds")
    if evaluate is not None:
      # Evaluate with the averaged parameters, then swap the live ones back.
      if use_ema:
        sess.run(swap)
      evaluate(epoch)
      if use_ema:
        sess.run(swap)
  if use_ema:
    # Leave the averaged parameters in place as the final model.
    sess.run(swap)

  if save_dir is not None:
    with sess.as_default():
      save_path = os.path.join(save_dir,'model.joblib')
      save(save_path, model)
  else:
    _logger.warning("save_dir was not given; the trained model is not saved")

  return True
Beispiel #5
0
def model_eval_full(sess,
                    x,
                    y,
                    predictions,
                    x_t,
                    X_test=None,
                    Y_test=None,
                    X_target_test=None,
                    feed=None,
                    args=None):
    """
    Compute the accuracy of a TF model on some data in a single pass.

    Unlike the batched evaluation helpers, the whole of `X_test`,
    `Y_test` and `X_target_test` is fed to the graph at once.

    :param sess: TF session to use
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param x_t: second input placeholder, fed with `X_target_test`
    :param X_test: numpy array with test inputs
    :param Y_test: numpy array with test labels
    :param X_target_test: numpy array fed for `x_t`
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size` (checked, although the data
                 is not split into batches here)
    :return: a float with the accuracy value
    """
    global _model_eval_cache
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Build the symbolic correctness test once per (y, predictions) pair
    # and reuse it on later calls.
    cache_key = (y, predictions)
    if cache_key not in _model_eval_cache:
        _model_eval_cache[cache_key] = tf.equal(
            tf.argmax(y, axis=-1), tf.argmax(predictions, axis=-1))
    is_correct = _model_eval_cache[cache_key]

    with sess.as_default():
        feed_dict = {x: X_test, y: Y_test, x_t: X_target_test}
        if feed is not None:
            feed_dict.update(feed)
        hits = is_correct.eval(feed_dict=feed_dict)

    # Fraction of correctly classified examples.
    return (0.0 + hits.sum()) / len(X_test)
Beispiel #6
0
def model_eval(sess, x, y, predictions, X_test=None, Y_test=None,
               feed=None, args=None, is_adv=None, ae=None, type=None, datasetName="MNIST", discretizeColor=1):
  """
  Compute the accuracy of a TF model on some data, optionally after an
  adversarial perturbation and/or an input transformation.

  :param sess: TF session to use
  :param x: input placeholder
  :param y: output placeholder (for labels)
  :param predictions: model output predictions
  :param X_test: numpy array with test inputs; must be 4-D
  :param Y_test: numpy array with test labels
  :param feed: An optional dictionary that is appended to the feeding
           dictionary before the session runs. Can be used to feed
           the learning phase of a Keras model for instance.
           It is not used when evaluating `ae`.
  :param args: dict or argparse `Namespace` object.
               Should contain `batch_size`
  :param is_adv: if truthy and `ae` is given, every batch is replaced by
                 the value of `ae` evaluated on that batch
  :param ae: tensor producing adversarial examples from `x`
  :param type: input transformation applied before classification:
               `"normal"` passes the batch through `convert_uniimage`,
               `"noise"` first adds uniform noise and clips to [0, 1],
               then calls `convert_uniimage`; any other value feeds the
               batch unchanged
  :param datasetName: selects the noise amplitude for `type == "noise"`:
                      +/-0.4 for `"MNIST"`, +/-0.15 for `"CIFAR10"`.
                      Other names add no noise.
  :param discretizeColor: forwarded to `convert_uniimage`
  :return: tuple `(accuracy, percent_perturbed)`: the accuracy as a float
           and the mean per-example L2 norm of `adversarial - clean`
           (0.0 when no adversarial examples are generated)
  :raises ValueError: if `X_test` or `Y_test` is None
  """
  global _model_eval_cache
  args = _ArgsWrapper(args or {})

  assert args.batch_size, "Batch size was not given in args dict"
  if X_test is None or Y_test is None:
    raise ValueError("X_test argument and Y_test argument "
                     "must be supplied.")

  # Define accuracy symbolically
  key = (y, predictions)
  if key in _model_eval_cache:
    correct_preds = _model_eval_cache[key]
  else:
    correct_preds = tf.equal(tf.argmax(y, axis=-1),
                             tf.argmax(predictions, axis=-1))
    _model_eval_cache[key] = correct_preds

  _, width, height, channel = list(X_test.shape)

  # `is not None` rather than `!= None`: comparing a tensor with `!=` is
  # not a reliable identity test.
  craft_adv = bool(is_adv) and ae is not None

  # Init result vars
  accuracy = 0.0
  perturbation_sum = 0.0  # sum of per-example L2 norms over real examples

  with sess.as_default():
    # Compute number of batches
    nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
    assert nb_batches * args.batch_size >= len(X_test)

    # Fixed-size staging buffers. They are only ever filled from X_test /
    # Y_test and never rebound, so transformed data from one batch cannot
    # leak into the next.
    padded_size = min(args.batch_size, len(X_test))
    X_buf = np.zeros((padded_size,) + X_test.shape[1:], dtype=X_test.dtype)
    Y_buf = np.zeros((padded_size,) + Y_test.shape[1:], dtype=Y_test.dtype)

    for batch in range(nb_batches):
      if batch % 100 == 0 and batch > 0:
        _logger.debug("Batch " + str(batch))

      # Must not use the `batch_indices` function here, because it
      # repeats some examples.
      # It's acceptable to repeat during training, but not eval.
      start = batch * args.batch_size
      end = min(len(X_test), start + args.batch_size)

      # The last batch may be smaller than all others. Rows past
      # `cur_batch_size` are padding and are excluded from every statistic.
      cur_batch_size = end - start
      X_buf[:cur_batch_size] = X_test[start:end]
      Y_buf[:cur_batch_size] = Y_test[start:end]
      xtest = np.copy(X_buf)

      ###############################
      # Generate adversarial images #
      ###############################
      adv_x = np.copy(X_buf)
      if craft_adv:
        adv_x = ae.eval(feed_dict={x: X_buf})

      ###############################
      # Transform image to uniimage #
      ###############################
      tmpX = adv_x
      if type == "noise":
        # Noise amplitude used for MNIST and Fashion MNIST
        if datasetName == "MNIST":
          tmpX = np.clip(adv_x+(np.random.uniform(0, 0.8, (len(adv_x), width, height, channel)) - 0.4), 0, 1)
        # Noise amplitude used for CIFAR10
        if datasetName == "CIFAR10":
          tmpX = np.clip(adv_x+(np.random.uniform(0, 0.3, (len(adv_x), width, height, channel)) - 0.15), 0, 1)
      if type == "normal" or type == "noise":
        X_in = convert_uniimage(tmpX, discretizeColor)
      else:
        X_in = tmpX

      feed_dict = {x: X_in, y: Y_buf}
      if feed is not None:
        feed_dict.update(feed)
      cur_corr_preds = correct_preds.eval(feed_dict=feed_dict)

      accuracy += cur_corr_preds[:cur_batch_size].sum()

      # Accumulate the size of the perturbation over the real examples only
      if craft_adv:
        delta = adv_x[:cur_batch_size] - xtest[:cur_batch_size]
        perturbation_sum += np.sum(np.sum(delta ** 2, axis=(1, 2, 3)) ** .5)

    assert end >= len(X_test)

    # Divide by number of examples to get final values
    accuracy /= len(X_test)
    percent_perturbed = perturbation_sum / len(X_test)

  return accuracy, percent_perturbed
def model_train(sess, x, y, predictions, X_train, Y_train, save=False,
                predictions_adv=None, init_all=True, evaluate=None,
                feed=None, args=None, rng=None, var_list=None):
    """
    Train a TF graph with Adam (deprecated; emits a warning when called).

    :param sess: TF session to use when training the graph
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_train: numpy array with training inputs
    :param Y_train: numpy array with training outputs
    :param save: boolean controlling the save operation
    :param predictions_adv: if set with the adversarial example tensor,
                            the loss becomes the average of the clean and
                            the adversarial loss
    :param init_all: (boolean) If set to true, all TF variables in the
                     session are (re)initialized, otherwise only previously
                     uninitialized variables are initialized before training.
    :param evaluate: function that is run (without arguments) after each
                     epoch, typically to display the test/validation
                     accuracy.
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
                 If save is True, should also contain 'train_dir'
                 and 'filename'
    :param rng: Instance of numpy.random.RandomState, used to shuffle the
                training set every epoch
    :param var_list: Optional list of parameters to train.
    :return: True if model trained
    """
    warnings.warn("This function is deprecated and will be removed on or after"
                  " 2019-04-05. Switch to cleverhans.train.train.")
    args = _ArgsWrapper(args or {})

    # Mandatory hyper-parameters (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    assert args.learning_rate, "Learning rate was not given in args dict"
    assert args.batch_size, "Batch size was not given in args dict"

    if save:
        assert args.train_dir, "Directory for save was not given in args dict"
        assert args.filename, "Filename for save was not given in args dict"

    rng = np.random.RandomState() if rng is None else rng

    # Loss: clean loss, averaged with the adversarial loss when available
    loss = model_loss(y, predictions)
    if predictions_adv is not None:
        loss = (loss + model_loss(y, predictions_adv)) / 2

    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_step = optimizer.minimize(loss, var_list=var_list)

    with sess.as_default():
        # Variable initialization, with a fallback for old TF versions
        if not hasattr(tf, "global_variables_initializer"):
            warnings.warn("Update your copy of tensorflow; future versions of "
                          "CleverHans may drop support for this version.")
            sess.run(tf.initialize_all_variables())
        elif init_all:
            tf.global_variables_initializer().run()
        else:
            initialize_uninitialized_global_variables(sess)

        for epoch in xrange(args.nb_epochs):
            num_examples = len(X_train)
            nb_batches = int(math.ceil(float(num_examples) / args.batch_size))
            assert nb_batches * args.batch_size >= num_examples

            # Fresh random visiting order for this epoch
            order = list(range(num_examples))
            rng.shuffle(order)

            epoch_start = time.time()
            for batch in range(nb_batches):
                start, end = batch_indices(batch, num_examples,
                                           args.batch_size)
                picked = order[start:end]

                # One optimization step on the selected examples
                feed_dict = {x: X_train[picked], y: Y_train[picked]}
                if feed is not None:
                    feed_dict.update(feed)
                train_step.run(feed_dict=feed_dict)
            assert end >= num_examples  # Check that all examples were used
            elapsed = time.time() - epoch_start
            _logger.info("Epoch " + str(epoch) + " took " + str(elapsed) +
                         " seconds")
            if evaluate is not None:
                evaluate()

        if not save:
            _logger.info("Completed model training.")
        else:
            save_path = os.path.join(args.train_dir, args.filename)
            tf.train.Saver().save(sess, save_path)
            _logger.info("Completed model training and saved at: " +
                         str(save_path))

    return True
def train(sess, loss, x_train, y_train,
          init_all=False, evaluate=None, feed=None, args=None,
          rng=None, var_list=None, fprop_args=None, optimizer=None,
          devices=None, x_batch_preprocessor=None, use_ema=False,
          ema_decay=.998, run_canary=None,
          loss_threshold=1e5, dataset_train=None, dataset_size=None):
  """
  Run (optionally multi-replica, synchronous) training to minimize `loss`
  :param sess: TF session to use when training the graph
  :param loss: loss object; `loss.fprop(x, y, **fprop_args)` is called once
               per device to build the tensor that is minimized
  :param x_train: numpy array with training inputs. Must be None when
                  `dataset_train` is given.
  :param y_train: numpy array with training outputs. Must be None when
                  `dataset_train` is given.
  :param init_all: (boolean) If set to true, all TF variables in the session
                   are (re)initialized, otherwise only previously
                   uninitialized variables are initialized before training.
  :param evaluate: function that is run after each training epoch
                   (typically to display the test/validation accuracy).
  :param feed: An optional dictionary that is appended to the feeding
               dictionary before the session runs. Can be used to feed
               the learning phase of a Keras model for instance.
  :param args: dict or argparse `Namespace` object.
               Should contain `nb_epochs`, `learning_rate`,
               `batch_size`
  :param rng: Instance of numpy.random.RandomState
  :param var_list: Optional list of parameters to train.
  :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
  :param optimizer: Optimizer to be used for training
  :param devices: list of device names to use for training
      If None, defaults to: all GPUs, if GPUs are available
                            all devices, if no GPUs are available
  :param x_batch_preprocessor: callable
      Takes a single tensor containing an x_train batch as input
      Returns a single tensor containing an x_train batch as output
      Called to preprocess the data before passing the data to the Loss
  :param use_ema: bool
      If true, uses an exponential moving average of the model parameters
  :param ema_decay: float or callable
      The decay parameter for EMA, if EMA is used
      If a callable rather than a float, this is a callable that takes
      the epoch and batch as arguments and returns the ema_decay for
      the current batch.
  :param run_canary: deprecated
      The canary always runs; passing any value other than None only
      triggers a deprecation warning.
  :param loss_threshold: float
      Raise an exception if the loss exceeds this value.
      This is intended to rapidly detect numerical problems.
      Sometimes the loss may legitimately be higher than this value. In
      such cases, raise the value. If needed it can be np.inf.
  :param dataset_train: tf Dataset instance.
      Used as a replacement for x_train, y_train for faster performance.
  :param dataset_size: integer, the size of the dataset_train.
  :return: True if model trained
  :raises ValueError: if `nb_epochs` is missing, if neither an optimizer nor
      a learning rate is given, if `optimizer` is not a tf.train.Optimizer,
      if `dataset_train` is given without `dataset_size`, or if the loss
      becomes extreme / NaN / Inf during training.
  """

  # Check whether the hardware is working correctly
  canary.run_canary()
  if run_canary is not None:
    warnings.warn("The `run_canary` argument is deprecated. The canary "
                  "is now much cheaper and thus runs all the time. The "
                  "canary now uses its own loss function so it is not "
                  "necessary to turn off the canary when training with "
                  " a stochastic loss. Simply quit passing `run_canary`."
                  "Passing `run_canary` may become an error on or after "
                  "2019-10-16.")

  args = _ArgsWrapper(args or {})
  fprop_args = fprop_args or {}

  # Check that necessary arguments were given (see doc above)
  # Be sure to support 0 epochs for debugging purposes
  if args.nb_epochs is None:
    raise ValueError("`args` must specify number of epochs")
  if optimizer is None:
    if args.learning_rate is None:
      raise ValueError("Learning rate was not given in args dict")
  assert args.batch_size, "Batch size was not given in args dict"

  if rng is None:
    rng = np.random.RandomState()

  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
  else:
    if not isinstance(optimizer, tf.train.Optimizer):
      raise ValueError("optimizer object must be from a child class of "
                       "tf.train.Optimizer")

  grads = []
  xs = []
  ys = []
  if dataset_train is not None:
    assert x_train is None and y_train is None and x_batch_preprocessor is None
    if dataset_size is None:
      raise ValueError("You must provide a dataset size")
    data_iterator = dataset_train.make_one_shot_iterator().get_next()
    # Pull one element so that the placeholder shapes below can be derived
    # from it. Note that this consumes an element of the iterator.
    x_train, y_train = sess.run(data_iterator)

  devices = infer_devices(devices)
  for device in devices:
    with tf.device(device):
      # Placeholders are always float32 here, regardless of the dtype of the
      # training arrays.
      x = tf.placeholder(tf.float32, (None,) + x_train.shape[1:])
      y = tf.placeholder(tf.float32, (None,) + y_train.shape[1:])
      xs.append(x)
      ys.append(y)

      if x_batch_preprocessor is not None:
        x = x_batch_preprocessor(x)

      loss_value = loss.fprop(x, y, **fprop_args)
      print("loss_value", loss_value)
      grads.append(optimizer.compute_gradients(
          loss_value, var_list=var_list))
      print("grads:", grads)
  num_devices = len(devices)
  print("num_devices: ", num_devices)

  grad = avg_grads(grads)
  # Trigger update operations within the default graph (such as batch_norm).
  with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    train_step = optimizer.apply_gradients(grad)

  epoch_tf = tf.placeholder(tf.int32, [])
  batch_tf = tf.placeholder(tf.int32, [])

  if use_ema:
    # `ema.apply(None)` falls back to all trainable variables, so resolve the
    # same default explicitly: the swap ops below have to iterate over the
    # variables and would fail on a `None` var_list.
    ema_vars = tf.trainable_variables() if var_list is None else var_list
    if callable(ema_decay):
      ema_decay = ema_decay(epoch_tf, batch_tf)
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    with tf.control_dependencies([train_step]):
      train_step = ema.apply(ema_vars)
    # Get pointers to the EMA's running average variables
    avg_params = [ema.average(param) for param in ema_vars]
    # Make temporary buffers used for swapping the live and running average
    # parameters
    tmp_params = [tf.Variable(param, trainable=False)
                  for param in ema_vars]
    # Define the swapping operation
    param_to_tmp = [tf.assign(tmp, param)
                    for tmp, param in safe_zip(tmp_params, ema_vars)]
    with tf.control_dependencies(param_to_tmp):
      avg_to_param = [tf.assign(param, avg)
                      for param, avg in safe_zip(ema_vars, avg_params)]
    with tf.control_dependencies(avg_to_param):
      tmp_to_avg = [tf.assign(avg, tmp)
                    for avg, tmp in safe_zip(avg_params, tmp_params)]
    swap = tmp_to_avg

  batch_size = args.batch_size

  assert batch_size % num_devices == 0
  device_batch_size = batch_size // num_devices

  if init_all:
    sess.run(tf.global_variables_initializer())
  else:
    initialize_uninitialized_global_variables(sess)

  for epoch in xrange(args.nb_epochs):
    if dataset_train is not None:
      nb_batches = int(math.ceil(float(dataset_size) / batch_size))
    else:
      # Indices to shuffle training set
      index_shuf = list(range(len(x_train)))
      # Randomly repeat a few training examples each epoch to avoid
      # having a too-small batch
      while len(index_shuf) % batch_size != 0:
        index_shuf.append(rng.randint(len(x_train)))
      nb_batches = len(index_shuf) // batch_size
      rng.shuffle(index_shuf)
      # Shuffling here versus inside the loop doesn't seem to affect
      # timing very much, but shuffling here makes the code slightly
      # easier to read
      x_train_shuffled = x_train[index_shuf]
      y_train_shuffled = y_train[index_shuf]

    prev = time.time()
    for batch in range(nb_batches):
      if dataset_train is not None:
        # Each fetched element is treated as one full batch.
        x_train_shuffled, y_train_shuffled = sess.run(data_iterator)
        start, end = 0, batch_size
      else:
        # Compute batch start and end indices
        start = batch * batch_size
        end = (batch + 1) * batch_size

      # Perform one training step: split the batch evenly across devices
      feed_dict = {epoch_tf: epoch, batch_tf: batch}
      for dev_idx in xrange(num_devices):
        cur_start = start + dev_idx * device_batch_size
        cur_end = start + (dev_idx + 1) * device_batch_size
        feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
        feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
      if cur_end != end and dataset_train is None:
        msg = ("batch_size (%d) must be a multiple of num_devices "
               "(%d).\nCUDA_VISIBLE_DEVICES: %s"
               "\ndevices: %s")
        # Use .get() so that an unset CUDA_VISIBLE_DEVICES does not replace
        # this ValueError with a KeyError, and keep `args` unshadowed.
        msg_args = (batch_size, num_devices,
                    os.environ.get('CUDA_VISIBLE_DEVICES'),
                    str(devices))
        raise ValueError(msg % msg_args)
      if feed is not None:
        feed_dict.update(feed)

      # `loss_value` is the loss tensor built for the last device.
      _, loss_numpy = sess.run([train_step, loss_value], feed_dict=feed_dict)

      if np.abs(loss_numpy) > loss_threshold:
        raise ValueError("Extreme loss during training: ", loss_numpy)
      if np.isnan(loss_numpy) or np.isinf(loss_numpy):
        raise ValueError("NaN/Inf loss during training")
    assert (dataset_train is not None or end == len(index_shuf))  # Check that all examples were used
    cur = time.time()
    _logger.info("Epoch " + str(epoch) + " took " + str(cur - prev) + " seconds")
    print("loss:", loss_numpy)
    if evaluate is not None:
      if use_ema:
        # Before running evaluation, load the running average
        # parameters into the live slot, so we can see how well
        # the EMA parameters are performing
        sess.run(swap)
      evaluate()
      if use_ema:
        # Swap the parameters back, so that we continue training
        # on the live parameters
        sess.run(swap)
  if use_ema:
    # When training is done, swap the running average parameters into
    # the live slot, so that we use them when we deploy the model
    sess.run(swap)

  return True
Beispiel #9
0
def train(sess,
          loss,
          x_train,
          y_train,
          init_all=True,
          evaluate=None,
          feed=None,
          args=None,
          rng=None,
          var_list=None,
          fprop_args=None,
          optimizer=None,
          devices=None,
          x_batch_preprocessor=None,
          use_ema=False,
          ema_decay=.998,
          run_canary=True,
          loss_threshold=1e5):
    """
    Run (optionally multi-replica, synchronous) training to minimize `loss`
    :param sess: TF session to use when training the graph
    :param loss: loss object; `loss.fprop(x, y, **fprop_args)` is called once
                 per device to build the tensor that is minimized
    :param x_train: numpy array with training inputs
    :param y_train: numpy array with training outputs
    :param init_all: (boolean) If set to true, all TF variables in the session
                     are (re)initialized, otherwise only previously
                     uninitialized variables are initialized before training.
    :param evaluate: function that is run after each training epoch
                     (typically to display the test/validation accuracy).
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
    :param rng: Instance of numpy.random.RandomState
    :param var_list: Optional list of parameters to train.
    :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
    :param optimizer: Optimizer to be used for training
    :param devices: list of device names to use for training
        If None, defaults to: all GPUs, if GPUs are available
                              all devices, if no GPUs are available
    :param x_batch_preprocessor: callable
        Takes a single tensor containing an x_train batch as input
        Returns a single tensor containing an x_train batch as output
        Called to preprocess the data before passing the data to the Loss
    :param use_ema: bool
        If true, uses an exponential moving average of the model parameters
    :param ema_decay: float or callable
        The decay parameter for EMA, if EMA is used
        If a callable rather than a float, this is a callable that takes
        the epoch and batch as arguments and returns the ema_decay for
        the current batch.
    :param run_canary: bool
        If True and using 3 or more GPUs, runs some canary code that should
        fail if there is a multi-GPU driver problem.
        Turn this off if your gradients are inherently stochastic (e.g.
        if you use dropout). The canary code checks that all GPUs give
        approximately the same gradient.
    :param loss_threshold: float
        Raise an exception if the loss exceeds this value.
        This is intended to rapidly detect numerical problems.
        Sometimes the loss may legitimately be higher than this value. In
        such cases, raise the value. If needed it can be np.inf.
    :return: True if model trained
    :raises ValueError: if neither an optimizer nor a learning rate is
        given, if `optimizer` is not a tf.train.Optimizer, or if the loss
        becomes extreme / NaN / Inf during training.
    """
    args = _ArgsWrapper(args or {})
    fprop_args = fprop_args or {}

    # Check that necessary arguments were given (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    if optimizer is None:
        if args.learning_rate is None:
            raise ValueError("Learning rate was not given in args dict")
    assert args.batch_size, "Batch size was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    else:
        if not isinstance(optimizer, tf.train.Optimizer):
            raise ValueError("optimizer object must be from a child class of "
                             "tf.train.Optimizer")

    grads = []
    xs = []
    preprocessed_xs = []
    ys = []

    devices = infer_devices(devices)
    for device in devices:
        with tf.device(device):
            x = tf.placeholder(x_train.dtype, (None, ) + x_train.shape[1:])
            # NOTE(review): the label placeholder uses x_train.dtype rather
            # than y_train.dtype. Kept as-is because it may be a deliberate
            # cast of the labels; confirm against the loss being used.
            y = tf.placeholder(x_train.dtype, (None, ) + y_train.shape[1:])
            xs.append(x)
            ys.append(y)

            if x_batch_preprocessor is not None:
                x = x_batch_preprocessor(x)

            # We need to keep track of these so that the canary can feed
            # preprocessed values. If the canary had to feed raw values,
            # stochastic preprocessing could make the canary fail.
            preprocessed_xs.append(x)

            loss_value = loss.fprop(x, y, **fprop_args)

            grads.append(
                optimizer.compute_gradients(loss_value, var_list=var_list))
    num_devices = len(devices)
    print("num_devices: ", num_devices)

    grad = avg_grads(grads)
    # Trigger update operations within the default graph (such as batch_norm).
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = optimizer.apply_gradients(grad)

    epoch_tf = tf.placeholder(tf.int32, [])
    batch_tf = tf.placeholder(tf.int32, [])

    if use_ema:
        # `ema.apply(None)` falls back to all trainable variables, so resolve
        # the same default explicitly: the swap ops below have to iterate
        # over the variables and would fail on a `None` var_list.
        ema_vars = tf.trainable_variables() if var_list is None else var_list
        if callable(ema_decay):
            ema_decay = ema_decay(epoch_tf, batch_tf)
        ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
        with tf.control_dependencies([train_step]):
            train_step = ema.apply(ema_vars)
        # Get pointers to the EMA's running average variables
        avg_params = [ema.average(param) for param in ema_vars]
        # Make temporary buffers used for swapping the live and running average
        # parameters
        tmp_params = [
            tf.Variable(param, trainable=False) for param in ema_vars
        ]
        # Define the swapping operation
        param_to_tmp = [
            tf.assign(tmp, param)
            for tmp, param in safe_zip(tmp_params, ema_vars)
        ]
        with tf.control_dependencies(param_to_tmp):
            avg_to_param = [
                tf.assign(param, avg)
                for param, avg in safe_zip(ema_vars, avg_params)
            ]
        with tf.control_dependencies(avg_to_param):
            tmp_to_avg = [
                tf.assign(avg, tmp)
                for avg, tmp in safe_zip(avg_params, tmp_params)
            ]
        swap = tmp_to_avg

    batch_size = args.batch_size

    assert batch_size % num_devices == 0
    device_batch_size = batch_size // num_devices

    if init_all:
        sess.run(tf.global_variables_initializer())
    else:
        initialize_uninitialized_global_variables(sess)

    # Check whether the hardware is working correctly

    # So far the failure has only been observed with 3 or more GPUs
    run_canary = run_canary and num_devices > 2
    if run_canary:
        # Feed every device the same examples; all devices should then
        # produce approximately the same gradient.
        canary_feed_dict = {}
        for x, y in safe_zip(preprocessed_xs, ys):
            canary_feed_dict[x] = x_train[:device_batch_size].copy()
            canary_feed_dict[y] = y_train[:device_batch_size].copy()
        # To reduce the runtime and memory cost of this canary,
        # we test the gradient of only one parameter.
        # For now this is just set to the first parameter in the list,
        # because it is an index that is always guaranteed to work.
        # If we think that this is causing false negatives and we should
        # test other parameters, we could test a random parameter from
        # the list or we could rewrite the canary to examine more than
        # one parameter.
        param_to_test = 0
        grad_vars = [dev_grads[param_to_test][0] for dev_grads in grads]
        grad_values = sess.run(grad_vars, feed_dict=canary_feed_dict)
        failed = False
        for i in xrange(1, num_devices):
            if grad_values[0].shape != grad_values[i].shape:
                print("shape 0 does not match shape %d:" % i,
                      grad_values[0].shape, grad_values[i].shape)
                failed = True
                continue
            if not np.allclose(grad_values[0], grad_values[i], atol=1e-6):
                print("grad_values[0]: ", grad_values[0].mean(),
                      grad_values[0].max())
                print("grad_values[%d]: " % i, grad_values[i].mean(),
                      grad_values[i].max())
                # Report the difference for the device that actually
                # mismatched, not always device 1.
                print("max diff: ",
                      np.abs(grad_values[0] - grad_values[i]).max())
                failed = True
        if failed:
            print("Canary failed.")
            quit()

    for epoch in xrange(args.nb_epochs):
        # Indices to shuffle training set
        index_shuf = list(range(len(x_train)))
        # Randomly repeat a few training examples each epoch to avoid
        # having a too-small batch
        while len(index_shuf) % batch_size != 0:
            index_shuf.append(rng.randint(len(x_train)))
        nb_batches = len(index_shuf) // batch_size
        rng.shuffle(index_shuf)
        # Shuffling here versus inside the loop doesn't seem to affect
        # timing very much, but shuffling here makes the code slightly
        # easier to read
        x_train_shuffled = x_train[index_shuf]
        y_train_shuffled = y_train[index_shuf]

        prev = time.time()
        for batch in range(nb_batches):

            # Compute batch start and end indices
            start = batch * batch_size
            end = (batch + 1) * batch_size

            # Perform one training step: split the batch evenly across
            # devices
            feed_dict = {epoch_tf: epoch, batch_tf: batch}
            for dev_idx in xrange(num_devices):
                cur_start = start + dev_idx * device_batch_size
                cur_end = start + (dev_idx + 1) * device_batch_size
                feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
                feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
            if cur_end != end:
                msg = ("batch_size (%d) must be a multiple of num_devices "
                       "(%d).\nCUDA_VISIBLE_DEVICES: %s"
                       "\ndevices: %s")
                # Use .get() so that an unset CUDA_VISIBLE_DEVICES does not
                # replace this ValueError with a KeyError, and keep `args`
                # unshadowed.
                msg_args = (batch_size, num_devices,
                            os.environ.get('CUDA_VISIBLE_DEVICES'),
                            str(devices))
                raise ValueError(msg % msg_args)
            if feed is not None:
                feed_dict.update(feed)

            # `loss_value` is the loss tensor built for the last device.
            _, loss_numpy = sess.run([train_step, loss_value],
                                     feed_dict=feed_dict)

            if np.abs(loss_numpy) > loss_threshold:
                raise ValueError("Extreme loss during training: ", loss_numpy)
            if np.isnan(loss_numpy) or np.isinf(loss_numpy):
                raise ValueError("NaN/Inf loss during training")
        assert end == len(index_shuf)  # Check that all examples were used
        cur = time.time()
        _logger.info("Epoch " + str(epoch) + " took " + str(cur - prev) +
                     " seconds")
        if evaluate is not None:
            if use_ema:
                # Before running evaluation, load the running average
                # parameters into the live slot, so we can see how well
                # the EMA parameters are performing
                sess.run(swap)
            evaluate()
            if use_ema:
                # Swap the parameters back, so that we continue training
                # on the live parameters
                sess.run(swap)
    if use_ema:
        # When training is done, swap the running average parameters into
        # the live slot, so that we use them when we deploy the model
        sess.run(swap)

    return True
Beispiel #10
0
def model_eval(sess, x, y, predictions, X_test=None, Y_test=None,
               feed=None, args=None, aux_loss_lst=None, summary=None):
    """
    Compute the accuracy (and optional auxiliary losses) of a TF model on
    some data
    :param sess: TF session to use when evaluating the graph
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_test: numpy array with test inputs
    :param Y_test: numpy array with test outputs
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :param aux_loss_lst: optional list of per-example auxiliary loss tensors
                 to average over the test set. `None` entries are skipped
                 and reported as 0. Defaults to `[None]`.
    :param summary: optional summary tensor. When given, it is evaluated on
                 every batch and written to './logs/test'.
    :return: a list `[accuracy, aux_loss_0, aux_loss_1, ...]` holding the
             accuracy followed by one averaged value per entry of
             `aux_loss_lst`
    :raises ValueError: if `X_test` or `Y_test` is not supplied
    """
    args = _ArgsWrapper(args or {})
    if aux_loss_lst is None:
        # Avoid a shared mutable default; this is the historical default.
        aux_loss_lst = [None]

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Define accuracy symbolically
    if LooseVersion(tf.__version__) >= LooseVersion('1.0.0'):
        correct_preds = tf.equal(tf.argmax(y, axis=-1),
                                 tf.argmax(predictions, axis=-1))
    else:
        correct_preds = tf.equal(tf.argmax(y, axis=tf.rank(y) - 1),
                                 tf.argmax(predictions,
                                     axis=tf.rank(predictions) - 1))

    # Init result var
    accuracy = 0.0
    total_aux_loss = [0.] * len(aux_loss_lst)

    # This function is typically called once per epoch. Only open an event
    # file when there is something to write, and always close it afterwards
    # so that writers do not pile up and pending summaries get flushed.
    test_writer = None
    if summary is not None:
        test_writer = tf.summary.FileWriter('./logs/test')

    try:
        with sess.as_default():
            # Compute number of batches
            nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
            assert nb_batches * args.batch_size >= len(X_test)

            # Fixed-size buffers: the last (possibly smaller) batch is padded
            # with stale rows, which are excluded from the totals below.
            X_cur = np.zeros((args.batch_size,) + X_test.shape[1:],
                             dtype=X_test.dtype)
            Y_cur = np.zeros((args.batch_size,) + Y_test.shape[1:],
                             dtype=Y_test.dtype)
            for batch in range(nb_batches):
                if batch % 100 == 0 and batch > 0:
                    _logger.debug("Batch " + str(batch))

                # Must not use the `batch_indices` function here, because it
                # repeats some examples.
                # It's acceptable to repeat during training, but not eval.
                start = batch * args.batch_size
                end = min(len(X_test), start + args.batch_size)

                # The last batch may be smaller than all others. This should
                # not affect the accuracy disproportionately.
                cur_batch_size = end - start
                X_cur[:cur_batch_size] = X_test[start:end]
                Y_cur[:cur_batch_size] = Y_test[start:end]
                feed_dict = {x: X_cur, y: Y_cur}
                if feed is not None:
                    feed_dict.update(feed)
                if summary is None:
                    cur_corr_preds = correct_preds.eval(feed_dict=feed_dict)
                else:
                    cur_corr_preds, summary_val = sess.run(
                        [correct_preds, summary], feed_dict=feed_dict)
                    test_writer.add_summary(summary_val)

                accuracy += cur_corr_preds[:cur_batch_size].sum()

                for i, aux_loss in enumerate(aux_loss_lst):
                    if aux_loss is None:
                        continue
                    cur_aux_loss = aux_loss.eval(feed_dict=feed_dict)
                    total_aux_loss[i] += cur_aux_loss[:cur_batch_size].sum()

            assert end >= len(X_test)

            # Divide by number of examples to get final value
            accuracy /= len(X_test)
            for i in range(len(aux_loss_lst)):
                total_aux_loss[i] /= len(X_test)
    finally:
        if test_writer is not None:
            test_writer.close()

    return [accuracy] + total_aux_loss
Beispiel #11
0
def model_train(sess, x, y, predictions, X_train, Y_train, save=False,
                predictions_adv=None, init_all=True, evaluate=None,
                verbose=True, feed=None, args=None, rng=None, aux_loss=None,
                opt_type=None, summary=None):
    """
    Train a TF graph
    :param sess: TF session to use when training the graph
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_train: numpy array with training inputs
    :param Y_train: numpy array with training outputs
    :param save: boolean controlling the save operation
    :param predictions_adv: if set with the adversarial example tensor,
                            will run adversarial training
    :param init_all: (boolean) If set to true, all TF variables in the session
                     are (re)initialized, otherwise only previously
                     uninitialized variables are initialized before training.
    :param evaluate: function that is run after each training epoch
                     (typically to display the test/validation accuracy).
    :param verbose: (boolean) all print statements disabled when set to False.
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
                 If save is True, should also contain 'train_dir'
                 and 'filename'
    :param rng: Instance of numpy.random.RandomState
    :param aux_loss: forwarded unchanged to `model_loss` as its `aux_loss`
                 argument
    :param opt_type: which optimizer to use; must be "momentum" (momentum
                 SGD with a piecewise-constant learning rate schedule) or
                 "adam" (Adam with `args.learning_rate`). There is no usable
                 default: any other value, including None, raises ValueError.
    :param summary: optional summary tensor. When given, it is evaluated
                 together with every training step and written to
                 './logs/train'.
    :return: True if model trained
    :raises ValueError: if `opt_type` is not "momentum" or "adam"
    """
    args = _ArgsWrapper(args or {})

    # Check that necessary arguments were given (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    assert args.learning_rate, "Learning rate was not given in args dict"
    assert args.batch_size, "Batch size was not given in args dict"

    if save:
        assert args.train_dir, "Directory for save was not given in args dict"
        assert args.filename, "Filename for save was not given in args dict"

    # Reject unsupported optimizers up front, before any graph ops, event
    # files or log-level changes have been made.
    if opt_type not in ("momentum", "adam"):
        raise ValueError("opt_type must be 'momentum' or 'adam', got %r"
                         % (opt_type,))

    # Created before the loss/optimizer ops are added, so the graph dumped to
    # the event file is the graph as it was handed to this function.
    train_writer = tf.summary.FileWriter('./logs/train', sess.graph)
    log_level_changed = False
    try:
        if not verbose:
            old_log_level = get_log_level(name=_logger.name)
            set_log_level(logging.WARNING, name=_logger.name)
            log_level_changed = True
            warnings.warn("verbose argument is deprecated and will be removed"
                          " on 2018-02-11. Instead, use utils.set_log_level()."
                          " For backward compatibility, log_level was set to"
                          " logging.WARNING (30).")

        if rng is None:
            rng = np.random.RandomState()

        # Define loss
        loss = model_loss(y, predictions, aux_loss=aux_loss)
        if predictions_adv is not None:
            loss = (loss + model_loss(y, predictions_adv,
                                      aux_loss=aux_loss)) / 2

        global_step = tf.train.get_or_create_global_step()
        if opt_type == "momentum":
            initial_learning_rate = 0.1 * args.batch_size / 128 * 0.01
            batches_per_epoch = X_train.shape[0] / args.batch_size
            _MOMENTUM = 0.9

            # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs.
            boundaries = [int(batches_per_epoch * epoch)
                          for epoch in [100, 150, 200]]
            values = [initial_learning_rate * decay
                      for decay in [1, 0.1, 0.01, 0.001]]
            learning_rate = tf.train.piecewise_constant(
                tf.cast(global_step, tf.int32), boundaries, values)

            # Create a tensor named learning_rate for logging purposes
            tf.identity(learning_rate, name='learning_rate')
            tf.summary.scalar('learning_rate', learning_rate)

            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate,
                momentum=_MOMENTUM)
        else:  # opt_type == "adam"
            optimizer = tf.train.AdamOptimizer(
                learning_rate=args.learning_rate)

        # Run pending update ops (e.g. batch norm statistics) with each step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_step = optimizer.minimize(loss, global_step)

        with sess.as_default():
            if init_all:
                tf.global_variables_initializer().run()
            else:
                initialize_uninitialized_global_variables(sess)

            for epoch in range(args.nb_epochs):
                # Compute number of batches
                nb_batches = int(math.ceil(float(len(X_train)) /
                                           args.batch_size))
                assert nb_batches * args.batch_size >= len(X_train)

                # Indices to shuffle training set
                index_shuf = list(range(len(X_train)))
                rng.shuffle(index_shuf)

                prev = time.time()
                for batch in range(nb_batches):

                    # Compute batch start and end indices
                    start, end = batch_indices(
                        batch, len(X_train), args.batch_size)

                    # Perform one training step
                    feed_dict = {x: X_train[index_shuf[start:end]],
                                 y: Y_train[index_shuf[start:end]]}
                    if feed is not None:
                        feed_dict.update(feed)
                    if summary is None:
                        train_step.run(feed_dict=feed_dict)
                    else:
                        summary_val, _ = sess.run([summary, train_step],
                                                  feed_dict=feed_dict)
                        train_writer.add_summary(
                            summary_val, batch + epoch * nb_batches)
                # Check that all examples were used
                assert end >= len(X_train)
                cur = time.time()
                if verbose:
                    _logger.info("Epoch " + str(epoch) + " took " +
                                 str(cur - prev) + " seconds")
                if evaluate is not None:
                    evaluate()

            if save:
                save_path = os.path.join(args.train_dir, args.filename)
                saver = tf.train.Saver()
                saver.save(sess, save_path)
                _logger.info("Completed model training and saved at: " +
                             str(save_path))
            else:
                _logger.info("Completed model training.")
    finally:
        # Flush and release the event file, and undo the temporary log-level
        # change, even if training raised.
        train_writer.close()
        if log_level_changed:
            set_log_level(old_log_level, name=_logger.name)

    return True
Beispiel #12
0
def train(sess, loss, x_train, y_train,
          init_all=True, evaluate=None, feed=None, args=None,
          rng=None, var_list=None, fprop_args=None, optimizer=None,
          devices=None, x_batch_preprocessor=None):
    """
    Run (optionally multi-replica, synchronous) training to minimize `loss`.

    One pair of input/label placeholders is created per device. The
    gradients computed on every device are combined with `avg_grads` and a
    single optimizer update is applied per batch. When more than two devices
    are used, a "canary" first checks that all devices compute the same
    gradient for identical inputs.

    :param sess: TF session to use when training the graph
    :param loss: loss object; `loss.fprop(x, y, **fprop_args)` must return
                 the tensor to minimize
    :param x_train: numpy array with training inputs
    :param y_train: numpy array with training outputs
    :param init_all: (boolean) If set to true, all TF variables in the session
                     are (re)initialized, otherwise only previously
                     uninitialized variables are initialized before training.
    :param evaluate: function that is run after each training epoch
                     (typically to display the test/validation accuracy).
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
    :param rng: Instance of numpy.random.RandomState
    :param var_list: Optional list of parameters to train.
    :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
    :param optimizer: Optimizer to be used for training
    :param devices: list of device names to use for training
        If None, defaults to: all GPUs, if GPUs are available
                              all devices, if no GPUs are available
    :param x_batch_preprocessor: callable
        Takes a single tensor containing an x_train batch as input
        Returns a single tensor containing an x_train batch as output
        Called to preprocess the data before passing the data to the Loss
    :return: True if model trained
    :raises ValueError: if neither `optimizer` nor `args.learning_rate` is
        given, if `optimizer` is not a `tf.train.Optimizer`, or if a batch
        cannot be split evenly across the devices.
    """
    args = _ArgsWrapper(args or {})
    fprop_args = fprop_args or {}

    # Check that necessary arguments were given (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    if optimizer is None:
        if args.learning_rate is None:
            raise ValueError("Learning rate was not given in args dict")
    assert args.batch_size, "Batch size was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    elif not isinstance(optimizer, tf.train.Optimizer):
        raise ValueError("optimizer object must be from a child class of "
                         "tf.train.Optimizer")

    grads = []
    xs = []
    preprocessed_xs = []
    ys = []

    devices = infer_devices(devices)
    for device in devices:
        with tf.device(device):
            x = tf.placeholder(x_train.dtype, (None,) + x_train.shape[1:])
            # NOTE(review): the label placeholder takes its dtype from
            # x_train, not y_train -- confirm this is intentional.
            y = tf.placeholder(x_train.dtype, (None,) + y_train.shape[1:])
            xs.append(x)
            ys.append(y)

            if x_batch_preprocessor is not None:
                x = x_batch_preprocessor(x)

            # We need to keep track of these so that the canary can feed
            # preprocessed values. If the canary had to feed raw values,
            # stochastic preprocessing could make the canary fail.
            preprocessed_xs.append(x)

            loss_value = loss.fprop(x, y, **fprop_args)

            grads.append(optimizer.compute_gradients(
                loss_value, var_list=var_list))
    num_devices = len(devices)
    print("num_devices: ", num_devices)

    grad = avg_grads(grads)
    # Trigger update operations within the default graph (such as batch_norm).
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = optimizer.apply_gradients(grad)

    batch_size = args.batch_size

    assert batch_size % num_devices == 0
    device_batch_size = batch_size // num_devices

    if init_all:
        sess.run(tf.global_variables_initializer())
    else:
        initialize_uninitialized_global_variables(sess)

    # Check whether the hardware is working correctly

    # So far the failure has only been observed with 3 or more GPUs
    run_canary = num_devices > 2
    if run_canary:
        # Feed the very same examples to every device: all devices must then
        # produce the same gradient.
        canary_feed_dict = {}
        for canary_x, canary_y in safe_zip(preprocessed_xs, ys):
            canary_feed_dict[canary_x] = x_train[:device_batch_size].copy()
            canary_feed_dict[canary_y] = y_train[:device_batch_size].copy()
        # To reduce the runtime and memory cost of this canary,
        # we test the gradient of only one parameter.
        # For now this is just set to the first parameter in the list,
        # because it is an index that is always guaranteed to work.
        # If we think that this is causing false negatives and we should
        # test other parameters, we could test a random parameter from
        # the list or we could rewrite the canary to examine more than
        # one parameter.
        param_to_test = 0
        grad_vars = [dev_grads[param_to_test][0] for dev_grads in grads]
        grad_values = sess.run(grad_vars, feed_dict=canary_feed_dict)
        failed = False
        reference = grad_values[0]
        for i in range(1, num_devices):
            candidate = grad_values[i]
            if reference.shape != candidate.shape:
                print("shape 0 does not match shape %d:" % i,
                      reference.shape, candidate.shape)
                failed = True
                continue
            if not np.allclose(reference, candidate, atol=1e-6):
                print("grad_values[0]: ",
                      reference.mean(), reference.max())
                print("grad_values[%d]: " %
                      i, candidate.mean(), candidate.max())
                # Report the difference for the device that actually
                # disagrees, not always for device 1.
                print("max diff: ", np.abs(reference - candidate).max())
                failed = True
        if failed:
            print("Canary failed.")
            quit()

    for epoch in range(args.nb_epochs):
        # Indices to shuffle training set
        index_shuf = list(range(len(x_train)))
        # Randomly repeat a few training examples each epoch to avoid
        # having a too-small batch
        while len(index_shuf) % batch_size != 0:
            index_shuf.append(rng.randint(len(x_train)))
        nb_batches = len(index_shuf) // batch_size
        rng.shuffle(index_shuf)
        # Shuffling here versus inside the loop doesn't seem to affect
        # timing very much, but shuffling here makes the code slightly
        # easier to read
        x_train_shuffled = x_train[index_shuf]
        y_train_shuffled = y_train[index_shuf]

        prev = time.time()
        # Keeps the post-loop check well defined when there are no batches.
        end = 0
        for batch in range(nb_batches):

            # Compute batch start and end indices
            start = batch * batch_size
            end = (batch + 1) * batch_size

            # Perform one training step: every device receives its own
            # contiguous slice of the batch.
            feed_dict = {}
            assert end - start == batch_size
            for dev_idx in range(num_devices):
                cur_start = start + dev_idx * device_batch_size
                cur_end = start + (dev_idx + 1) * device_batch_size
                feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
                feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
            if cur_end != end:
                msg = ("batch_size (%d) must be a multiple of num_devices "
                       "(%d).\nCUDA_VISIBLE_DEVICES: %s"
                       "\ndevices: %s")
                # Use .get() so that an unset environment variable does not
                # replace the intended ValueError with a KeyError.
                msg_args = (batch_size, num_devices,
                            os.environ.get('CUDA_VISIBLE_DEVICES'),
                            str(devices))
                raise ValueError(msg % msg_args)
            if feed is not None:
                feed_dict.update(feed)
            sess.run(train_step, feed_dict=feed_dict)
        assert end == len(index_shuf)  # Check that all examples were used
        cur = time.time()
        _logger.info("Epoch " + str(epoch) + " took " +
                     str(cur - prev) + " seconds")
        if evaluate is not None:
            evaluate()

    return True
Beispiel #13
0
def model_eval(sess,
               x,
               y,
               predictions,
               X_test=None,
               Y_test=None,
               feed=None,
               filename=None,
               save_logit=False,
               args=None):
    """
    Compute the accuracy of a TF model on some data.

    Besides the accuracy, precision, recall and F1 are computed and printed,
    treating class index 1 as the positive class and class index 0 as the
    negative class.

    :param sess: TF session to use
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_test: numpy array with test inputs
    :param Y_test: numpy array with test outputs
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param filename: label printed next to every metric; also used in the
                     name of the pickle file when `save_logit` is set
    :param save_logit: if True, the model outputs for all test examples are
                       pickled to
                       '../../cleverhans/pickle/<filename>_<preprocess>.pickle'
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :return: a float with the accuracy value
    :raises ValueError: if `X_test` or `Y_test` is missing
    """
    global _model_eval_cache
    args = _ArgsWrapper(args or {})
    print(filename)

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Define accuracy symbolically; the op is cached so that repeated calls
    # with the same tensors do not keep adding nodes to the graph.
    key = (y, predictions)
    if key in _model_eval_cache:
        correct_preds = _model_eval_cache[key]
    else:
        correct_preds = tf.equal(tf.argmax(y, axis=-1),
                                 tf.argmax(predictions, axis=-1))
        _model_eval_cache[key] = correct_preds

    # Init result vars
    accuracy = 0.0
    logit_batches = []

    with sess.as_default():
        # Compute number of batches
        nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
        assert nb_batches * args.batch_size >= len(X_test)

        X_cur = np.zeros((args.batch_size, ) + X_test.shape[1:],
                         dtype=X_test.dtype)
        Y_cur = np.zeros((args.batch_size, ) + Y_test.shape[1:],
                         dtype=Y_test.dtype)
        for batch in range(nb_batches):
            if batch % 100 == 0 and batch > 0:
                _logger.debug("Batch " + str(batch))

            # Must not use the `batch_indices` function here, because it
            # repeats some examples.
            # It's acceptable to repeat during training, but not eval.
            start = batch * args.batch_size
            end = min(len(X_test), start + args.batch_size)

            # The last batch may be smaller than all others. This should not
            # affect the accuracy disproportionately.
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = X_test[start:end]
            Y_cur[:cur_batch_size] = Y_test[start:end]

            feed_dict = {x: X_cur, y: Y_cur}
            if feed is not None:
                feed_dict.update(feed)
            # Fetch both tensors in a single run so that the extra `feed`
            # entries also apply to the predictions and the model is only
            # evaluated once per batch.
            cur_corr_preds, prediction_arr = sess.run(
                [correct_preds, predictions], feed_dict=feed_dict)

            logit_batches.append(prediction_arr)
            accuracy += cur_corr_preds[:cur_batch_size].sum()

        assert end >= len(X_test)

        # Divide by number of examples to get final value
        accuracy /= len(X_test)

        # Stack once instead of re-copying the whole array every batch.
        # Rows beyond len(X_test) are padding from the last batch.
        logit_arr = np.vstack(logit_batches)

        # calculate metric
        true_labels = np.argmax(Y_test, axis=1)
        pred_labels = np.argmax(logit_arr[:len(Y_test)], axis=1)
        # True Positive (TP): we predict a label of 1 (positive), and the true label is 1.
        TP = np.sum(np.logical_and(pred_labels == 1, true_labels == 1))

        # True Negative (TN): we predict a label of 0 (negative), and the true label is 0.
        TN = np.sum(np.logical_and(pred_labels == 0, true_labels == 0))

        # False Positive (FP): we predict a label of 1 (positive), but the true label is 0.
        FP = np.sum(np.logical_and(pred_labels == 1, true_labels == 0))

        # False Negative (FN): we predict a label of 0 (negative), but the true label is 1.
        FN = np.sum(np.logical_and(pred_labels == 0, true_labels == 1))

        recall = TP / float(TP + FN)
        precision = TP / float(TP + FP)
        f1 = 2. * TP / (2. * TP + FP + FN)
    print(filename, "accuracy", accuracy)
    print(filename, "precision", precision)
    print(filename, "recall", recall)
    print(filename, "f1", f1)
    if save_logit:
        # Drop the padding rows so that exactly one row per example is saved.
        assert len(logit_arr) >= len(X_test)
        logit_arr = logit_arr[:len(X_test)]
        assert len(logit_arr) == len(X_test)
        if 'preprocess' in FLAGS.__flags:
            preprocess = FLAGS.preprocess
        else:
            preprocess = ''
        with open(
                '../../cleverhans/pickle/{}_{}.pickle'.format(
                    filename, preprocess), 'wb') as handle:
            pickle.dump(logit_arr, handle)

    return accuracy
Beispiel #14
0
def confident_model_eval(sess, x, y, predictions, X_test=None, Y_test=None,
                         feed=None, args=None):
    """
    Compute the accuracy of a TF model that also outputs a rejection score.

    When `args.use_dic` is set, the rejection score `predictions[1]` is
    compared with `args.reject_threshold`:

    * clean data (`args.is_clean` true): an example counts as correct when
      it is classified correctly AND its score is below the threshold;
    * otherwise: an example counts as correct when it is classified
      correctly OR its score is at least the threshold.

    Without `args.use_dic` only the class prediction is checked.

    :param sess: TF session to use
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions, [prediction, confidence]
    :param X_test: numpy array with test inputs
    :param Y_test: numpy array with test outputs
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`, `reject_threshold` and
                 `is_clean`; `use_dic` is optional.
    :return: a float with the accuracy value
    :raises ValueError: if `X_test` or `Y_test` is missing
    """
    args = _ArgsWrapper(args or {})

    # Check that necessary inputs are given
    assert len(predictions) == 2, "Number of predictions was not match"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Check that necessary arguments are given.
    # `is not None` so that a threshold of 0 is not mistaken for "missing".
    assert args.reject_threshold is not None, \
        "The reject threshold was not given in args dict"
    assert args.is_clean is not None, "The clean adversarial indicator was not given in args dict"
    assert args.batch_size, "Batch size was not given in args dict"

    class_pred = predictions[0]

    # Define accuracy symbolically
    if LooseVersion(tf.__version__) >= LooseVersion('1.0.0'):
        correct_class = tf.equal(tf.argmax(y, axis=-1),
                                 tf.argmax(class_pred, axis=-1))
    else:
        # The rank must be taken from the class prediction tensor, not from
        # the two-element `predictions` list.
        correct_class = tf.equal(tf.argmax(y, axis=tf.rank(y) - 1),
                                 tf.argmax(class_pred,
                                           axis=tf.rank(class_pred) - 1))
    if args.use_dic:
        score = predictions[1]
        threshold = tf.ones(shape=tf.shape(score)) * args.reject_threshold
        if args.is_clean:
            correct_reject = tf.reshape(tf.less(score, threshold),
                                        shape=[-1])
            correct_preds = tf.logical_and(correct_class,
                                           correct_reject)
        else:
            correct_reject = tf.reshape(tf.greater_equal(score, threshold),
                                        shape=[-1])
            correct_preds = tf.logical_or(correct_class,
                                          correct_reject)
    else:
        correct_preds = correct_class

    # Init result var
    accuracy = 0.0

    with sess.as_default():
        # Compute number of batches
        nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
        assert nb_batches * args.batch_size >= len(X_test)

        X_cur = np.zeros((args.batch_size,) + X_test.shape[1:],
                         dtype=X_test.dtype)
        Y_cur = np.zeros((args.batch_size,) + Y_test.shape[1:],
                         dtype=Y_test.dtype)
        for batch in range(nb_batches):
            if batch % 100 == 0 and batch > 0:
                _logger.debug("Batch " + str(batch))

            # Must not use the `batch_indices` function here, because it
            # repeats some examples.
            # It's acceptable to repeat during training, but not eval.
            start = batch * args.batch_size
            end = min(len(X_test), start + args.batch_size)

            # The last batch may be smaller than all others. This should not
            # affect the accuracy disproportionately.
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = X_test[start:end]
            Y_cur[:cur_batch_size] = Y_test[start:end]
            feed_dict = {x: X_cur, y: Y_cur}
            if feed is not None:
                feed_dict.update(feed)
            cur_corr_preds = correct_preds.eval(feed_dict=feed_dict)

            # Only the first `cur_batch_size` rows are real examples.
            accuracy += cur_corr_preds[:cur_batch_size].sum()

        assert end >= len(X_test)

        # Divide by number of examples to get final value
        accuracy /= len(X_test)

    return accuracy
Beispiel #15
0
def train(sess,
          loss,
          x_train,
          y_train,
          init_all=True,
          evaluate=None,
          feed=None,
          args=None,
          rng=None,
          var_list=None,
          fprop_args=None,
          optimizer=None,
          devices=None,
          x_batch_preprocessor=None):
    """
    Run (optionally multi-replica, synchronous) training to minimize `loss`.

    One pair of input/label placeholders is created per device. The
    gradients computed on every device are combined with `avg_grads` and a
    single optimizer update is applied per batch.

    :param sess: TF session to use when training the graph
    :param loss: loss object; `loss.fprop(x, y, **fprop_args)` must return
                 the tensor to minimize
    :param x_train: numpy array with training inputs
    :param y_train: numpy array with training outputs
    :param init_all: (boolean) If set to true, all TF variables in the session
                     are (re)initialized, otherwise only previously
                     uninitialized variables are initialized before training.
    :param evaluate: function that is run after each training epoch
                     (typically to display the test/validation accuracy).
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
    :param rng: Instance of numpy.random.RandomState
    :param var_list: Optional list of parameters to train.
    :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
    :param optimizer: Optimizer to be used for training
    :param devices: list of device names to use for training
        If None, defaults to: all GPUs, if GPUs are available
                              all devices, if no GPUs are available
    :param x_batch_preprocessor: callable
        Takes a single tensor containing an x_train batch as input
        Returns a single tensor containing an x_train batch as output
        Called to preprocess the data before passing the data to the Loss
    :return: True if model trained
    :raises ValueError: if neither `optimizer` nor `args.learning_rate` is
        given, if `optimizer` is not a `tf.train.Optimizer`, or if
        `batch_size` is not a multiple of the number of devices.
    """
    args = _ArgsWrapper(args or {})
    fprop_args = fprop_args or {}

    # Check that necessary arguments were given (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    if optimizer is None:
        if args.learning_rate is None:
            raise ValueError("Learning rate was not given in args dict")
    assert args.batch_size, "Batch size was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    elif not isinstance(optimizer, tf.train.Optimizer):
        raise ValueError("optimizer object must be from a child class of "
                         "tf.train.Optimizer")

    grads = []
    xs = []
    ys = []

    devices = infer_devices(devices)
    for device in devices:
        with tf.device(device):
            x = tf.placeholder(x_train.dtype, (None, ) + x_train.shape[1:])
            # NOTE(review): the label placeholder takes its dtype from
            # x_train, not y_train -- confirm this is intentional.
            y = tf.placeholder(x_train.dtype, (None, ) + y_train.shape[1:])
            xs.append(x)
            ys.append(y)

            if x_batch_preprocessor is not None:
                x = x_batch_preprocessor(x)
            loss_value = loss.fprop(x, y, **fprop_args)

            grads.append(
                optimizer.compute_gradients(loss_value, var_list=var_list))
    num_devices = len(devices)
    print("num_devices: ", num_devices)
    grad = avg_grads(grads)
    # Trigger update operations within the default graph (such as batch_norm).
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = optimizer.apply_gradients(grad)

    batch_size = args.batch_size

    with sess.as_default():
        if init_all:
            sess.run(tf.global_variables_initializer())
        else:
            initialize_uninitialized_global_variables(sess)

        for epoch in range(args.nb_epochs):
            # Indices to shuffle training set
            index_shuf = list(range(len(x_train)))
            # Randomly repeat a few training examples each epoch to avoid
            # having a too-small batch
            while len(index_shuf) % batch_size != 0:
                index_shuf.append(rng.randint(len(x_train)))
            nb_batches = len(index_shuf) // batch_size
            rng.shuffle(index_shuf)
            # Shuffling here versus inside the loop doesn't seem to affect
            # timing very much, but shuffling here makes the code slightly
            # easier to read
            x_train_shuffled = x_train[index_shuf]
            y_train_shuffled = y_train[index_shuf]

            prev = time.time()
            # Keeps the post-loop check well defined when there are no
            # batches.
            end = 0
            for batch in range(nb_batches):

                # Compute batch start and end indices
                start = batch * batch_size
                end = (batch + 1) * batch_size

                # Perform one training step: every device receives its own
                # contiguous slice of the batch.
                feed_dict = {}
                diff = end - start
                assert diff == batch_size
                stride = diff // num_devices
                for dev_idx in range(num_devices):
                    cur_start = start + dev_idx * stride
                    cur_end = start + (dev_idx + 1) * stride
                    feed_dict[
                        xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
                    feed_dict[
                        ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
                # The slices only cover the whole batch when batch_size is
                # a multiple of num_devices.
                if cur_end != end:
                    msg = ("batch_size (%d) must be a multiple of num_devices "
                           "(%d).\nCUDA_VISIBLE_DEVICES: %s"
                           "\ndevices: %s")
                    # Use .get() so that an unset environment variable does
                    # not replace the intended ValueError with a KeyError.
                    msg_args = (batch_size, num_devices,
                                os.environ.get('CUDA_VISIBLE_DEVICES'),
                                str(devices))
                    raise ValueError(msg % msg_args)
                if feed is not None:
                    feed_dict.update(feed)
                sess.run(train_step, feed_dict=feed_dict)
            assert end == len(index_shuf)  # Check that all examples were used
            cur = time.time()
            _logger.info("Epoch " + str(epoch) + " took " + str(cur - prev) +
                         " seconds")
            if evaluate is not None:
                evaluate()

    return True
def gan_train_v2(
    sess,
    x,
    y,
    predictions,
    X_train,
    Y_train,
    loss_func=None,
    optimizer=None,
    predictions_adv=None,
    init_all=True,
    evaluate=None,
    feed=None,
    args=None,
    rng=None,
    var_list=None,
):
    """
    Train a classifier together with a discriminator (GAN-style).

    During the first `args.pretrain_epochs` epochs only the classifier is
    trained on the clean and adversarial classification losses. Afterwards,
    for every classifier batch the discriminator is first trained on
    `args.inner_epochs` randomly chosen batches, then the classifier takes
    one step on its loss minus `args.trade_off` times the discriminator's
    loss on adversarial examples.

    :param sess: TF session to use when training the graph
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions [class_pred, source_pred]
    :param X_train: numpy array with training inputs
    :param Y_train: numpy array with training outputs
    :param loss_func: list of loss functions [clf_loss, dic_loss]
    :param optimizer: list of tensorflow optimizers
                      [clf_optimizer, dic_optimizer]
    :param predictions_adv: predictions on adversarial examples
                            [adv_class_pred, adv_source_pred]
    :param init_all: (boolean) If set to true, all TF variables in the session
                     are (re)initialized, otherwise only previously
                     uninitialized variables are initialized before training.
    :param evaluate: function that is run after each training epoch
                     (typically to display the test/validation accuracy).
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `batch_size`, `trade_off`
                 and `inner_epochs`. May contain `pretrain_epochs`
                 (treated as 0 when absent), `weight_decay` and
                 `global_step`.
    :param rng: Instance of numpy.random.RandomState
    :param var_list: list of variable lists [clf_vars, dic_vars]
    :return: True if model trained
    """
    args = _ArgsWrapper(args or {})

    # Check that necessary inputs were given
    assert len(predictions) == 2, "Number of prediction inputs was not match"
    assert len(predictions_adv
               ) == 2, "Number of adversarial prediction inputs was not match"
    assert len(var_list) == 2, "Number of variable list was not match"

    # Check that necessary arguments were given (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    assert args.batch_size, "Batch size was not given in args dict"
    assert args.trade_off, "Balance parameter was not given in args dict"
    assert args.inner_epochs, "Number of inner epochs was not given in args dict"

    # Check that necessary operators were given
    assert len(loss_func) == 2, "Number of loss function was not match"
    assert len(optimizer) == 2, "Number of optimizer was not match"

    if rng is None:
        rng = np.random.RandomState()

    # A missing `pretrain_epochs` means "no pre-training".
    pretrain_epochs = args.pretrain_epochs or 0

    # Define discriminator loss: adversarial inputs are labelled 1 and
    # clean inputs 0.
    adv_source_loss = loss_func[1](tf.ones(shape=[tf.shape(y)[0], 1]),
                                   predictions_adv[1])
    dic_loss = (loss_func[1](tf.zeros(shape=[tf.shape(y)[0], 1]),
                             predictions[1]) + adv_source_loss) / 2

    # Define classifier loss
    class_loss = loss_func[0](y, predictions[0])
    pre_loss = (class_loss + loss_func[0](y, predictions_adv[0])) / 2
    clf_loss = pre_loss - args.trade_off * adv_source_loss

    # Add weight decay
    if args.weight_decay is not None:
        # Substring tests instead of `find(...) > 0`, which skipped
        # variables whose name starts with the searched text.
        weights = [
            tf.nn.l2_loss(var) for var in tf.trainable_variables()
            if 'clf' in var.op.name and 'kernel' in var.op.name
        ]
        weight_loss = args.weight_decay * tf.add_n(weights)
        pre_loss += weight_loss
        clf_loss += weight_loss

    # Define training operation
    if args.global_step is not None:
        pre_step = optimizer[0].minimize(pre_loss,
                                         var_list=var_list[0],
                                         global_step=args.global_step)
        clf_step = optimizer[0].minimize(clf_loss,
                                         var_list=var_list[0],
                                         global_step=args.global_step)
    else:
        pre_step = optimizer[0].minimize(pre_loss, var_list=var_list[0])
        clf_step = optimizer[0].minimize(clf_loss, var_list=var_list[0])
    dic_step = optimizer[1].minimize(dic_loss, var_list=var_list[1])

    with sess.as_default():
        if hasattr(tf, "global_variables_initializer"):
            if init_all:
                tf.global_variables_initializer().run()
            else:
                initialize_uninitialized_global_variables(sess)
        else:
            warnings.warn("Update your copy of tensorflow; future versions of "
                          "CleverHans may drop support for this version.")
            sess.run(tf.initialize_all_variables())

        for epoch in range(args.nb_epochs):
            # Compute number of batches
            nb_batches = int(math.ceil(float(len(X_train)) / args.batch_size))
            assert nb_batches * args.batch_size >= len(X_train)

            # Indices to shuffle training set
            index_shuf = list(range(len(X_train)))
            rng.shuffle(index_shuf)

            prev = time.time()

            # Last observed losses of this epoch; they stay None when the
            # corresponding loss was not evaluated.
            cl = None
            dl = None
            # Keeps the post-loop check well defined when there are no
            # batches.
            end = 0

            if epoch < pretrain_epochs:
                # Pre-train Classifier
                _logger.info("Pre-train Epoch")
                for batch in range(nb_batches):
                    # Train Classifier
                    # Compute batch start and end indices
                    start, end = batch_indices(batch, len(X_train),
                                               args.batch_size)
                    # Perform one training step
                    feed_dict = {
                        x: X_train[index_shuf[start:end]],
                        y: Y_train[index_shuf[start:end]]
                    }
                    if feed is not None:
                        feed_dict.update(feed)
                    # Fetch the loss as well so that it can be logged below.
                    _, cl = sess.run(fetches=[pre_step, pre_loss],
                                     feed_dict=feed_dict)
            else:
                # GAN Training
                _logger.info("GAN-train Epoch")
                for batch in range(nb_batches):
                    # Train Discriminator on randomly chosen batches. The
                    # caller's `rng` is used so that a seeded run is
                    # reproducible.
                    inner_batches = rng.choice(nb_batches,
                                               args.inner_epochs)
                    for inner_batch in inner_batches:
                        # Compute batch start and end indices
                        inner_start, inner_end = batch_indices(
                            inner_batch, len(X_train), args.batch_size)
                        # Perform one training step
                        feed_dict = {
                            x: X_train[index_shuf[inner_start:inner_end]],
                            y: Y_train[index_shuf[inner_start:inner_end]]
                        }
                        if feed is not None:
                            feed_dict.update(feed)
                        dic_step.run(feed_dict=feed_dict)
                    # Train Classifier
                    # Compute batch start and end indices
                    start, end = batch_indices(batch, len(X_train),
                                               args.batch_size)
                    # Perform one training step
                    feed_dict = {
                        x: X_train[index_shuf[start:end]],
                        y: Y_train[index_shuf[start:end]]
                    }
                    if feed is not None:
                        feed_dict.update(feed)
                    _, cl, dl = sess.run(
                        fetches=[clf_step, pre_loss, dic_loss],
                        feed_dict=feed_dict)

            # check loss (only what was actually measured this epoch)
            if dl is not None:
                _logger.info(
                    "Epoch %d - Classifier Loss %4f - Discriminator Loss %4f " %
                    (epoch, cl, dl))
            elif cl is not None:
                _logger.info("Epoch %d - Classifier Loss %4f " % (epoch, cl))

            # Check that all examples were used
            assert end >= len(X_train)
            cur = time.time()
            _logger.info("Epoch " + str(epoch) + " took " + str(cur - prev) +
                         " seconds")

            if evaluate is not None:
                evaluate()

        _logger.info("Completed model training.")

    return True
Beispiel #17
0
def train_ae(sess,
             loss,
             x_train,
             x_train_target,
             init_all=False,
             evaluate=None,
             feed=None,
             args=None,
             rng=None,
             var_list=None,
             fprop_args=None,
             optimizer=None,
             devices=None,
             x_batch_preprocessor=None,
             use_ema=False,
             ema_decay=.998,
             run_canary=None,
             loss_threshold=1e5,
             dataset_train=None,
             dataset_size=None):
    """
    Train a model on (input, target) pairs, one loss replica per device.

    A pair of placeholders (input, target) is created on every device
    returned by `infer_devices(devices)`, `loss.fprop` is evaluated on each
    pair, and the per-device gradients are combined with `avg_grads` into a
    single training step. Every batch is split evenly across the devices.

    :param sess: TF session used to run initializers and training steps
    :param loss: object whose `fprop(x, x_target, **fprop_args)` returns the
                 loss tensor to minimize
    :param x_train: array of training inputs; must be None when
                    `dataset_train` is given
    :param x_train_target: array of training targets, indexed in lockstep
                           with `x_train`
    :param init_all: if True, run `tf.global_variables_initializer()`;
                     otherwise only uninitialized global variables are
                     initialized
    :param evaluate: optional callable invoked after every epoch
    :param feed: optional dict merged into the feed dict of every step
    :param args: dict or argparse `Namespace` object. Must provide
                 `nb_epochs` and `batch_size`, plus `learning_rate` when
                 `optimizer` is None
    :param rng: numpy.random.RandomState used to pad and shuffle the
                training indices; a fresh one is created when None
    :param var_list: variables handed to `compute_gradients` and, when
                     `use_ema` is set, to the moving average.
                     NOTE(review): it is iterated directly in the EMA
                     branch, so it apparently must not be None there.
    :param fprop_args: dict of extra keyword arguments for `loss.fprop`
    :param optimizer: `tf.train.Optimizer` instance; Adam with
                      `args.learning_rate` is used when None
    :param devices: device specification forwarded to `infer_devices`
    :param x_batch_preprocessor: optional callable applied to both the input
                                 and the target placeholder
    :param use_ema: if True, keep an exponential moving average of
                    `var_list`, evaluate with the averaged parameters and
                    leave them in the live variables when training ends
    :param ema_decay: decay value, or a callable receiving the epoch and
                      batch placeholders and returning the decay
    :param run_canary: deprecated; a non-None value only triggers a warning
    :param loss_threshold: training aborts with ValueError when the absolute
                           loss of a step exceeds this value
    :param dataset_train: optional object providing
                          `make_one_shot_iterator()`; each element fetched
                          from it is used as one `(inputs, targets)` batch
    :param dataset_size: number of examples in `dataset_train`; required
                         whenever `dataset_train` is given
    :return: True once training has finished
    :raises ValueError: on missing `nb_epochs`/`learning_rate`, an invalid
                        optimizer, a missing `dataset_size`, a batch that
                        cannot be split across devices, or an extreme,
                        NaN or infinite loss
    """
    # Check whether the hardware is working correctly
    start_time = time.time()
    canary.run_canary()
    if run_canary is not None:
        warnings.warn("The `run_canary` argument is deprecated. The canary "
                      "is now much cheaper and thus runs all the time. The "
                      "canary now uses its own loss function so it is not "
                      "necessary to turn off the canary when training with "
                      " a stochastic loss. Simply quit passing `run_canary`."
                      "Passing `run_canary` may become an error on or after "
                      "2019-10-16.")

    args = _ArgsWrapper(args or {})
    fprop_args = fprop_args or {}

    # Check that necessary arguments were given (see doc above)
    # Be sure to support 0 epochs for debugging purposes
    if args.nb_epochs is None:
        raise ValueError("`args` must specify number of epochs")
    if optimizer is None:
        if args.learning_rate is None:
            raise ValueError("Learning rate was not given in args dict")
    assert args.batch_size, "Batch size was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    else:
        if not isinstance(optimizer, tf.train.Optimizer):
            raise ValueError("optimizer object must be from a child class of "
                             "tf.train.Optimizer")

    grads = []
    xs = []
    xs_t = []
    preprocessed_xs = []
    preprocessed_xs_t = []
    if dataset_train is not None:
        assert x_train is None and x_batch_preprocessor is None
        if dataset_size is None:
            raise ValueError("You must provide a dataset size")
        data_iterator = dataset_train.make_one_shot_iterator().get_next()
        # Fetch one batch up front so that dtype/shape are available for
        # building the placeholders below.
        x_train, x_train_target = sess.run(data_iterator)

    devices = infer_devices(devices)
    for device in devices:
        with tf.device(device):
            x = tf.placeholder(x_train.dtype, (None, ) + x_train.shape[1:])
            x_t = tf.placeholder(x_train_target.dtype,
                                 (None, ) + x_train_target.shape[1:])
            xs.append(x)
            xs_t.append(x_t)

            if x_batch_preprocessor is not None:
                x = x_batch_preprocessor(x)
                x_t = x_batch_preprocessor(x_t)

            # We need to keep track of these so that the canary can feed
            # preprocessed values. If the canary had to feed raw values,
            # stochastic preprocessing could make the canary fail.
            preprocessed_xs.append(x)
            preprocessed_xs_t.append(x_t)

            loss_value = loss.fprop(x, x_t, **fprop_args)

            grads.append(
                optimizer.compute_gradients(loss_value, var_list=var_list))
    num_devices = len(devices)
    print("num_devices: ", num_devices)

    grad = avg_grads(grads)
    # Trigger update operations within the default graph (such as batch_norm).
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = optimizer.apply_gradients(grad)

    epoch_tf = tf.placeholder(tf.int32, [])
    batch_tf = tf.placeholder(tf.int32, [])

    if use_ema:
        if callable(ema_decay):
            ema_decay = ema_decay(epoch_tf, batch_tf)
        ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
        with tf.control_dependencies([train_step]):
            train_step = ema.apply(var_list)
        # Get pointers to the EMA's running average variables
        avg_params = [ema.average(param) for param in var_list]
        # Make temporary buffers used for swapping the live and running average
        # parameters
        tmp_params = [
            tf.Variable(param, trainable=False) for param in var_list
        ]
        # Define the swapping operation: live -> tmp, avg -> live, tmp -> avg,
        # chained through control dependencies so they run in that order.
        param_to_tmp = [
            tf.assign(tmp, param)
            for tmp, param in safe_zip(tmp_params, var_list)
        ]
        with tf.control_dependencies(param_to_tmp):
            avg_to_param = [
                tf.assign(param, avg)
                for param, avg in safe_zip(var_list, avg_params)
            ]
        with tf.control_dependencies(avg_to_param):
            tmp_to_avg = [
                tf.assign(avg, tmp)
                for avg, tmp in safe_zip(avg_params, tmp_params)
            ]
        swap = tmp_to_avg

    batch_size = args.batch_size

    assert batch_size % num_devices == 0
    device_batch_size = batch_size // num_devices

    if init_all:
        sess.run(tf.global_variables_initializer())
    else:
        initialize_uninitialized_global_variables(sess)

    for epoch in xrange(args.nb_epochs):
        if dataset_train is not None:
            nb_batches = int(math.ceil(float(dataset_size) / batch_size))
        else:
            # Indices to shuffle training set
            index_shuf = list(range(len(x_train)))
            # Randomly repeat a few training examples each epoch to avoid
            # having a too-small batch
            while len(index_shuf) % batch_size != 0:
                index_shuf.append(rng.randint(len(x_train)))
            nb_batches = len(index_shuf) // batch_size
            rng.shuffle(index_shuf)
            # Shuffling here versus inside the loop doesn't seem to affect
            # timing very much, but shuffling here makes the code slightly
            # easier to read
            x_train_shuffled = x_train[index_shuf]
            x_train_target_shuffled = x_train_target[index_shuf]

        prev = time.time()
        for batch in range(nb_batches):
            if dataset_train is not None:
                # Each fetched element is treated as one complete batch.
                x_train_shuffled, x_train_target_shuffled = sess.run(
                    data_iterator)
                start, end = 0, batch_size
            else:
                # Compute batch start and end indices
                start = batch * batch_size
                end = (batch + 1) * batch_size
                # Perform one training step
                diff = end - start
                assert diff == batch_size

            feed_dict = {epoch_tf: epoch, batch_tf: batch}
            for dev_idx in xrange(num_devices):
                cur_start = start + dev_idx * device_batch_size
                cur_end = start + (dev_idx + 1) * device_batch_size
                feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
                feed_dict[
                    xs_t[dev_idx]] = x_train_target_shuffled[cur_start:cur_end]
            if cur_end != end and dataset_train is None:
                msg = ("batch_size (%d) must be a multiple of num_devices "
                       "(%d).\nCUDA_VISIBLE_DEVICES: %s"
                       "\ndevices: %s")
                # Use a separate name so the `args` wrapper is not clobbered,
                # and `.get` so an unset CUDA_VISIBLE_DEVICES does not turn
                # this diagnostic into a KeyError.
                msg_args = (batch_size, num_devices,
                            os.environ.get('CUDA_VISIBLE_DEVICES'),
                            str(devices))
                raise ValueError(msg % msg_args)
            if feed is not None:
                feed_dict.update(feed)

            # `loss_value` is the tensor built for the last device in the
            # loop above; it is what gets monitored here.
            _, loss_numpy = sess.run([train_step, loss_value],
                                     feed_dict=feed_dict)

            if np.abs(loss_numpy) > loss_threshold:
                raise ValueError("Extreme loss during training: ", loss_numpy)
            if np.isnan(loss_numpy) or np.isinf(loss_numpy):
                raise ValueError("NaN/Inf loss during training")
        assert (dataset_train is not None
                or end == len(index_shuf))  # Check that all examples were used
        cur = time.time()
        _logger.info("Epoch " + str(epoch) + " took " + str(cur - prev) +
                     " seconds")
        if evaluate is not None:
            if use_ema:
                # Before running evaluation, load the running average
                # parameters into the live slot, so we can see how well
                # the EMA parameters are performing
                sess.run(swap)
            evaluate()
            if use_ema:
                # Swap the parameters back, so that we continue training
                # on the live parameters
                sess.run(swap)
    if use_ema:
        # When training is done, swap the running average parameters into
        # the live slot, so that we use them when we deploy the model
        sess.run(swap)
    end_time = time.time()
    print("Time taken for training: ", end_time - start_time)
    return True
Beispiel #18
0
def model_eval_gan(
    sess,
    images,
    labels,
    predictions=None,
    predictions_rec=None,
    test_images=None,
    test_labels=None,
    feed=None,
    args=None,
    model=None,
    diff_op=None,
):
    """Computes the accuracy of a model on test data as well as the
    reconstruction errors for attack detection.

    Args:
        sess: TF session used to run the evaluation ops.
        images: input placeholder.
        labels: output placeholder (for labels); the class is taken as the
            argmax over its last axis.
        predictions: model output predictions.
        predictions_rec: model output predictions for reconstructions
            (optional).
        test_images: numpy array with test inputs.
        test_labels: numpy array with test outputs.
        feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
        args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
        model: (deprecated) if not None, holds model output predictions.
        diff_op: optional op fetched for every batch; its per-batch values
            are concatenated into the third entry of `roc_info`.

    Returns:
        accuracy: The accuracy on the test data.
        accuracy_rec: The accuracy on the reconstructed test data (only
            returned if predictions_rec is provided).
        roc_info: list `[labels, predictions, diffs]` holding the label
            classes, predicted classes and `diff_op` values for all test
            examples (`diffs` is an empty list when `diff_op` is None).

    Raises:
        ValueError: if the test data is missing, or if not exactly one of
            `predictions` and `model` is given.
    """
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if test_images is None or test_labels is None:
        raise ValueError("test_images argument and test_labels argument "
                         "must be supplied.")
    if model is None and predictions is None:
        raise ValueError("One of model argument "
                         "or predictions argument must be supplied.")
    if model is not None:
        warnings.warn("model argument is deprecated. "
                      "Switch to predictions argument. "
                      "model argument will be removed after 2018-01-05.")
        if predictions is None:
            predictions = model
        else:
            raise ValueError("Exactly one of model argument"
                             " and predictions argument should be specified.")

    # Define accuracy symbolically. Counts (not means) are accumulated so
    # that a smaller final batch is weighted correctly.
    label_classes = tf.argmax(labels, axis=-1)
    pred_classes = tf.argmax(predictions, axis=-1)
    acc_value = tf.reduce_sum(
        tf.to_float(tf.equal(label_classes, pred_classes)))

    # The fetch list is the same for every batch, so build it once and
    # remember where the optional entries live.
    run_list = [acc_value, label_classes, pred_classes]
    diff_idx = None
    rec_idx = None
    if diff_op is not None:
        diff_idx = len(run_list)
        run_list.append(diff_op)
    if predictions_rec is not None:
        correct_preds_rec = tf.equal(tf.argmax(labels, axis=-1),
                                     tf.argmax(predictions_rec, axis=-1))
        rec_idx = len(run_list)
        run_list.append(tf.reduce_sum(tf.to_float(correct_preds_rec)))

    # Build the initializer op once; creating it inside the loop would add a
    # new op to the graph for every batch.
    reset_local_vars = tf.local_variables_initializer()

    diffs = []
    all_labels = []
    preds = []

    accuracy = 0.0
    accuracy_rec = 0.0

    # Compute number of batches.
    nb_batches = int(math.ceil(float(len(test_images)) / args.batch_size))
    assert nb_batches * args.batch_size >= len(test_images)

    for batch in range(nb_batches):
        # To initialize the variables of Defense-GAN at test time.
        sess.run(reset_local_vars)
        print("[#] Eval batch {}/{}".format(batch, nb_batches))

        # Must not use the `batch_indices` function here, because it
        # repeats some examples.
        # It's acceptable to repeat during training, but not eval.
        start = batch * args.batch_size
        end = min(len(test_images), start + args.batch_size)

        # The last batch may be smaller than all others, so the exact slice
        # is fed rather than a padded buffer.
        feed_dict = {
            images: test_images[start:end],
            labels: test_labels[start:end]
        }
        if feed is not None:
            feed_dict.update(feed)

        outs = sess.run(run_list, feed_dict=feed_dict)

        accuracy += outs[0]
        all_labels.append(outs[1])
        preds.append(outs[2])
        if diff_idx is not None:
            diffs.append(outs[diff_idx])
        if rec_idx is not None:
            accuracy_rec += outs[rec_idx]

    assert end >= len(test_images)

    # Divide by number of examples to get final value.
    accuracy /= len(test_images)
    accuracy_rec /= len(test_images)
    preds = np.concatenate(preds)
    all_labels = np.concatenate(all_labels)

    if diff_op is not None:
        diffs = np.concatenate(diffs)

    roc_info = [all_labels, preds, diffs]
    if predictions_rec is not None:
        return accuracy, accuracy_rec, roc_info
    else:
        return accuracy, roc_info
def untargeted_advx_image_eval(sess, x, y, adversarial_image, logit_adv_x, X_test=None, Y_test=None,
                               feed=None, args=None):
    """
    Generate adversarial images for a test set and measure how often the
    prediction on them differs from the true label.

    :param sess: TF session to use
    :param x: input placeholder
    :param y: output placeholder (for labels); the class is taken as the
              argmax over its last axis
    :param adversarial_image: tensor fetched to obtain the adversarial
                              images of a batch
    :param logit_adv_x: tensor whose argmax over the last axis is the
                        predicted class for the adversarial images
    :param X_test: numpy array with test inputs
    :param Y_test: numpy array with test labels
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :return: tuple `(adv_images, adv_preds, gt_labels, success_rate)`: the
             first three are numpy arrays with one entry per test example
             (in `X_test` order), and `success_rate` is the fraction of
             examples whose adversarial prediction differs from the label
    :raises ValueError: if `X_test` or `Y_test` is missing
    """
    global _model_eval_cache
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Define the evaluation ops symbolically. The key carries a tag because
    # the cache is shared with other evaluators that key on a plain
    # `(labels, predictions)` pair but store a different kind of tensor.
    key = ("untargeted_advx_image_eval", y, logit_adv_x)
    if key in _model_eval_cache:
        pred_adv_x, pred_not_equal_orig = _model_eval_cache[key]
    else:
        pred_adv_x = tf.argmax(logit_adv_x, axis=-1)
        pred_not_equal_orig = tf.math.logical_not(tf.equal(tf.argmax(y, axis=-1),
                                                           pred_adv_x))
        _model_eval_cache[key] = (pred_adv_x, pred_not_equal_orig)

    # Init result var
    success_rate = 0.0

    adv_images_total = []
    adv_pred_total = []
    gt_label_total = []

    with sess.as_default():
        # Compute number of batches
        nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
        assert nb_batches * args.batch_size >= len(X_test)

        # Fixed-size buffers: every batch is fed with `batch_size` rows, and
        # rows past `cur_batch_size` hold leftovers that must be ignored.
        X_cur = np.zeros((args.batch_size,) + X_test.shape[1:],
                         dtype=X_test.dtype)
        Y_cur = np.zeros((args.batch_size,) + Y_test.shape[1:],
                         dtype=Y_test.dtype)

        for batch in range(nb_batches):

            # Must not use the `batch_indices` function here, because it
            # repeats some examples.
            # It's acceptable to repeat during training, but not eval.
            start = batch * args.batch_size
            end = min(len(X_test), start + args.batch_size)

            # The last batch may be smaller than all others.
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = X_test[start:end]
            Y_cur[:cur_batch_size] = Y_test[start:end]
            feed_dict = {x: X_cur, y: Y_cur}
            if feed is not None:
                feed_dict.update(feed)
            # A partial run lets the three fetches share one evaluation of
            # the graph, so the predictions refer to the very images that
            # are returned.
            # NOTE(review): only `x` and `y` are declared as feeds; confirm
            # that extra keys coming from `feed` are accepted by partial_run.
            handle = sess.partial_run_setup([adversarial_image, pred_adv_x, pred_not_equal_orig], feeds=[x, y])
            adv_image_np = sess.partial_run(handle, adversarial_image, feed_dict=feed_dict)
            adv_pred_np = sess.partial_run(handle, pred_adv_x)
            cur_not_equal_preds = sess.partial_run(handle, pred_not_equal_orig)

            # Keep only the rows that belong to real examples; the padded
            # tail of the last batch would otherwise leak stale entries.
            adv_images_total.extend(adv_image_np[:cur_batch_size])
            adv_pred_total.extend(adv_pred_np[:cur_batch_size])
            gt_label_total.extend(
                np.argmax(Y_cur[:cur_batch_size], axis=-1))

            success_rate += cur_not_equal_preds[:cur_batch_size].sum()

        # Divide by number of examples to get final value
        success_rate /= len(X_test)
        adv_images_total = np.stack(adv_images_total)
        adv_pred_total = np.stack(adv_pred_total)
        gt_label_total = np.stack(gt_label_total)

    return adv_images_total, adv_pred_total, gt_label_total, success_rate
Beispiel #20
0
def model_eval(sess,
               x,
               y,
               predictions=None,
               X_test=None,
               Y_test=None,
               feed=None,
               args=None,
               model=None):
    """
    Measure the classification accuracy of a TF model on a test set.

    The test set is processed batch by batch; the mean accuracy of each
    batch is weighted by the batch size so that a smaller final batch does
    not distort the overall value.

    :param sess: TF session used to evaluate the accuracy tensor
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_test: numpy array with test inputs
    :param Y_test: numpy array with test labels
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :param model: (deprecated) if not None, holds model output predictions
    :return: a float with the accuracy value
    """
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Resolve the deprecated `model` alias for `predictions`.
    if model is not None:
        warnings.warn("model argument is deprecated. "
                      "Switch to predictions argument. "
                      "model argument will be removed after 2018-01-05.")
        if predictions is not None:
            raise ValueError("Exactly one of model argument"
                             " and predictions argument should be specified.")
        predictions = model
    elif predictions is None:
        raise ValueError("One of model argument "
                         "or predictions argument must be supplied.")

    # Define accuracy symbolically; older TF releases need the class axis
    # spelled out via the tensor rank instead of -1.
    if LooseVersion(tf.__version__) >= LooseVersion('1.0.0'):
        label_axis = -1
        pred_axis = -1
    else:
        label_axis = tf.rank(y) - 1
        pred_axis = tf.rank(predictions) - 1
    hits = tf.equal(tf.argmax(y, axis=label_axis),
                    tf.argmax(predictions, axis=pred_axis))
    acc_value = tf.reduce_mean(tf.to_float(hits))

    # Running sum of (batch size * batch accuracy)
    accuracy = 0.0

    with sess.as_default():
        # Compute number of batches
        nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
        assert nb_batches * args.batch_size >= len(X_test)

        for batch in range(nb_batches):
            if batch > 0 and batch % 100 == 0:
                _logger.debug("Batch " + str(batch))

            # `batch_indices` is deliberately avoided: it repeats examples,
            # which is fine for training but not for evaluation.
            start = batch * args.batch_size
            end = min(len(X_test), start + args.batch_size)

            # Feed exactly the examples of this batch; the last one may be
            # shorter than the others.
            feed_dict = {x: X_test[start:end], y: Y_test[start:end]}
            if feed is not None:
                feed_dict.update(feed)
            batch_acc = acc_value.eval(feed_dict=feed_dict)

            accuracy += ((end - start) * batch_acc)

        assert end >= len(X_test)

        # Divide by number of examples to get final value
        accuracy /= len(X_test)

    return accuracy
def model_eval(sess,
               x,
               y,
               predictions,
               X_test=None,
               Y_test=None,
               feed=None,
               args=None):
    """
    Measure the classification accuracy of a TF model on a test set.

    The comparison tensor is cached per `(y, predictions)` pair, so repeated
    calls with the same tensors do not build it again. Every batch is fed
    through fixed-size buffers and only the rows belonging to real examples
    are counted.

    :param sess: TF session to use
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param predictions: model output predictions
    :param X_test: numpy array with test inputs
    :param Y_test: numpy array with test labels
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :return: a float with the accuracy value
    """
    global _model_eval_cache
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or Y_test is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")

    # Define accuracy symbolically, reusing a previously built tensor
    key = (y, predictions)
    try:
        correct_preds = _model_eval_cache[key]
    except KeyError:
        correct_preds = tf.equal(tf.argmax(y, axis=-1),
                                 tf.argmax(predictions, axis=-1))
        _model_eval_cache[key] = correct_preds

    # Number of correctly classified examples seen so far
    accuracy = 0.0

    with sess.as_default():
        # Compute number of batches
        nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
        assert nb_batches * args.batch_size >= len(X_test)

        # Buffers of constant batch size; a short final batch only
        # overwrites their leading rows.
        input_buf = np.zeros((args.batch_size, ) + X_test.shape[1:],
                             dtype=X_test.dtype)
        label_buf = np.zeros((args.batch_size, ) + Y_test.shape[1:],
                             dtype=Y_test.dtype)
        for batch in range(nb_batches):
            if batch > 0 and batch % 100 == 0:
                _logger.debug("Batch " + str(batch))

            # `batch_indices` is deliberately avoided: it repeats examples,
            # which is fine for training but not for evaluation.
            start = batch * args.batch_size
            end = min(len(X_test), start + args.batch_size)
            n_valid = end - start

            input_buf[:n_valid] = X_test[start:end]
            label_buf[:n_valid] = Y_test[start:end]
            feed_dict = {x: input_buf, y: label_buf}
            if feed is not None:
                feed_dict.update(feed)
            batch_hits = correct_preds.eval(feed_dict=feed_dict)

            # Ignore the padded tail of the buffer
            accuracy += batch_hits[:n_valid].sum()

        assert end >= len(X_test)

        # Divide by number of examples to get final value
        accuracy /= len(X_test)

    return accuracy
Beispiel #22
0
def model_eval_ae(sess,
                  x_orig,
                  x_target,
                  recon,
                  X_test=None,
                  X_test_target=None,
                  x_adv=None,
                  adv_recon=None,
                  lat_orig=None,
                  lat_recon=None,
                  feed=None,
                  args=None):
    """
    Average reconstruction distances of an autoencoder over a test set.

    For every example the following quantities are evaluated and then
    averaged over `len(X_test)`:

    * the summed squared difference between `recon` and `x_orig`,
    * the summed squared difference between `recon` and `x_target`,
    * the first distance minus the second,
    * if `x_adv` is given, the L2 norm of `x_orig - x_adv`,
    * if `lat_orig` is given, the squared difference between `lat_orig`
      and `lat_recon` summed over axis 1.

    :param sess: TF session to use
    :param x_orig: placeholder for the original inputs; its static shape is
                   read at indices 1..3, so it is expected to be rank 4
    :param x_target: placeholder for the target inputs
    :param recon: reconstruction tensor compared against both placeholders
    :param X_test: numpy array fed to `x_orig`
    :param X_test_target: numpy array fed to `x_target`
    :param x_adv: optional tensor with adversarial inputs
    :param adv_recon: unused; kept for interface compatibility
    :param lat_orig: optional latent tensor of the original input
    :param lat_recon: latent tensor of the reconstruction; used together
                      with `lat_orig`
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    :return: tuple `(avg_noise, avg_dist_orig, avg_dist_targ, avg_dist_diff,
             avg_dist_lat)`; `avg_noise` and `avg_dist_lat` are 0 when the
             corresponding optional tensors were not supplied
    :raises ValueError: if `X_test` or `X_test_target` is missing
    """
    global _model_eval_cache
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if X_test is None or X_test_target is None:
        raise ValueError("X_test argument and X_test_target argument "
                         "must be supplied.")

    shape = np.shape(x_orig)
    w = shape[1]
    h = shape[2]
    c = shape[3]
    flat_dim = w * h * c

    # Define the distances symbolically. The key covers every tensor the
    # cached ops are built from, so a call with different optional tensors
    # never reuses (or mis-unpacks) an entry created for another combination.
    # The optional tensors are assumed to be hashable graph elements, like
    # the other members of the key.
    key = (recon, x_orig, x_target, x_adv, lat_orig, lat_recon)
    if key in _model_eval_cache:
        d1, d2, dist_diff, noise, dist_lat = _model_eval_cache[key]
    else:
        recon_flat = tf.reshape(recon, (tf.shape(recon)[0], flat_dim))
        d1 = tf.reduce_sum(
            tf.squared_difference(
                recon_flat,
                tf.reshape(x_orig, (tf.shape(x_orig)[0], flat_dim))), 1)
        d2 = tf.reduce_sum(
            tf.squared_difference(
                recon_flat,
                tf.reshape(x_target, (tf.shape(x_target)[0], flat_dim))), 1)
        dist_diff = d1 - d2

        noise = None
        if x_adv is not None:
            # Per-example L2 norm of the perturbation
            noise = reduce_sum(tf.square(x_orig - x_adv),
                               list(range(1, len(shape))))
            noise = pow(noise, 0.5)

        dist_lat = None
        if lat_orig is not None:
            dist_lat = tf.reduce_sum(
                tf.squared_difference(lat_orig, lat_recon), 1)

        _model_eval_cache[key] = (d1, d2, dist_diff, noise, dist_lat)

    # Everything that has to be fetched per batch, evaluated in a single
    # session run so all values stem from the same forward pass.
    fetches = {'diff': dist_diff, 'orig': d1, 'targ': d2}
    if dist_lat is not None:
        fetches['lat'] = dist_lat
    if noise is not None:
        fetches['noise'] = noise

    avg_dist_diff = 0
    avg_dist_orig = 0
    avg_dist_targ = 0
    avg_noise = 0
    avg_dist_lat = 0
    with sess.as_default():
        # Compute number of batches
        nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size))
        assert nb_batches * args.batch_size >= len(X_test)

        # Fixed-size feed buffers; rows past `cur_batch_size` are padding
        # and are excluded from the sums below.
        l1 = np.shape(X_test)
        l2 = np.shape(X_test_target)
        X_cur = np.zeros((args.batch_size, l1[1], l1[2], l1[3]),
                         dtype='float64')
        X_targ_cur = np.zeros((args.batch_size, l2[1], l2[2], l2[3]),
                              dtype='float64')

        for batch in range(nb_batches):
            if batch % 100 == 0 and batch > 0:
                _logger.debug("Batch " + str(batch))

            # Must not use the `batch_indices` function here, because it
            # repeats some examples.
            # It's acceptable to repeat during training, but not eval.
            start = batch * args.batch_size
            end = min(len(X_test), start + args.batch_size)

            # The last batch may be smaller than all others.
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = X_test[start:end]
            X_targ_cur[:cur_batch_size] = X_test_target[start:end]
            feed_dict_1 = {x_orig: X_cur, x_target: X_targ_cur}
            if feed is not None:
                feed_dict_1.update(feed)

            vals = sess.run(fetches, feed_dict=feed_dict_1)

            avg_dist_diff += vals['diff'][:cur_batch_size].sum()
            avg_dist_orig += vals['orig'][:cur_batch_size].sum()
            avg_dist_targ += vals['targ'][:cur_batch_size].sum()
            if 'lat' in vals:
                avg_dist_lat += vals['lat'][:cur_batch_size].sum()
            if 'noise' in vals:
                avg_noise += vals['noise'][:cur_batch_size].sum()
        assert end >= len(X_test)

        # Divide by number of examples to get final value
        avg_dist_diff /= len(X_test)
        avg_dist_orig /= len(X_test)
        avg_dist_targ /= len(X_test)
        avg_noise /= len(X_test)
        avg_dist_lat /= len(X_test)
    return avg_noise, avg_dist_orig, avg_dist_targ, avg_dist_diff, avg_dist_lat
def train(sess,
          loss,
          x,
          y,
          X_train,
          Y_train,
          save=False,
          init_all=False,
          evaluate=None,
          feed=None,
          args=None,
          rng=None,
          var_list=None,
          fprop_args=None,
          optimizer=None):
    """
    Legacy single-device training loop for a TF graph.

    Deprecated: use cleverhans.train.train where possible. That function
    supports multiple GPUs; this one is kept only for legacy models that
    cannot have fprop called more than once.

    :param sess: TF session in which the training ops are run
    :param loss: loss object; its `fprop(x, y, **fprop_args)` yields the
                 tensor that is minimized
    :param x: input placeholder
    :param y: output placeholder (for labels)
    :param X_train: numpy array with training inputs
    :param Y_train: numpy array with training outputs
    :param save: if True, a checkpoint is written once training finishes
    :param init_all: if True, every TF variable is (re)initialized;
                     otherwise only variables that are still uninitialized
                     are initialized before training
    :param evaluate: optional zero-argument callable invoked at the end of
                     every epoch (typically reports validation accuracy)
    :param feed: optional dict merged into the feed dictionary of every
                 training step (e.g. the Keras learning phase)
    :param args: dict or argparse `Namespace` object.
                 Must provide `nb_epochs` and `batch_size`, plus
                 `learning_rate` when no optimizer is passed, plus
                 `train_dir` and `filename` when `save` is True
    :param rng: numpy.random.RandomState used to shuffle the examples each
                epoch; a fresh one is created when omitted
    :param var_list: optional list of parameters to train
    :param fprop_args: dict of extra keyword arguments forwarded to
                       `loss.fprop`
    :param optimizer: tf.train.Optimizer instance; Adam with
                      `args.learning_rate` is used when omitted
    :return: True once training has completed
    """
    warnings.warn("This function is deprecated and will be removed on or after"
                  " 2019-04-05. Switch to cleverhans.train.train.")

    fprop_args = fprop_args or {}
    args = _ArgsWrapper(args or {})

    # Validate the hyper-parameters described in the docstring
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    if optimizer is None:
        assert args.learning_rate is not None, ("Learning rate was not given "
                                                "in args dict")
    assert args.batch_size, "Batch size was not given in args dict"

    if save:
        assert args.train_dir, "Directory for save was not given in args dict"
        assert args.filename, "Filename for save was not given in args dict"

    rng = np.random.RandomState() if rng is None else rng

    # Build the loss tensor first, then settle on an optimizer
    loss_value = loss.fprop(x, y, **fprop_args)
    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    elif not isinstance(optimizer, tf.train.Optimizer):
        raise ValueError("optimizer object must be from a child class of "
                         "tf.train.Optimizer")

    # Make the train op depend on graph-level update ops (such as batch_norm)
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        train_step = optimizer.minimize(loss_value, var_list=var_list)

    with sess.as_default():
        if not hasattr(tf, "global_variables_initializer"):
            # Very old TensorFlow: only the legacy initializer is available
            warnings.warn("Update your copy of tensorflow; future versions of "
                          "CleverHans may drop support for this version.")
            sess.run(tf.initialize_all_variables())
        elif init_all:
            tf.global_variables_initializer().run()
        else:
            initialize_uninitialized_global_variables(sess)

        for epoch in xrange(args.nb_epochs):
            num_examples = len(X_train)

            # Number of batches needed to see every example at least once
            num_batches = int(
                math.ceil(float(num_examples) / args.batch_size))
            assert num_batches * args.batch_size >= num_examples

            # Fresh random visiting order for this epoch
            order = list(range(num_examples))
            rng.shuffle(order)

            epoch_began = time.time()
            for batch in range(num_batches):
                # Boundaries of the current batch inside `order`
                start, end = batch_indices(batch, num_examples,
                                           args.batch_size)
                chosen = order[start:end]

                # One optimization step on the selected examples
                step_feed = {x: X_train[chosen], y: Y_train[chosen]}
                if feed is not None:
                    step_feed.update(feed)
                train_step.run(feed_dict=step_feed)
            assert end >= num_examples  # Check that all examples were used
            epoch_ended = time.time()
            _logger.info("Epoch " + str(epoch) + " took " +
                         str(epoch_ended - epoch_began) + " seconds")
            if evaluate is not None:
                evaluate()

        if not save:
            _logger.info("Completed model training.")
        else:
            checkpoint_path = os.path.join(args.train_dir, args.filename)
            tf.train.Saver().save(sess, checkpoint_path)
            _logger.info("Completed model training and saved at: " +
                         str(checkpoint_path))

    return True
# Example #24
# 0
def train_with_noise(sess, loss, x_train, y_train,
          init_all=False, evaluate=None, feed=None, args=None,
          rng=None, var_list=None, fprop_args=None, optimizer=None,
          devices=None, x_batch_preprocessor=None, use_ema=False,
          ema_decay=.998, run_canary=None,
          loss_threshold=1e5, dataset_train=None, dataset_size=None,
          save=False, type="normal", datasetName="MNIST", retrain=False, discretizeColor=1):
  """
  Run (optionally multi-replica, synchronous) training to minimize `loss`,
  optionally perturbing the inputs with fresh uniform noise every epoch.

  :param sess: TF session to use when training the graph
  :param loss: loss object; `loss.fprop(x, y, **fprop_args)` is minimized
  :param x_train: numpy array with training inputs. Must be 4-D; the last
                  three axes are unpacked as width, height, channel.
                  Must be None when `dataset_train` is given.
  :param y_train: numpy array with training outputs.
                  Must be None when `dataset_train` is given.
  :param init_all: (boolean) If set to true, all TF variables in the session
                   are (re)initialized, otherwise only previously
                   uninitialized variables are initialized before training.
  :param evaluate: zero-argument callable, run after every 10th epoch and
                   after the final epoch
                   (typically to display the test/validation accuracy).
  :param feed: An optional dictionary that is appended to the feeding
               dictionary before the session runs. Can be used to feed
               the learning phase of a Keras model for instance.
  :param args: dict or argparse `Namespace` object.
               Should contain `nb_epochs`, `batch_size`, and
               `learning_rate` when no optimizer is passed.
               `train_dir` and `filename` are read when `save` or
               `retrain` is set.
  :param rng: Instance of numpy.random.RandomState, used to shuffle and pad
              the training indices. NOTE: the input noise itself is drawn
              from the global `np.random` state, not from `rng`.
  :param var_list: Optional list of parameters to train.
  :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
  :param optimizer: Optimizer to be used for training
  :param devices: list of device names to use for training
      If None, defaults to: all GPUs, if GPUs are available
                            all devices, if no GPUs are available
  :param x_batch_preprocessor: callable
      Takes a single tensor containing an x_train batch as input
      Returns a single tensor containing an x_train batch as output
      Called to preprocess the data before passing the data to the Loss
  :param use_ema: bool
      If true, uses an exponential moving average of the model parameters
  :param ema_decay: float or callable
      The decay parameter for EMA, if EMA is used
      If a callable rather than a float, this is a callable that takes
      the epoch and batch as arguments and returns the ema_decay for
      the current batch.
  :param run_canary: deprecated; passing any non-None value only triggers
      a warning. The canary always runs.
  :param loss_threshold: float
      Raise an exception if the loss exceeds this value.
      This is intended to rapidly detect numerical problems.
      Sometimes the loss may legitimately be higher than this value. In
      such cases, raise the value. If needed it can be np.inf.
  :param dataset_train: tf Dataset instance.
      Used as a replacement for x_train, y_train for faster performance.
      Batches fetched from the dataset are fed as-is: the noise and
      `convert_uniimage` steps are not applied to them.
  :param dataset_size: integer, the size of the dataset_train.
  :param save: bool
      If true, write a checkpoint to `args.train_dir`/`args.filename`
      every 50 epochs and after the final epoch.
  :param type: str (name shadows the builtin; kept for compatibility)
      "normal": `x_train` is passed through `convert_uniimage` once.
      "noise": every epoch, noise selected by `datasetName` is added,
      the result is clipped to [0, 1] and passed through
      `convert_uniimage`.
      Any other value: `x_train` is fed unchanged.
  :param datasetName: str, selects the noise range for type "noise":
      "MNIST" draws from [-0.4, 0.4), "CIFAR10" from [-0.15, 0.15);
      any other name adds no noise.
  :param retrain: bool
      If true, resume from the latest checkpoint in `args.train_dir`;
      the starting epoch is parsed from the suffix after the last "-" in
      the checkpoint name. Falls back to training from scratch (with a
      warning) when no checkpoint exists.
  :param discretizeColor: forwarded as the second argument of
      `convert_uniimage` (semantics defined by that helper).
  :return: True if model trained
  :raises ValueError: on missing `nb_epochs`/`learning_rate`, an optimizer
      that is not a tf.train.Optimizer, a missing `dataset_size`, a
      batch/device split mismatch, or an extreme/NaN/Inf loss.
  """

  # Check whether the hardware is working correctly
  canary.run_canary()
  if run_canary is not None:
    warnings.warn("The `run_canary` argument is deprecated. The canary "
                  "is now much cheaper and thus runs all the time. The "
                  "canary now uses its own loss function so it is not "
                  "necessary to turn off the canary when training with "
                  " a stochastic loss. Simply quit passing `run_canary`."
                  "Passing `run_canary` may become an error on or after "
                  "2019-10-16.")

  args = _ArgsWrapper(args or {})
  fprop_args = fprop_args or {}

  # Check that necessary arguments were given (see doc above)
  # Be sure to support 0 epochs for debugging purposes
  if args.nb_epochs is None:
    raise ValueError("`args` must specify number of epochs")
  if optimizer is None and args.learning_rate is None:
    raise ValueError("Learning rate was not given in args dict")
  assert args.batch_size, "Batch size was not given in args dict"

  if rng is None:
    rng = np.random.RandomState()

  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
  elif not isinstance(optimizer, tf.train.Optimizer):
    raise ValueError("optimizer object must be from a child class of "
                     "tf.train.Optimizer")

  grads = []
  xs = []
  preprocessed_xs = []
  ys = []
  if dataset_train is not None:
    assert x_train is None and y_train is None and x_batch_preprocessor is None
    if dataset_size is None:
      raise ValueError("You must provide a dataset size")
    data_iterator = dataset_train.make_one_shot_iterator().get_next()
    # Pull one batch so dtype and shape are known for the placeholders
    x_train, y_train = sess.run(data_iterator)

  # Unpack the image geometry only now: on the dataset path x_train is None
  # until the batch above has been fetched.
  _, width, height, channel = np.shape(x_train)

  devices = infer_devices(devices)
  for device in devices:
    with tf.device(device):
      x = tf.placeholder(x_train.dtype, (None,) + x_train.shape[1:])
      y = tf.placeholder(y_train.dtype, (None,) + y_train.shape[1:])
      xs.append(x)
      ys.append(y)

      if x_batch_preprocessor is not None:
        x = x_batch_preprocessor(x)

      # We need to keep track of these so that the canary can feed
      # preprocessed values. If the canary had to feed raw values,
      # stochastic preprocessing could make the canary fail.
      preprocessed_xs.append(x)

      loss_value = loss.fprop(x, y, **fprop_args)

      grads.append(optimizer.compute_gradients(
          loss_value, var_list=var_list))
  num_devices = len(devices)
  print("num_devices: ", num_devices)

  grad = avg_grads(grads)
  # Trigger update operations within the default graph (such as batch_norm).
  with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    train_step = optimizer.apply_gradients(grad)

  epoch_tf = tf.placeholder(tf.int32, [])
  batch_tf = tf.placeholder(tf.int32, [])

  if use_ema:
    if callable(ema_decay):
      ema_decay = ema_decay(epoch_tf, batch_tf)
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    with tf.control_dependencies([train_step]):
      train_step = ema.apply(var_list)
    # Get pointers to the EMA's running average variables
    avg_params = [ema.average(param) for param in var_list]
    # Make temporary buffers used for swapping the live and running average
    # parameters
    tmp_params = [tf.Variable(param, trainable=False)
                  for param in var_list]
    # Define the swapping operation
    param_to_tmp = [tf.assign(tmp, param)
                    for tmp, param in safe_zip(tmp_params, var_list)]
    with tf.control_dependencies(param_to_tmp):
      avg_to_param = [tf.assign(param, avg)
                      for param, avg in safe_zip(var_list, avg_params)]
    with tf.control_dependencies(avg_to_param):
      tmp_to_avg = [tf.assign(avg, tmp)
                    for avg, tmp in safe_zip(avg_params, tmp_params)]
    swap = tmp_to_avg

  batch_size = args.batch_size

  assert batch_size % num_devices == 0
  device_batch_size = batch_size // num_devices

  saver = tf.train.Saver(max_to_keep=100)
  starting_epoch = 0
  if init_all:
    sess.run(tf.global_variables_initializer())
  else:
    initialize_uninitialized_global_variables(sess)

  # Resume from the most recent checkpoint when asked to
  if retrain:
    print("Retrain is in progress...")
    latest_checkpoint = tf.train.latest_checkpoint(args.train_dir,
                                                   latest_filename=None)
    if latest_checkpoint is None:
      # latest_checkpoint() yields None when nothing has been saved yet
      warnings.warn("retrain was requested but no checkpoint was found in "
                    + str(args.train_dir) + "; training from scratch.")
    else:
      # Checkpoints are saved as "<filename>-<epoch>"
      starting_epoch = int(latest_checkpoint.rsplit("-", 1)[-1])
      model_path = os.path.join(args.train_dir,
                                args.filename + "-" + str(starting_epoch))
      print("Trying to load trained model from: " + model_path)
      # NOTE(review): if the .meta file is missing, training still resumes
      # at starting_epoch without restored weights -- confirm this is wanted.
      if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
        print("Load trained model")

  feed_x_train = x_train
  if type == "normal":
    feed_x_train = convert_uniimage(x_train, discretizeColor)

  # (width of the uniform draw, offset subtracted to centre it on zero)
  noise_ranges = {
      "MNIST": (0.8, 0.4),      # used for MNIST and Fashion MNIST
      "CIFAR10": (0.3, 0.15),
  }
  noise_shape = (len(x_train), width, height, channel)

  for epoch in range(starting_epoch, args.nb_epochs):
    # x_train is never modified in place, so no defensive copy is needed
    noisy_x = x_train
    if type == "noise":
      noise_range = noise_ranges.get(datasetName)
      if noise_range is not None:
        span, offset = noise_range
        noisy_x = np.clip(
            x_train + (np.random.uniform(0, span, noise_shape) - offset),
            0, 1)
      feed_x_train = convert_uniimage(noisy_x, discretizeColor)

    # Debug aid: set to True to plot the original, noisy and fed version of
    # every training image side by side (one blocking window per image).
    show_img = False
    if show_img:
      img_shape = (width, height) if channel == 1 else (width, height, channel)
      for img_idx in range(len(feed_x_train)):
        fig = plt.figure()
        for col, images in enumerate((x_train, noisy_x, feed_x_train), 1):
          fig.add_subplot(1, 4, col)
          plt.imshow(images[img_idx].reshape(img_shape), cmap='gray')
        plt.show()

    if dataset_train is not None:
      nb_batches = int(math.ceil(float(dataset_size) / batch_size))
    else:
      # Indices to shuffle training set
      index_shuf = list(range(len(x_train)))
      # Randomly repeat a few training examples each epoch to avoid
      # having a too-small batch
      while len(index_shuf) % batch_size != 0:
        index_shuf.append(rng.randint(len(x_train)))
      nb_batches = len(index_shuf) // batch_size
      rng.shuffle(index_shuf)
      # Shuffling here versus inside the loop doesn't seem to affect
      # timing very much, but shuffling here makes the code slightly
      # easier to read
      x_train_shuffled = feed_x_train[index_shuf]
      y_train_shuffled = y_train[index_shuf]

    prev = time.time()
    # Keeps the post-loop check well-defined when there are no batches
    end = 0
    for batch in range(nb_batches):
      if dataset_train is not None:
        x_train_shuffled, y_train_shuffled = sess.run(data_iterator)
        start, end = 0, batch_size
      else:
        # Compute batch start and end indices
        start = batch * batch_size
        end = (batch + 1) * batch_size
        assert end - start == batch_size

      # Split the batch evenly across the devices
      feed_dict = {epoch_tf: epoch, batch_tf: batch}
      for dev_idx in range(num_devices):
        cur_start = start + dev_idx * device_batch_size
        cur_end = start + (dev_idx + 1) * device_batch_size
        feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
        feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
      if cur_end != end and dataset_train is None:
        msg = ("batch_size (%d) must be a multiple of num_devices "
               "(%d).\nCUDA_VISIBLE_DEVICES: %s"
               "\ndevices: %s")
        # .get(): an unset variable must not hide this error behind KeyError
        msg_args = (batch_size, num_devices,
                    os.environ.get('CUDA_VISIBLE_DEVICES'),
                    str(devices))
        raise ValueError(msg % msg_args)
      if feed is not None:
        feed_dict.update(feed)

      _, loss_numpy = sess.run(
          [train_step, loss_value], feed_dict=feed_dict)

      if np.abs(loss_numpy) > loss_threshold:
        raise ValueError("Extreme loss during training: ", loss_numpy)
      if np.isnan(loss_numpy) or np.isinf(loss_numpy):
        raise ValueError("NaN/Inf loss during training")
    assert (dataset_train is not None or
            end == len(index_shuf))  # Check that all examples were used
    cur = time.time()
    _logger.info("Epoch " + str(epoch) + " took " +
                 str(cur - prev) + " seconds")
    if evaluate is not None:
      if use_ema:
        # Before running evaluation, load the running average
        # parameters into the live slot, so we can see how well
        # the EMA parameters are performing
        sess.run(swap)
      if (epoch + 1) % 10 == 0 or (epoch + 1) == args.nb_epochs:
        evaluate()
      if use_ema:
        # Swap the parameters back, so that we continue training
        # on the live parameters
        sess.run(swap)

    if save and ((epoch + 1) % 50 == 0 or (epoch + 1) == args.nb_epochs):
      with tf.device('/CPU:0'):
        save_path = os.path.join(args.train_dir, args.filename)
        if not tf.gfile.Exists(args.train_dir):
          tf.gfile.MakeDirs(args.train_dir)
        saver.save(sess, save_path, global_step=(epoch + 1))
      _logger.info("Reaching save point at " + str(epoch + 1) + ": " +
                   str(save_path))

  if use_ema:
    # When training is done, swap the running average parameters into
    # the live slot, so that we use them when we deploy the model
    sess.run(swap)

  return True