Example #1
 def testMovingAverageVariables(self):
   height, width = 3, 3
   with self.test_session():
     images = tf.random_uniform((5, height, width, 3), seed=1)
     ops.batch_norm(images, scale=True)
     moving_mean = tf.moving_average_variables()[0]
     moving_variance = tf.moving_average_variables()[1]
      self.assertEqual(moving_mean.op.name, 'BatchNorm/moving_mean')
      self.assertEqual(moving_variance.op.name, 'BatchNorm/moving_variance')
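For context, a short sketch of why the indexing above works: `tf.moving_average_variables()` is a thin accessor over the `MOVING_AVERAGE_VARIABLES` graph collection, which `batch_norm` populates with `moving_mean` followed by `moving_variance` (a minimal sketch, assuming TF 1.x graph mode):

# Sketch: the helper call and the raw collection lookup return the same list.
via_helper = tf.moving_average_variables()
via_collection = tf.get_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES)
assert via_helper == via_collection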
Example #2
 def testCreateVariablesWithoutCenterWithoutScale(self):
   height, width = 3, 3
   with self.test_session():
     images = tf.random_uniform((5, height, width, 3), seed=1)
     ops.batch_norm(images, center=False, scale=False)
     beta = variables.get_variables_by_name('beta')
      self.assertEqual(beta, [])
      gamma = variables.get_variables_by_name('gamma')
      self.assertEqual(gamma, [])
     moving_mean = tf.moving_average_variables()[0]
     moving_variance = tf.moving_average_variables()[1]
      self.assertEqual(moving_mean.op.name, 'BatchNorm/moving_mean')
      self.assertEqual(moving_variance.op.name, 'BatchNorm/moving_variance')
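By way of contrast, a hedged sketch of the default behavior in a fresh graph (an assumption: `ops.batch_norm` defaults to `center=True, scale=False`): `beta` is then created, `gamma` still is not, and the moving statistics are created regardless of `center`/`scale`.

ops.batch_norm(images)
assert variables.get_variables_by_name('beta')         # created by default
assert variables.get_variables_by_name('gamma') == []  # off by default
assert len(tf.moving_average_variables()) == 2         # mean and variance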
Example #3
    def variables_to_restore(self, moving_avg_variables=None):
        """ """

        name_map = {}
        if moving_avg_variables is None:
            moving_avg_variables = tf.trainable_variables()
            moving_avg_variables += tf.moving_average_variables()
        # Remove duplicates
        moving_avg_variables = set(moving_avg_variables)
        # Collect all the variables with a moving average.
        for v in moving_avg_variables:
            name_map[self.average_name(v)] = v
        # Make sure we restore variables without moving average as well.
        for v in list(set(tf.all_variables()) - moving_avg_variables):
            if v.op.name not in name_map:
                name_map[v.op.name] = v
        return name_map
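A typical caller of this method builds a `Saver` over the returned name map so that evaluation restores the smoothed weights (a sketch, assuming TF 1.x graph mode and a checkpoint produced by a training job that ran `ema.apply(...)`; the checkpoint path is hypothetical):

ema = tf.train.ExponentialMovingAverage(decay=0.9999)
saver = tf.train.Saver(ema.variables_to_restore())
with tf.Session() as sess:
    saver.restore(sess, '/path/to/checkpoint')  # hypothetical path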
Example #4
def inception_model_fn(features, labels, mode, params):
    """Inception v4 model using Estimator API."""
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)

    if isinstance(features, dict):
        features = features['feature']

    features = tensor_transform_fn(features, params['model_transpose_dims'])

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        if FLAGS.precision == 'bfloat16':
            with contrib_tpu.bfloat16_scope():
                logits, end_points = inception.inception_v4(
                    features, num_classes, is_training=is_training)
            logits = tf.cast(logits, tf.float32)
        elif FLAGS.precision == 'float32':
            logits, end_points = inception.inception_v4(
                features, num_classes, is_training=is_training)
        return logits, end_points

    if FLAGS.clear_update_collections:
        with arg_scope(
                inception.inception_v4_arg_scope(
                    weight_decay=0.0,
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = build_network()
    else:
        with arg_scope(
                inception.inception_v4_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = build_network()

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=tf.cast(end_points['AuxLogits'],
                                                       tf.float32),
                                        weights=0.4,
                                        label_smoothing=0.1,
                                        scope='aux_loss')

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)

    losses = tf.add_n(tf.losses.get_losses())
    l2_loss = []
    for v in tf.trainable_variables():
        tf.logging.info(v.name)
        if 'BatchNorm' not in v.name and 'weights' in v.name:
            l2_loss.append(tf.nn.l2_loss(v))
    tf.logging.info(len(l2_loss))
    loss = losses + WEIGHT_DECAY * tf.add_n(l2_loss)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    # Adjust the initial learning rate for warmup
    initial_learning_rate /= (
        FLAGS.learning_rate_decay**((FLAGS.warmup_epochs + FLAGS.cold_epochs) /
                                    FLAGS.learning_rate_decay_epochs))
    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        clr = FLAGS.cold_learning_rate
        wlr = initial_learning_rate / (FLAGS.warmup_epochs + FLAGS.cold_epochs)
        learning_rate = tf.where(
            tf.greater_equal(current_epoch, FLAGS.cold_epochs), (tf.where(
                tf.greater_equal(current_epoch,
                                 FLAGS.warmup_epochs + FLAGS.cold_epochs),
                tf.train.exponential_decay(
                    learning_rate=initial_learning_rate,
                    global_step=global_step,
                    decay_steps=int(
                        FLAGS.learning_rate_decay_epochs * batches_per_epoch),
                    decay_rate=FLAGS.learning_rate_decay,
                    staircase=True),
                tf.multiply(tf.cast(current_epoch, tf.float32), wlr))), clr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op]), \
                    tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly concatenated to
        # [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor` with shape `[batch]` for the global step.
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning rate.
                  ce: `Tensor` with shape `[batch]` for the current epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                        summary.scalar('learning_rate',
                                       tf.reduce_mean(lr),
                                       step=gs)
                        summary.scalar('current_epoch',
                                       tf.reduce_mean(ce),
                                       step=gs)

                        return summary.all_summary_ops()

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics)
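The nested `tf.where` above encodes a three-phase schedule: a constant cold rate, a linear per-epoch warmup, then staircase exponential decay, clamped from below at `final_learning_rate`. A plain-Python sketch of the same logic may be easier to read (an approximation: the graph version decays on `global_step` rather than whole epochs):

def schedule(epoch, cold_epochs, warmup_epochs, clr, wlr,
             initial_lr, final_lr, decay_rate, decay_epochs):
    if epoch < cold_epochs:                    # phase 1: constant cold rate
        lr = clr
    elif epoch < cold_epochs + warmup_epochs:  # phase 2: linear warmup
        lr = epoch * wlr
    else:                                      # phase 3: staircase decay
        lr = initial_lr * decay_rate ** (epoch // decay_epochs)
    return max(lr, final_lr)                   # lower bound on the rate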
Example #5
    def test_restore_ema(self):

        # Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3
        x_data = np.random.rand(100).astype(np.float32)
        y_data = x_data * 0.1 + 0.3

        # Try to find values for W and b that compute y_data = W * x_data + b
        # (We know that W should be 0.1 and b 0.3, but TensorFlow will
        # figure that out for us.)
        W = tf.Variable(tf.random_uniform([1], -1.0, 1.0), name='W')
        b = tf.Variable(tf.zeros([1]), name='b')
        y = W * x_data + b

        # Minimize the mean squared errors.
        loss = tf.reduce_mean(tf.square(y - y_data))
        optimizer = tf.train.GradientDescentOptimizer(0.5)
        opt_op = optimizer.minimize(loss)

        # Track the moving averages of all trainable variables.
        ema = tf.train.ExponentialMovingAverage(decay=0.9999)
        averages_op = ema.apply(tf.trainable_variables())
        with tf.control_dependencies([opt_op]):
            train_op = tf.group(averages_op)

        # Before starting, initialize the variables.  We will 'run' this first.
        init = tf.global_variables_initializer()

        # Save all variables, including the EMA shadow copies, so the
        # EMA-named entries exist in the checkpoint for the restore below.
        saver = tf.train.Saver()

        # Launch the graph.
        sess = tf.Session()
        sess.run(init)

        # Fit the line.
        for _ in range(201):
            sess.run(train_op)

        w_reference = sess.run('W/ExponentialMovingAverage:0')
        b_reference = sess.run('b/ExponentialMovingAverage:0')

        saver.save(sess, os.path.join(self.tmp_dir, "model_ex1"))

        tf.reset_default_graph()

        tf.train.import_meta_graph(os.path.join(self.tmp_dir,
                                                "model_ex1.meta"))
        sess = tf.Session()

        print('------------------------------------------------------')
        for var in tf.global_variables():
            print('all variables: ' + var.op.name)
        for var in tf.trainable_variables():
            print('normal variable: ' + var.op.name)
        for var in tf.moving_average_variables():
            print('ema variable: ' + var.op.name)
        print('------------------------------------------------------')

        mode = 1
        restore_vars = {}
        if mode == 0:
            ema = tf.train.ExponentialMovingAverage(1.0)
            for var in tf.trainable_variables():
                print('%s: %s' % (ema.average_name(var), var.op.name))
                restore_vars[ema.average_name(var)] = var
        elif mode == 1:
            for var in tf.trainable_variables():
                ema_name = var.op.name + '/ExponentialMovingAverage'
                print('%s: %s' % (ema_name, var.op.name))
                restore_vars[ema_name] = var

        saver = tf.train.Saver(restore_vars, name='ema_restore')

        saver.restore(sess, os.path.join(self.tmp_dir, "model_ex1"))

        w_restored = sess.run('W:0')
        b_restored = sess.run('b:0')

        self.assertAlmostEqual(
            w_reference, w_restored,
            msg='Restored model does not use the EMA-filtered weight')
        self.assertAlmostEqual(
            b_reference, b_restored,
            msg='Restored model does not use the EMA-filtered bias')
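For reference, each `ema.apply` step moves the shadow copy toward the variable as `shadow = decay * shadow + (1 - decay) * variable` (and when `num_updates` is set, the effective decay is capped at `(1 + steps) / (10 + steps)`). A tiny illustration of the recurrence the test exercises, with hypothetical starting values (in TF the shadow is initialized to the variable's initial value):

decay = 0.9999
shadow, var = 0.0, 1.0  # hypothetical starting values
for _ in range(201):
    shadow = decay * shadow + (1.0 - decay) * var
print(shadow)  # ~0.0199: at this decay the shadow lags far behind var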
Example #6
def model_fn(features, labels, mode, params):
  """Mobilenet v1 model using Estimator API."""
  num_classes = params['num_classes']
  training_active = (mode == tf.estimator.ModeKeys.TRAIN)
  eval_active = (mode == tf.estimator.ModeKeys.EVAL)

  if isinstance(features, dict):
    features = features['feature']

  features = supervised_images.tensor_transform_fn(
      features, params['input_perm'])

  model = tf.keras.applications.MobileNet(
      input_tensor=features,
      include_top=True,
      weights=None,
      classes=num_classes)

  logits = model(features, training=training_active)

  predictions = {
      'classes': tf.argmax(input=logits, axis=1),
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'classify': tf.estimator.export.PredictOutput(predictions)
        })

  if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
      not params['use_tpu']):
    with tf.control_dependencies([
        tf.Print(
            predictions['classes'], [predictions['classes']],
            summarize=params['eval_batch_size'],
            message='prediction: ')
    ]):
      labels = tf.Print(
          labels, [labels],
          summarize=params['eval_batch_size'], message='label: ')

  one_hot_labels = tf.one_hot(labels, params['num_classes'], dtype=tf.int32)

  tf.losses.softmax_cross_entropy(
      onehot_labels=one_hot_labels,
      logits=logits,
      weights=1.0,
      label_smoothing=0.1)
  loss = tf.losses.get_total_loss(add_regularization_losses=True)

  initial_learning_rate = params['learning_rate'] * params['train_batch_size'] / 256   # pylint: disable=line-too-long
  final_learning_rate = 0.0001 * initial_learning_rate

  train_op = None
  if training_active:
    batches_per_epoch = params['num_train_images'] // params['train_batch_size']
    global_step = tf.train.get_or_create_global_step()

    learning_rate = tf.train.exponential_decay(
        learning_rate=initial_learning_rate,
        global_step=global_step,
        decay_steps=params['learning_rate_decay_epochs'] * batches_per_epoch,
        decay_rate=params['learning_rate_decay'],
        staircase=True)

    # Set a minimum boundary for the learning rate.
    learning_rate = tf.maximum(
        learning_rate, final_learning_rate, name='learning_rate')

    if params['optimizer'] == 'sgd':
      absl.logging.info('Using SGD optimizer')
      optimizer = tf.train.GradientDescentOptimizer(
          learning_rate=learning_rate)
    elif params['optimizer'] == 'momentum':
      absl.logging.info('Using Momentum optimizer')
      optimizer = tf.train.MomentumOptimizer(
          learning_rate=learning_rate, momentum=0.9)
    elif params['optimizer'] == 'RMS':
      absl.logging.info('Using RMS optimizer')
      optimizer = tf.train.RMSPropOptimizer(
          learning_rate,
          RMSPROP_DECAY,
          momentum=RMSPROP_MOMENTUM,
          epsilon=RMSPROP_EPSILON)
    else:
      absl.logging.fatal('Unknown optimizer: %s', params['optimizer'])

    if params['use_tpu']:
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    update_ops = model.updates
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step=global_step)
    if params['moving_average']:
      ema = tf.train.ExponentialMovingAverage(
          decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
      variables_to_average = (tf.trainable_variables() +
                              tf.moving_average_variables())
      with tf.control_dependencies([train_op]), tf.name_scope('moving_average'):
        train_op = ema.apply(variables_to_average)

  eval_metrics = None
  if eval_active:
    def metric_fn(labels, predictions):
      accuracy = tf.metrics.accuracy(labels, tf.argmax(
          input=predictions, axis=1))
      return {'accuracy': accuracy}

    if params['use_logits']:
      eval_predictions = logits
    else:
      # Assumption: fall back to the softmax probabilities so that
      # eval_predictions is always defined (argmax is unchanged either way).
      eval_predictions = predictions['probabilities']

    eval_metrics = (metric_fn, [labels, eval_predictions])

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode, loss=loss, train_op=train_op, eval_metrics=eval_metrics)
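A note on the `eval_metrics` pair returned above: TPUEstimator runs `metric_fn` on the host CPU over the tensors outfed from the device. Conceptually (a simplification that ignores sharding and the outfeed queue), the pair is consumed like this:

# Conceptual sketch only, not the actual TPUEstimator internals.
metric_fn, tensors = eval_metrics
metric_ops = metric_fn(*tensors)  # e.g. {'accuracy': (value_op, update_op)}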
Example #7
    def model_fn(self, features, labels, mode, params):
        """Build the model based on features, labels, and mode.

        Args:
          features: The features dictionary containing the data Tensor
            and the number of examples.
          labels: The labels Tensor resulting from calling the model.
          mode: A string indicating the training mode.
          params: A dictionary of hyperparameters.

        Returns:
          A tf.estimator.EstimatorSpec.
        """
        del params
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        eval_active = (mode == tf.estimator.ModeKeys.EVAL)
        is_predict = (mode == tf.estimator.ModeKeys.PREDICT)
        if is_training:
            features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC
        loss, logits = self._build_network(features, labels, mode)

        if is_predict:
            predictions = {'logits': logits}
            if self.hparams.use_tpu:
                return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                    predictions=predictions)
            else:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  predictions=predictions)
        host_call = None
        train_op = None

        if is_training:
            global_step = tf.train.get_or_create_global_step()
            gs_t = tf.reshape(tf.cast(global_step, tf.int32), [1])

            # Setup learning rate schedule
            learning_rate = self._build_learning_rate_schedule(global_step)

            # Setup optimizer.
            optimizer = self._build_optimizer(learning_rate)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = self._build_train_op(optimizer,
                                                loss,
                                                global_step=global_step)
            if self.hparams.moving_average_decay > 0:
                ema = tf.train.ExponentialMovingAverage(
                    decay=self.hparams.moving_average_decay,
                    num_updates=global_step)
                variables_to_average = (tf.trainable_variables() +
                                        tf.moving_average_variables())
                with tf.control_dependencies([train_op]):
                    with tf.name_scope('moving_average'):
                        train_op = ema.apply(variables_to_average)

            lr_t = tf.reshape(learning_rate, [1])
            host_call = None
            if self.hparams.enable_hostcall:

                def host_call_fn(gs, lr):
                    # Outfeed supports int32 but global_step is expected to be int64.
                    gs = tf.cast(tf.reduce_mean(gs), tf.int64)
                    # Use the contrib summary ops here: in TF 1.x, tf.summary
                    # has no create_file_writer/always_record_summaries pair.
                    with tf.contrib.summary.create_file_writer(
                            self.model_dir).as_default():
                        with tf.contrib.summary.always_record_summaries():
                            tf.contrib.summary.scalar('learning_rate',
                                                      tf.reduce_mean(lr),
                                                      step=gs)
                            return tf.contrib.summary.all_summary_ops()

                host_call = (host_call_fn, [gs_t, lr_t])

        eval_metrics = None
        eval_metric_ops = None
        if eval_active:

            def metric_fn(labels, logits):
                """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
                predictions = tf.argmax(logits, axis=1)
                categorical_labels = labels
                top_1_accuracy = tf.metrics.accuracy(categorical_labels,
                                                     predictions)
                in_top_5 = tf.cast(
                    tf.nn.in_top_k(logits, categorical_labels, 5), tf.float32)
                top_5_accuracy = tf.metrics.mean(in_top_5)

                return {
                    'top_1_accuracy': top_1_accuracy,
                    'top_5_accuracy': top_5_accuracy,
                }

            eval_metrics = (metric_fn, [labels, logits])
            eval_metric_ops = metric_fn(labels, logits)

        if self.hparams.use_tpu:
            return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                loss=loss,
                                                train_op=train_op,
                                                host_call=host_call,
                                                eval_metrics=eval_metrics)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          eval_metric_ops=eval_metric_ops)
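One detail these model_fns leave implicit: when moving averages are maintained during training, evaluation typically wants the shadow weights rather than the raw ones. A sketch of one common wiring, assuming TF 1.x (the `scaffold_fn`/`scaffold` hookup is an assumption about the surrounding code):

# Sketch: restore the EMA shadow values into the live variables at eval time.
ema = tf.train.ExponentialMovingAverage(decay=0.9999)
ema_saver = tf.train.Saver(ema.variables_to_restore())
scaffold = tf.train.Scaffold(saver=ema_saver)
# TPU path: TPUEstimatorSpec(..., scaffold_fn=lambda: scaffold)
# CPU path: tf.estimator.EstimatorSpec(..., scaffold=scaffold)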