Python bfloat16_scopeの例

プログラミング言語: Python

名前空間/パッケージ名: tensorflow.contrib.tpu.python.tpu.bfloat16

メソッド/関数: bfloat16_scope

hotexamples.comのコード掲載数: 17

Python bfloat16_scope - 17件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのtensorflow.contrib.tpu.python.tpu.bfloat16.bfloat16_scopeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def _build_network(features, mode, params):
    """Builds the network for different values of params['use_bfloat16'].

    Args:
        features: Passed unchanged as the first argument of
            `multi_scale_logits`.
        mode: Compared against `tf.estimator.ModeKeys.TRAIN` to decide whether
            the network is built in training mode.
        params: Dict read for 'use_bfloat16', 'model_options',
            'image_pyramid', 'fine_tune_batch_norm' (training only) and
            'weight_decay' (float32 path only).

    Returns:
        The nested mapping produced by `multi_scale_logits`, iterated here as
        {level: {scale: logits}}. On the bfloat16 path every logits entry has
        been replaced in place by its `tf.float32` cast.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    # Batch-norm fine-tuning is only ever requested while training.
    fine_tune_batch_norm = (params['fine_tune_batch_norm']
                            if is_training else False)

    if params['use_bfloat16']:
        with bfloat16.bfloat16_scope():
            outputs_to_scales_to_logits = multi_scale_logits(
                features,
                params['model_options'],
                params['image_pyramid'],
                # The bfloat16 path builds the network with weight decay
                # disabled (0.0) rather than params['weight_decay'].
                weight_decay=0.0,
                is_training=is_training,
                fine_tune_batch_norm=fine_tune_batch_norm)
        # `.items()` works on both Python 2 and 3; the previous
        # `.iteritems()` raises AttributeError on Python 3. The inner items
        # are snapshotted because entries are reassigned while looping.
        for level, output in outputs_to_scales_to_logits.items():
            for scale, logits in list(output.items()):
                outputs_to_scales_to_logits[level][scale] = tf.cast(
                    logits, tf.float32)
    else:
        outputs_to_scales_to_logits = multi_scale_logits(
            features,
            params['model_options'],
            params['image_pyramid'],
            weight_decay=params['weight_decay'],
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)
    return outputs_to_scales_to_logits

コード例 #2

ファイルを表示

 def build_network():
     """Builds Inception v3 for the precision selected by FLAGS.precision.

     Reads `features`, `num_classes` and `is_training` from the enclosing
     scope.

     Returns:
         A `(logits, end_points)` tuple as returned by
         `inception.inception_v3`; on the bfloat16 path the logits are cast
         to `tf.float32` after leaving the bfloat16 scope.

     Raises:
         ValueError: If FLAGS.precision is neither 'bfloat16' nor 'float32'.
     """
     if FLAGS.precision == 'bfloat16':
         with bfloat16.bfloat16_scope():
             logits, end_points = inception.inception_v3(
                 features, num_classes, is_training=is_training)
         # Only the logits are cast back; end_points are returned as built.
         logits = tf.cast(logits, tf.float32)
     elif FLAGS.precision == 'float32':
         logits, end_points = inception.inception_v3(
             features, num_classes, is_training=is_training)
     else:
         # Without this branch an unknown precision fell through to the
         # return statement and failed with an opaque UnboundLocalError.
         raise ValueError(
             "Unsupported precision %r; expected 'bfloat16' or 'float32'." %
             (FLAGS.precision,))
     return logits, end_points

コード例 #3

ファイルを表示

 def testRequestedDType(self):
   """Checks the dtypes reported for variables created in a bfloat16 scope."""
   with bfloat16.bfloat16_scope() as bf16_scope:
     # No dtype requested: the variable is expected to be float32.
     default_var = variable_scope.get_variable("v1", [])
     self.assertEqual(default_var.dtype.base_dtype, dtypes.float32)

     # bfloat16 requested explicitly: the returned value reports bfloat16.
     requested_var = variable_scope.get_variable(
         "v2", [], dtype=dtypes.bfloat16)
     self.assertEqual(requested_var.dtype.base_dtype, dtypes.bfloat16)

     # The variables tracked by the scope are both expected to be float32
     # (presumably the scope's getter stores float32 and casts on read --
     # confirm against the bfloat16_scope implementation).
     stored_dtypes = []
     for var in bf16_scope.global_variables():
       stored_dtypes.append(var.dtype.base_dtype)
     self.assertEqual([dtypes.float32, dtypes.float32], stored_dtypes)

コード例 #4

ファイルを表示

ファイル: ssd_model.py プロジェクト: sead0812/inference_results_v0.5

def _model_fn(images, source_id, raw_shape, params, model):
    """Model definition for the SSD model based on ResNet-50.

    Runs `model` on the images (under a bfloat16 scope when
    params['use_bfloat16'] is set), decodes the box outputs against the
    default anchors, keeps the top-scoring candidates and applies non-max
    suppression.

    Args:
      images: the input image tensor with shape [batch_size, height, width, 3].
        The height and width are fixed and equal.
      source_id: a Tensor with shape [batch_size]
      raw_shape: a Tensor with shape [batch_size, 3]
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the SSD model outputs class logits and box regression outputs.

    Returns:
      The detections returned by `non_max_suppression` (not an EstimatorSpec).
    """
    features = images

    def _model_outputs():
        # Batch norm is always built in inference mode here.
        return model(features, params, is_training_bn=False)

    if params['use_bfloat16']:
        with bfloat16.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            # Cast every level back to float32 before post-processing.
            for level in cls_outputs.keys():
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()

    flattened_cls, flattened_box = concat_outputs(cls_outputs, box_outputs)

    # Reorder the four box components from (y_min, x_min, y_max, x_max) to
    # (x_min, y_min, x_max, y_max) -- presumably to match the 'ltrb' anchor
    # layout requested below; confirm against decode_boxes.
    y_min, x_min, y_max, x_max = tf.split(flattened_box, 4, axis=1)
    flattened_box = tf.concat([x_min, y_min, x_max, y_max], axis=1)
    # [batch_size, 4, N] to [batch_size, N, 4]
    flattened_box = tf.transpose(flattened_box, [0, 2, 1])

    anchors = tf.convert_to_tensor(DefaultBoxes()('ltrb'))

    decoded_boxes = decode_boxes(encoded_boxes=flattened_box,
                                 anchors=anchors,
                                 weights=ssd_constants.BOX_CODER_SCALES)

    pred_scores = tf.nn.softmax(flattened_cls, axis=1)
    pred_scores, indices = select_top_k_scores(
        pred_scores, ssd_constants.MAX_NUM_EVAL_BOXES)
    detections = non_max_suppression(scores_in=pred_scores,
                                     boxes_in=decoded_boxes,
                                     top_k_indices=indices,
                                     source_id=source_id,
                                     raw_shape=raw_shape)

    return detections

コード例 #5

ファイルを表示

ファイル: squeezenet_model_bfloat16.py プロジェクト: vinhngx/tpu

def model_fn(features, labels, mode, params):
    """Builds the TPUEstimatorSpec for SqueezeNet with a bfloat16 forward pass.

    Args:
        features: Input passed to `squeezenet`.
        labels: Sparse class labels used for the cross-entropy loss.
        mode: A `tf.estimator.ModeKeys` value.
        params: Dict read for 'num_classes', 'num_shards', 'batch_size',
            'num_epochs', 'lr', 'min_lr', 'optimizer', 'momentum' and
            'use_tpu'.

    Returns:
        A `tpu_estimator.TPUEstimatorSpec` carrying the loss, train op,
        eval metrics and predictions.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN

    # Forward pass runs inside the bfloat16 scope; logits leave it as float32.
    with bfloat16.bfloat16_scope():
        bf16_logits = squeezenet(features,
                                 is_training=training,
                                 num_classes=params["num_classes"])
        logits = tf.cast(bf16_logits, tf.float32)

    cross_entropy = tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                           labels=labels)
    loss = tf.reduce_mean(cross_entropy)

    # Linear (power=1.0) decay from params["lr"] down to params["min_lr"].
    global_batch_size = params["num_shards"] * params["batch_size"]
    total_decay_steps = (
        1300 * 1000 * params["num_epochs"] // global_batch_size)
    learning_rate = tf.train.polynomial_decay(
        params["lr"],
        global_step=tf.train.get_or_create_global_step(),
        end_learning_rate=params["min_lr"],
        decay_steps=total_decay_steps,
        power=1.0,
        cycle=False)

    # TODO(power): Hack copied from resnet: remove when summaries are working.
    # Repeats the scalar learning rate into a [batch_size, 1] tensor.
    lr_vector = tf.tile(tf.expand_dims(learning_rate, 0),
                        [params["batch_size"]])
    lr_repeat = tf.reshape(lr_vector, [params["batch_size"], 1])

    # Anything other than "adam" / "rmsprop" falls back to Nesterov momentum.
    if params["optimizer"] == "adam":
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    elif params["optimizer"] == "rmsprop":
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=learning_rate,
            momentum=params["momentum"],
            epsilon=1.0)
    else:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=params["momentum"],
            use_nesterov=True)

    if params["use_tpu"]:
        optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    train_op = optimizer.minimize(loss, tf.train.get_global_step())

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metrics=(metric_fn, [labels, logits, lr_repeat]),
        predictions=predictions,
    )

コード例 #6

ファイルを表示

def unet_separator(features, labels, mode, params):
    """Model function building a conditional U-Net audio separator.

    Args:
        features: Dict read for 'mix' and 'labels' (the conditioning input);
            in PREDICT mode 'filename' and 'sample_id' are read as well.
        labels: Ground-truth sources; used as the regression target.
        mode: A `tf.estimator.ModeKeys` value.
        params: Model configuration dict (aliased below as `model_config`).

    Returns:
        A `tpu_estimator.TPUEstimatorSpec` for PREDICT, EVAL and TRAIN.
        For any other mode the function falls off the end and returns None.
    """

    # Define host call function
    def host_call_fn(gs,
                     loss,
                     lr,
                     mix=None,
                     gt_sources=None,
                     est_sources=None):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor with shape `[batch]` for the global_step
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          mix: `Tensor` with shape `[batch, mix_samples, 1]`
          gt_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`
          est_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        # Summaries are written to <model_base_dir>/<experiment_id>.
        with summary.create_file_writer(
                model_config["model_base_dir"] + os.path.sep +
                str(model_config["experiment_id"])).as_default():
            with summary.always_record_summaries():
                summary.scalar('loss', loss[0], step=gs)
                summary.scalar('learning_rate', lr[0], step=gs)
            # NOTE(review): `gs` is an element of a Tensor, so this is a
            # Python-level condition on a tensor expression; in graph mode
            # that is presumably rejected or not evaluated per step --
            # confirm this branch behaves as intended.
            if gs % 10000 == 0:
                with summary.record_summaries_every_n_global_steps(
                        model_config["audio_summaries_every_n_steps"]):
                    summary.audio('mix',
                                  mix,
                                  model_config['expected_sr'],
                                  max_outputs=model_config["num_sources"])
                    # One ground-truth and one estimated audio summary per
                    # source (axis 1 of gt_sources).
                    for source_id in range(gt_sources.shape[1].value):
                        summary.audio('gt_sources_{source_id}'.format(
                            source_id=source_id),
                                      gt_sources[:, source_id, :, :],
                                      model_config['expected_sr'],
                                      max_outputs=model_config["num_sources"])
                        summary.audio('est_sources_{source_id}'.format(
                            source_id=source_id),
                                      est_sources[:, source_id, :, :],
                                      model_config['expected_sr'],
                                      max_outputs=model_config["num_sources"])
        return summary.all_summary_ops()

    mix = features['mix']
    conditioning = features['labels']
    sources = labels
    model_config = params
    # Shape handed to get_padding(); the last entry is 0 here.
    disc_input_shape = [
        model_config["batch_size"], model_config["num_frames"], 0
    ]

    # NOTE(review): only the separator object's construction happens inside
    # the bfloat16 scope; get_output() is called further down, outside it --
    # confirm whether the network itself is meant to be built in bfloat16.
    with bfloat16.bfloat16_scope():
        separator_class = Models.ConditionalUnetAudioSeparator.UnetAudioSeparator(
            model_config["num_layers"],
            model_config["num_initial_filters"],
            output_type=model_config["output_type"],
            context=model_config["context"],
            mono=model_config["mono_downmix"],
            upsampling=model_config["upsampling"],
            num_sources=model_config["num_sources"],
            filter_size=model_config["filter_size"],
            merge_filter_size=model_config["merge_filter_size"])

    sep_input_shape, sep_output_shape = separator_class.get_padding(
        np.array(disc_input_shape))

    # Input context that the input audio has to be padded ON EACH SIDE
    # TODO move this to dataset function
    assert mix.shape[1].value == sep_input_shape[1]
    if mode != tf.estimator.ModeKeys.PREDICT:
        # Pads axis 2 of `sources` with 2 zeros before and 3 after; the other
        # axes are left untouched.
        pad_tensor = tf.constant([[0, 0], [0, 0], [2, 3], [0, 0]])
        sources = tf.pad(sources, pad_tensor, "CONSTANT")

    separator_func = separator_class.get_output

    # Compute loss.
    # The separator outputs are stacked along a new axis 1.
    separator_sources = tf.stack(separator_func(
        mix,
        conditioning,
        True,
        not model_config["raw_audio_loss"],
        reuse=False),
                                 axis=1)

    # PREDICT returns early, before any loss or host call is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'mix': mix,
            'sources': separator_sources,
            'filename': features['filename'],
            'sample_id': features['sample_id']
        }
        return tpu_estimator.TPUEstimatorSpec(mode, predictions=predictions)

    # Sum of squared errors over all elements, cast to float32.
    separator_loss = tf.cast(
        tf.reduce_sum(tf.squared_difference(sources, separator_sources)),
        tf.float32)

    # NOTE(review): PREDICT already returned above, so this condition is
    # always true at this point.
    if mode != tf.estimator.ModeKeys.PREDICT:
        global_step = tf.train.get_global_step()
        sep_lr = tf.train.exponential_decay(model_config['init_sup_sep_lr'],
                                            global_step,
                                            model_config['decay_steps'],
                                            model_config['decay_rate'],
                                            staircase=False,
                                            name=None)

        # Reshape the scalars to shape [1] before passing them to host_call.
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(separator_loss, [1])
        lr_t = tf.reshape(sep_lr, [1])

        if model_config["write_audio_summaries"]:
            host_call = (host_call_fn,
                         [gs_t, loss_t, lr_t, mix, sources, separator_sources])
        else:
            # Zero placeholders stand in for the three audio arguments.
            # `(1)` is the integer 1, not a one-element tuple.
            host_call = (host_call_fn, [
                gs_t, loss_t, lr_t,
                tf.zeros((1)),
                tf.zeros((1)),
                tf.zeros((1))
            ])

    # Creating evaluation estimator
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, predictions):
            """Returns the mean squared error metric under the key 'mse'."""
            mean_mse_loss = tf.metrics.mean_squared_error(labels, predictions)
            return {'mse': mean_mse_loss}

        eval_params = {'labels': sources, 'predictions': separator_sources}

        return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                              loss=separator_loss,
                                              host_call=host_call,
                                              eval_metrics=(metric_fn,
                                                            eval_params))

    # Create training op.
    # TODO add learning rate schedule
    # TODO add early stopping
    if mode == tf.estimator.ModeKeys.TRAIN:
        separator_vars = Utils.getTrainableVariables("separator")
        print("Sep_Vars: " + str(Utils.getNumParams(separator_vars)))
        print("Num of variables: " + str(len(tf.global_variables())))

        separator_solver = tf.train.AdamOptimizer(learning_rate=sep_lr)
        if model_config["use_tpu"]:
            separator_solver = tpu_optimizer.CrossShardOptimizer(
                separator_solver)

        # Only the variables selected as "separator" are optimized.
        train_op = separator_solver.minimize(separator_loss,
                                             var_list=separator_vars,
                                             global_step=global_step)
        return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                              loss=separator_loss,
                                              host_call=host_call,
                                              train_op=train_op)

コード例 #7

ファイルを表示

ファイル: resnet_main.py プロジェクト: zhujl1991/pipelines

def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet to be used with TPUEstimator.

    Args:
      features: `Tensor` of batched images, or a dict holding that tensor
          under the key 'feature'.
      labels: `Tensor` of labels for the data samples
      mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
      params: `dict` of parameters passed to the model from the TPUEstimator,
          `params['batch_size']` is always provided and should be used as the
          effective batch size.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
      `TPUEstimatorSpec` for the model.
    """
    if isinstance(features, dict):
        features = features['feature']

    if FLAGS.data_format == 'channels_first':
        assert not FLAGS.transpose_input  # channels_first only for GPU
        # Moves axis 3 to position 1 (NHWC to NCHW).
        features = tf.transpose(features, [0, 3, 1, 2])

    if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance.
    # NOTE(review): the constants have shape [1, 1, 3] and broadcast against
    # the trailing axes; after the channels_first transpose above the last
    # axis is no longer the channel axis -- confirm this is intended.
    features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        """Builds the ResNet v1 network and returns its output for `features`."""
        network = resnet_model.resnet_v1(resnet_depth=FLAGS.resnet_depth,
                                         num_classes=FLAGS.num_label_classes,
                                         data_format=FLAGS.data_format)
        return network(inputs=features,
                       is_training=(mode == tf.estimator.ModeKeys.TRAIN))

    # NOTE(review): there is no else branch; any other --precision value
    # leaves `logits` unassigned and fails at its first use below.
    if FLAGS.precision == 'bfloat16':
        with bfloat16.bfloat16_scope():
            logits = build_network()
        # Logits are cast back to float32 after leaving the bfloat16 scope.
        logits = tf.cast(logits, tf.float32)
    elif FLAGS.precision == 'float32':
        logits = build_network()

    # PREDICT returns early with a plain EstimatorSpec; no loss is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    one_hot_labels = tf.one_hot(labels, FLAGS.num_label_classes)
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=one_hot_labels)

    # Add weight decay to the loss for non-batch-normalization variables.
    loss = cross_entropy + FLAGS.weight_decay * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from global_step.
        global_step = tf.train.get_global_step()
        batches_per_epoch = FLAGS.num_train_images / FLAGS.train_batch_size
        current_epoch = (tf.cast(global_step, tf.float32) / batches_per_epoch)
        learning_rate = learning_rate_schedule(current_epoch)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=FLAGS.momentum,
                                               use_nesterov=True)
        if FLAGS.use_tpu:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To the
            # user, this should look like regular synchronous training.
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency to
        # the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor with shape `[batch]` for the global_step
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                # Only the first element of each batched tensor is logged.
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            gs_t = tf.reshape(global_step, [1])
            loss_t = tf.reshape(loss, [1])
            lr_t = tf.reshape(learning_rate, [1])
            ce_t = tf.reshape(current_epoch, [1])

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    else:
        # No training op outside TRAIN mode.
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          host_call=host_call,
                                          eval_metrics=eval_metrics)

コード例 #8

ファイルを表示

def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet to be used with TPUEstimator.

    The network is always built inside a bfloat16 scope and its logits are
    cast to float32 before use.

    Args:
      features: `Tensor` of batched images, or a dict holding that tensor
          under the key 'feature'.
      labels: `Tensor` of labels for the data samples
      mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
      params: `dict` of parameters passed to the model from the TPUEstimator,
          `params['batch_size']` is always provided and should be used as the
          effective batch size.

    Returns:
      A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
      `TPUEstimatorSpec` for the model.
    """
    if isinstance(features, dict):
        features = features['feature']

    # In most cases, the default data format NCHW instead of NHWC should be
    # used for a significant performance boost on GPU/TPU. NHWC should be used
    # only if the network needs to be run on CPU since the pooling operations
    # are only supported on NHWC.
    if FLAGS.data_format == 'channels_first':
        features = tf.transpose(features, [0, 3, 1, 2])

    if FLAGS.use_transpose:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    with bfloat16.bfloat16_scope():
        network = resnet_model_v2.resnet_v2(
            resnet_size=FLAGS.resnet_depth,
            num_classes=LABEL_CLASSES,
            # NOTE(review): data_format=FLAGS.data_format is not passed here
            # even though the features may have been transposed for
            # 'channels_first' above -- confirm the network's default matches.
        )

        logits = network(inputs=features,
                         is_training=(mode == tf.estimator.ModeKeys.TRAIN))

        # Logits are cast to float32 while still inside the bfloat16 scope.
        logits = tf.cast(logits, tf.float32)

    # PREDICT returns early with a plain EstimatorSpec; no loss is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    one_hot_labels = tf.one_hot(labels, LABEL_CLASSES)
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits, onehot_labels=one_hot_labels)

    # Add weight decay to the loss for non-batch-normalization variables.
    loss = cross_entropy + WEIGHT_DECAY * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from global_step.
        global_step = tf.train.get_global_step()
        steps_per_epoch = NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        current_epoch = (tf.cast(global_step, tf.float32) / steps_per_epoch)
        learning_rate = learning_rate_schedule(current_epoch)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=MOMENTUM,
                                               use_nesterov=True)
        if FLAGS.use_tpu:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To the
            # user, this should look like regular synchronous training.
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency to
        # the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly broadcasted to
        # [params['batch_size'], ].
        # global_step is cast to int32 here and back to int64 in host_call_fn.
        gs_t = tf.reshape(tf.cast(global_step, tf.int32), [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `host_call`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `host_call`.

            Args:
              gs: `Tensor with shape `[batch, ]` for the global_step
              loss: `Tensor` with shape `[batch, ]` for the training loss.
              lr: `Tensor` with shape `[batch, ]` for the learning_rate.
              ce: `Tensor` with shape `[batch, ]` for the current_epoch.

            Returns:
              List of summary ops to run on the CPU host.
            """
            # Outfeed supports int32 but global_step is expected to be int64.
            gs = tf.cast(tf.reduce_mean(gs), tf.int64)
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    # Each batched value is reduced to its mean before logging.
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate',
                                   tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch',
                                   tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()

        # The host call is only attached when explicitly enabled by flag.
        if FLAGS.enable_hostcall:
            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    else:
        # No training op outside TRAIN mode.
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch, ]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          host_call=host_call,
                                          eval_metrics=eval_metrics)

コード例 #9

ファイルを表示

ファイル: retinanet_model.py プロジェクト: xuyithu/tpu

def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
  """Model definition for the RetinaNet model based on ResNet.

  Builds the model outputs (under a bfloat16 scope when
  `params['use_bfloat16']` is set, with the outputs cast back to float32) and
  then assembles the spec for the requested mode: predictions only for
  PREDICT, a momentum-optimizer train op for TRAIN, and a CPU-side metric
  function for EVAL.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the RetinaNet model outputs class logits and box regression outputs.
      It is called with `features` plus keyword hyperparameters and must
      return a `(cls_outputs, box_outputs)` pair of dict-like objects keyed
      by feature level.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule. It is called
      as `variable_filter_fn(tf.trainable_variables(), params['resnet_depth'])`.
      When it is None, `var_list=None` is passed to the optimizer.

  Returns:
    A `tf.estimator.EstimatorSpec` in PREDICT mode; otherwise a
    `TPUEstimatorSpec` carrying the total loss, the train op (TRAIN only),
    the eval metrics (EVAL only) and the optional scaffold function.
  """
  def _model_outputs():
    """Runs `model` on `features` with the hyperparameters from `params`."""
    return model(
        features,
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_classes=params['num_classes'],
        # presumably aspect_ratios is a list and num_scales an int, so the
        # list repetition yields len(aspect_ratios) * num_scales — confirm.
        num_anchors=len(params['aspect_ratios'] * params['num_scales']),
        resnet_depth=params['resnet_depth'],
        is_training_bn=params['is_training_bn'])

  if params['use_bfloat16']:
    # Build the network inside the bfloat16 scope, then cast every per-level
    # output back to float32 so the code below always sees float32 tensors.
    with bfloat16.bfloat16_scope():
      cls_outputs, box_outputs = _model_outputs()
      levels = cls_outputs.keys()
      for level in levels:
        cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
        box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
  else:
    cls_outputs, box_outputs = _model_outputs()
    levels = cls_outputs.keys()

  # First check if it is in PREDICT mode.
  # PREDICT returns early: no loss, optimizer or metrics are built.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    # Expose each level's outputs under a flat, level-suffixed key.
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Load pretrained model from checkpoint.
  # Only done in TRAIN mode and only when a checkpoint path is configured.
  if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      # Maps the checkpoint root ('/') onto the 'resnet%s/' variable scope.
      tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
          '/': 'resnet%s/' % params['resnet_depth'],
      })
      return tf.train.Scaffold()
  else:
    scaffold_fn = None

  # Set up training loss and learning rate.
  global_step = tf.train.get_global_step()
  learning_rate = _learning_rate_schedule(
      params['learning_rate'], params['lr_warmup_init'],
      params['lr_warmup_step'], params['lr_drop_step'], global_step)
  # cls_loss and box_loss are for logging. only total_loss is optimized.
  total_loss, cls_loss, box_loss = _detection_loss(cls_outputs, box_outputs,
                                                   labels, params)

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Restrict the optimized variables only when a filter was supplied.
    var_list = variable_filter_fn(
        tf.trainable_variables(),
        params['resnet_depth']) if variable_filter_fn else None
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(total_loss, global_step, var_list=var_list)
  else:
    train_op = None

  # Evaluation only works on GPU/CPU host and batch_size=1
  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(**kwargs):
      """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
      # Regroup the flat, level-suffixed kwargs into per-level dicts.
      cls_outputs = {}
      box_outputs = {}
      for level in range(params['min_level'], params['max_level'] + 1):
        cls_outputs[level] = kwargs['cls_outputs_%d' % level]
        box_outputs[level] = kwargs['box_outputs_%d' % level]
      detections = anchor_labeler.generate_detections(
          cls_outputs, box_outputs, kwargs['source_ids'])
      eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
      coco_metrics = eval_metric.estimator_metric_fn(detections,
                                                     kwargs['image_scales'])
      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    batch_size = params['batch_size']
    # Repeat each scalar loss into a [batch_size, 1] tensor; presumably
    # because tensors handed to eval_metrics need a leading batch dimension
    # — confirm against the TPUEstimatorSpec documentation.
    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'image_scales': labels['image_scales'],
    }
    # Pass every level's outputs to metric_fn under the keys it reads back.
    for level in range(params['min_level'], params['max_level'] + 1):
      metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level]
      metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level]
    eval_metrics = (metric_fn, metric_fn_inputs)

  return tpu_estimator.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)

コード例 #10

ファイルを表示

ファイル: resnet_main.py プロジェクト: zhang01GA/cloudml-samples

def resnet_model_fn(features, labels, mode, params):
  """The model_fn for ResNet to be used with TPUEstimator.

  Normalizes the input images, builds the ResNet logits in the precision
  selected by `FLAGS.precision`, and assembles the spec for the requested
  mode (predictions, training op with optional host-side summaries, or
  evaluation metrics).

  Args:
    features: `Tensor` of batched images, or a `dict` holding that tensor
      under the key `'feature'`.
    labels: `Tensor` of labels for the data samples
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

  Returns:
    A `tf.estimator.EstimatorSpec` in PREDICT mode, otherwise a
    `TPUEstimatorSpec` for the model.

  Raises:
    ValueError: if `FLAGS.precision` is neither 'bfloat16' nor 'float32'.
  """
  if isinstance(features, dict):
    features = features['feature']

  if FLAGS.data_format == 'channels_first':
    assert not FLAGS.transpose_input    # channels_first only for GPU
    features = tf.transpose(features, [0, 3, 1, 2])

  if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
    features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

  # Normalize the image to zero mean and unit variance.
  # NOTE(review): when data_format is 'channels_first' the tensor was already
  # transposed with [0, 3, 1, 2] above, so a [1, 1, 3] constant broadcasts
  # against the last axis rather than the channel axis — confirm whether that
  # path is exercised and, if so, whether the constant shape should differ.
  features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
  features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

  # This nested function allows us to avoid duplicating the logic which
  # builds the network, for different values of --precision.
  def build_network():
    """Builds ResNet and returns its output for the normalized `features`."""
    network = resnet_model.resnet_v1(
        resnet_depth=FLAGS.resnet_depth,
        num_classes=LABEL_CLASSES,
        data_format=FLAGS.data_format)
    return network(
        inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))

  if FLAGS.precision == 'bfloat16':
    with bfloat16.bfloat16_scope():
      logits = build_network()
    # Everything downstream (loss, metrics, predictions) works on float32.
    logits = tf.cast(logits, tf.float32)
  elif FLAGS.precision == 'float32':
    logits = build_network()
  else:
    # Without this branch `logits` would stay unbound and the function would
    # fail further down with an opaque NameError.
    raise ValueError(
        'Unsupported precision %r; expected "bfloat16" or "float32".' %
        (FLAGS.precision,))

  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'classify': tf.estimator.export.PredictOutput(predictions)
        })

  # If necessary, in the model_fn, use params['batch_size'] instead the batch
  # size flags (--train_batch_size or --eval_batch_size).
  batch_size = params['batch_size']   # pylint: disable=unused-variable

  # Calculate loss, which includes softmax cross entropy and L2 regularization.
  one_hot_labels = tf.one_hot(labels, LABEL_CLASSES)
  cross_entropy = tf.losses.softmax_cross_entropy(
      logits=logits, onehot_labels=one_hot_labels)

  # Add weight decay to the loss for non-batch-normalization variables.
  loss = cross_entropy + FLAGS.weight_decay * tf.add_n(
      [tf.nn.l2_loss(v) for v in tf.trainable_variables()
       if 'batch_normalization' not in v.name])

  host_call = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    # Compute the current epoch and associated learning rate from global_step.
    global_step = tf.train.get_global_step()
    batches_per_epoch = NUM_TRAIN_IMAGES / FLAGS.train_batch_size
    current_epoch = (tf.cast(global_step, tf.float32) /
                     batches_per_epoch)
    learning_rate = learning_rate_schedule(current_epoch)

    optimizer = tf.train.MomentumOptimizer(
        learning_rate=learning_rate, momentum=FLAGS.momentum, use_nesterov=True)
    if FLAGS.use_tpu:
      # When using TPU, wrap the optimizer with CrossShardOptimizer which
      # handles synchronization details between different TPU cores. To the
      # user, this should look like regular synchronous training.
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Batch normalization requires UPDATE_OPS to be added as a dependency to
    # the train operation.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step)

    if not FLAGS.skip_host_call:
      def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `host_call_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        # Only the first element of each batched tensor is recorded.
        gs = gs[0]
        with summary.create_file_writer(FLAGS.model_dir).as_default():
          with summary.always_record_summaries():
            summary.scalar('loss', loss[0], step=gs)
            summary.scalar('learning_rate', lr[0], step=gs)
            summary.scalar('current_epoch', ce[0], step=gs)

            return summary.all_summary_ops()

      # To log the loss, current learning rate, and epoch for Tensorboard, the
      # summary op needs to be run on the host CPU via host_call. host_call
      # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
      # dimension. These Tensors are implicitly concatenated to
      # [params['batch_size']].
      gs_t = tf.reshape(global_step, [1])
      loss_t = tf.reshape(loss, [1])
      lr_t = tf.reshape(learning_rate, [1])
      ce_t = tf.reshape(current_epoch, [1])

      host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:
    def metric_fn(labels, logits):
      """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
      predictions = tf.argmax(logits, axis=1)
      top_1_accuracy = tf.metrics.accuracy(labels, predictions)
      in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
      top_5_accuracy = tf.metrics.mean(in_top_5)

      return {
          'top_1_accuracy': top_1_accuracy,
          'top_5_accuracy': top_5_accuracy,
      }

    eval_metrics = (metric_fn, [labels, logits])

  return tpu_estimator.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics)

コード例 #11

ファイルを表示

def _model_fn(features, labels, mode, params, model):
    """Model definition for the SSD model based on ResNet-50.

    Normalizes the input images, builds the class/box outputs (under a
    bfloat16 scope when `params['use_bfloat16']` is set, cast back to
    float32) and assembles the spec for the requested mode.

    Args:
      features: the input image tensor with shape [batch_size, height, width, 3].
        The height and width are fixed and equal. In PREDICT mode this is a
        dictionary instead: its 'image' entry is popped and used as the image
        tensor, and the remaining entries are treated as labels.
      labels: the input labels in a dictionary. The labels include class targets
        and box targets which are dense label maps. The labels are generated from
        get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the SSD model outputs class logits and box regression outputs.

    Returns:
      spec: the EstimatorSpec or TPUEstimatorSpec to run training, evaluation,
        or prediction.

    Raises:
      NotImplementedError: in EVAL mode, which this model_fn does not support.
    """
    if mode == tf.estimator.ModeKeys.PREDICT:
        # NOTE: this pops 'image' out of the dict the caller passed in.
        labels = features
        features = labels.pop('image')

    # Manually apply the double transpose trick for training data.
    if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])
        labels[ssd_constants.BOXES] = tf.transpose(labels[ssd_constants.BOXES],
                                                   [2, 0, 1])
        labels[ssd_constants.CLASSES] = tf.transpose(
            labels[ssd_constants.CLASSES], [2, 0, 1])

    # Normalize the image to zero mean and unit variance.
    mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_MEAN,
                         value=ssd_constants.NORMALIZATION_MEAN)
    mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_STD,
                         value=ssd_constants.NORMALIZATION_STD)

    features -= tf.constant(ssd_constants.NORMALIZATION_MEAN,
                            shape=[1, 1, 3],
                            dtype=features.dtype)

    features /= tf.constant(ssd_constants.NORMALIZATION_STD,
                            shape=[1, 1, 3],
                            dtype=features.dtype)

    def _model_outputs():
        """Runs `model` on the normalized features; BN trains only in TRAIN."""
        return model(features,
                     params,
                     is_training_bn=(mode == tf.estimator.ModeKeys.TRAIN))

    if params['use_bfloat16']:
        # Build the network inside the bfloat16 scope, then cast every
        # per-level output back to float32 for the code below.
        with bfloat16.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            levels = cls_outputs.keys()
            for level in levels:
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()
        levels = cls_outputs.keys()

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        flattened_cls, flattened_box = concat_outputs(cls_outputs, box_outputs)
        mlperf_log.ssd_print(key=mlperf_log.SCALES,
                             value=ssd_constants.BOX_CODER_SCALES)
        ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
            scale_factors=ssd_constants.BOX_CODER_SCALES)

        anchors = box_list.BoxList(
            tf.convert_to_tensor(dataloader.DefaultBoxes()('ltrb')))

        decoded_boxes = box_coder.batch_decode(encoded_boxes=flattened_box,
                                               box_coder=ssd_box_coder,
                                               anchors=anchors)

        pred_scores = tf.nn.softmax(flattened_cls, axis=2)

        pred_scores, indices = select_top_k_scores(
            pred_scores, ssd_constants.MAX_NUM_EVAL_BOXES)

        # Start from the remaining input entries and add the model results.
        predictions = dict(
            labels,
            indices=indices,
            pred_scores=pred_scores,
            pred_box=decoded_boxes,
        )

        if params['visualize_dataloader']:
            # this is for inference visualization.
            predictions['image'] = features

        if params['use_tpu']:
            return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                                  predictions=predictions)

        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Load pretrained model from checkpoint.
    if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            tf.train.init_from_checkpoint(
                params['resnet_checkpoint'], {
                    '/': 'resnet%s/' % ssd_constants.RESNET_DEPTH,
                })
            return tf.train.Scaffold()
    else:
        scaffold_fn = None

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)
    mlperf_log.ssd_print(key=mlperf_log.OPT_LR, deferred=True)
    # cls_loss and box_loss are for logging. only total_loss is optimized.
    total_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
                                                    labels)

    # L2 weight decay over every trainable variable.
    total_loss += params['weight_decay'] * tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=ssd_constants.MOMENTUM)
        if params['use_tpu']:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        mlperf_log.ssd_print(key=mlperf_log.OPT_NAME,
                             value='tf.train.MomentumOptimizer')
        # TODO(wangtao): figure out how to log learning rate.
        # mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=learning_rate)
        mlperf_log.ssd_print(key=mlperf_log.OPT_MOMENTUM,
                             value=ssd_constants.MOMENTUM)
        mlperf_log.ssd_print(key=mlperf_log.OPT_WEIGHT_DECAY,
                             value=params['weight_decay'])

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if params['device'] == 'gpu':
            # GPU uses tf.group to avoid dependency overhead on update_ops; also,
            # multi-GPU requires a different EstimatorSpec class object
            train_op = tf.group(optimizer.minimize(total_loss, global_step),
                                update_ops)
            # scaffold_fn is None when no pretrained checkpoint is configured;
            # calling it unconditionally would raise TypeError in that case.
            scaffold = scaffold_fn() if scaffold_fn is not None else None
            return model_fn_lib.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op,
                                              scaffold=scaffold)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss, global_step)

        if params['use_host_call']:

            def host_call_fn(global_step, total_loss, cls_loss, box_loss,
                             learning_rate):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `host_call_fn`, provide as part
                of the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  global_step: `Tensor` with shape `[batch, ]` for the global_step.
                  total_loss: `Tensor` with shape `[batch, ]` for the training loss.
                  cls_loss: `Tensor` with shape `[batch, ]` for the training cls loss.
                  box_loss: `Tensor` with shape `[batch, ]` for the training box loss.
                  learning_rate: `Tensor` with shape `[batch, ]` for the learning_rate.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                # Collapse the batched global_step values into one scalar step.
                # NOTE(review): the original comment here read "Outfeed
                # supports int32 but global_step is expected to be int64", yet
                # no cast is performed — confirm whether one is needed.
                global_step = tf.reduce_mean(global_step)
                # Host call fns are executed FLAGS.iterations_per_loop times after one
                # TPU loop is finished, setting max_queue value to the same as number of
                # iterations will make the summary writer only flush the data to storage
                # once per loop.
                with (tf.contrib.summary.create_file_writer(
                        params['model_dir'],
                        max_queue=params['iterations_per_loop']).as_default()):
                    with tf.contrib.summary.always_record_summaries():
                        tf.contrib.summary.scalar('total_loss',
                                                  tf.reduce_mean(total_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar('cls_loss',
                                                  tf.reduce_mean(cls_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar('box_loss',
                                                  tf.reduce_mean(box_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar(
                            'learning_rate',
                            tf.reduce_mean(learning_rate),
                            step=global_step)

                        return tf.contrib.summary.all_summary_ops()

            # To log the losses and the current learning rate for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            global_step_t = tf.reshape(global_step, [1])
            total_loss_t = tf.reshape(total_loss, [1])
            cls_loss_t = tf.reshape(cls_loss, [1])
            box_loss_t = tf.reshape(box_loss, [1])
            learning_rate_t = tf.reshape(learning_rate, [1])
            host_call = (host_call_fn, [
                global_step_t, total_loss_t, cls_loss_t, box_loss_t,
                learning_rate_t
            ])
    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:
        raise NotImplementedError

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=total_loss,
                                          train_op=train_op,
                                          host_call=host_call,
                                          eval_metrics=eval_metrics,
                                          scaffold_fn=scaffold_fn)

コード例 #12

ファイルを表示

 def testScopeName(self):
   """Checks that the variable scope yielded by bfloat16_scope is named "bfloat16"."""
   expected_name = "bfloat16"
   with bfloat16.bfloat16_scope() as scope:
     self.assertEqual(scope.name, expected_name)

コード例 #13

ファイルを表示

ファイル: trainer_tpu.py プロジェクト: samsgates/dont-be-turtle

def model_fn(features, labels, mode, params):
    """
    The model_fn for dontbeturtle model to be used with TPUEstimator.

    Args:
        features:   `Tensor` of batched input images <batchNum x M x M x 3>.
        labels: labels_heatmap_list
        labels =
                        [ [labels_head],
                          [label_neck],
                          [label_rshoulder],
                          [label_lshoulder] ]
                        where has shape <batchNum N x N x 4>

        mode:       one of `tf.estimator.ModeKeys.
                    {
                     - TRAIN (default)  : for weight training ( running forward + backward + metric)
                     - EVAL,            : for validation (running forward + metric)
                     - PREDICT          : for prediction ( running forward only )
                     }`

        Returns:
        A `TPUEstimatorSpec` for the model
    """
    del params  # unused

    if isinstance(features, dict):
        features = features['feature']
    if FLAGS.data_format == 'channels_first':
        assert not FLAGS.transpose_input  # channels_first only for GPU
        features = tf.transpose(features, [0, 3, 1, 2])
    if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    with tf.name_scope(name='feature_norm', values=[features]):
        # Standardization to the image by zero mean and unit variance.
        features -= tf.constant(MEAN_RGB,
                                shape=[1, 1, 3],
                                dtype=features.dtype)
        features /= tf.constant(STDDEV_RGB,
                                shape=[1, 1, 3],
                                dtype=features.dtype)

        # set input_shape
        features.set_shape(features.get_shape().merge_with(
            tf.TensorShape([
                None, model_config.input_height, model_config.input_width, None
            ])))

    # Model building ============================
    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        with tf.name_scope(name='build_network'):
            ''' get model '''
            out_heatmap, mid_heatmap, end_points\
                = get_model(ch_in           = features,
                            model_config    = model_config,
                            scope           = 'model')
            '''specify is_trainable on model '''
            if mode == tf.estimator.ModeKeys.TRAIN:
                model_config.hg_config.is_trainable = True
                model_config.sv_config.is_trainable = True
                model_config.rc_config.is_trainable = True
                model_config.out_config.is_trainable = True
            elif (mode == tf.estimator.ModeKeys.EVAL) or \
                    (mode == tf.estimator.ModeKeys.PREDICT):
                model_config.hg_config.is_trainable = False
                model_config.sv_config.is_trainable = False
                model_config.rc_config.is_trainable = False
                model_config.out_config.is_trainable = False

            tf.logging.info('[model_fn] feature shape=%s' %
                            features.get_shape().as_list())
            tf.logging.info('[model_fn] labels  shape=%s' %
                            labels.get_shape().as_list())
            tf.logging.info('[model_fn] out_heatmap  shape=%s' %
                            out_heatmap.get_shape().as_list())
            tf.logging.info(
                '-----------------------------------------------------------')

            for n in range(0, model_config.num_of_hgstacking):
                tf.logging.info('[model_fn] mid_heatmap%d  shape=%s' %
                                (n, mid_heatmap[n].get_shape().as_list()))

        return out_heatmap, mid_heatmap, end_points

    if FLAGS.precision == 'bfloat16':
        with bfloat16.bfloat16_scope():
            logits_out_heatmap, \
            logits_mid_heatmap, \
            end_points = build_network()
            logits_out_heatmap = tf.cast(logits_out_heatmap, tf.float32)
    else:
        # FLAGS.precision == 'float32':
        logits_out_heatmap, \
        logits_mid_heatmap, \
        end_points = build_network()

    #--------------------------------------------------------
    # mode == prediction case manipulation ===================
    # [[[ here need to change ]]] -----
    # if mode == tf.estimator.ModeKeys.PREDICT:
    #     predictions = {
    #
    #         # output format should be clarify here
    #         'pred_head': tf.argmax(logits_heatmap_out[-1,], axis=1),
    #         'conf_head': tf.nn.softmax(logits, name='confidence_head')
    #     }
    #
    #     # if the prediction case return here
    #     return tf.estimator.EstimatorSpec(
    #         mode=mode,
    #         predictions=predictions,
    #         export_outputs={
    #             'classify': tf.estimator.export.PredictOutput(predictions)
    #         })
    # -----------------------------

    ### output layer ===
    with tf.name_scope(name='out_post_proc',
                       values=[logits_out_heatmap, labels]):
        # heatmap activation of output layer out
        act_out_heatmaps = get_heatmap_activation(logits=logits_out_heatmap,
                                                  scope='out_heatmap')
        # heatmap loss
        total_out_losssum = \
            get_loss_heatmap(pred_heatmaps=act_out_heatmaps,
                             label_heatmaps=labels,
                             scope='out_loss')

    ### middle layer ===
    with tf.name_scope(name='mid_post_proc',
                       values=[logits_mid_heatmap, labels]):
        ### supervision layers ===
        act_mid_heatmap_list = []
        total_mid_losssum_list = []
        total_mid_losssum_acc = 0.0

        for stacked_hg_index in range(0, model_config.num_of_hgstacking):
            # heatmap activation of supervision layer out
            act_mid_heatmap_temp = \
                get_heatmap_activation(logits=logits_mid_heatmap[stacked_hg_index],
                                       scope='mid_heatmap_' + str(stacked_hg_index))
            # heatmap loss
            total_mid_losssum_temp = \
                get_loss_heatmap(pred_heatmaps=act_mid_heatmap_temp,
                                 label_heatmaps=labels,
                                 scope='mid_loss_' + str(stacked_hg_index))

            # collect loss and heatmap in list
            act_mid_heatmap_list.append(act_mid_heatmap_temp)
            total_mid_losssum_list.append(total_mid_losssum_temp)
            total_mid_losssum_acc += total_mid_losssum_temp

    ### total loss ===
    with tf.name_scope(name='total_loss',
                       values=[total_out_losssum, total_mid_losssum_acc]):
        # Collect weight regularizer loss =====
        loss_regularizer = tf.losses.get_regularization_loss()
        # sum up all losses =====
        loss = total_out_losssum + total_mid_losssum_acc + loss_regularizer

    host_call = None
    summary_hook = None
    train_op = None

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from global_step.
        global_step = tf.train.get_global_step()
        batchnum_per_epoch = np.floor(FLAGS.num_train_images /
                                      FLAGS.train_batch_size)

        current_epoch = (tf.cast(global_step, tf.float32) / batchnum_per_epoch)
        learning_rate = learning_rate_schedule(current_epoch=current_epoch)
        optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                              name='RMSprop_opt')

        if FLAGS.use_tpu:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To the
            # user, this should look like regular synchronous training.
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)
        '''
            # Batch normalization requires UPDATE_OPS to be added as a dependency to
            # the train operation.
            # when training, the moving_mean and moving_variance need to be updated.
        '''
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        if FLAGS.is_tensorboard_summary:
            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [model_config['batch_size']].
            gs_t = tf.reshape(global_step, [1])
            loss_t = tf.reshape(loss, [1])

            # mid_loss_list_t = []
            # for n in range(0,model_config.num_of_hgstacking):
            #     mid_loss_list_t[n] = tf.reshape(mid_loss_list[n],[1])

            lr_t = tf.reshape(learning_rate, [1])
            ce_t = tf.reshape(current_epoch, [1])

            if FLAGS.use_tpu:
                # host_call = (tb_summary_fn_tpu, [gs_t, loss_t,mid_loss_list_t, lr_t, ce_t])
                host_call = (tb_summary_fn_tpu, [gs_t, loss_t, lr_t, ce_t])
            else:

                ## create tflog dir
                now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
                tb_logdir_path = FLAGS.tflogs_dir

                tb_logdir = "{}/run-{}/".format(tb_logdir_path, now)
                tf.logging.info('[model_fn] tf summary at %s' % tb_logdir)

                if not tf.gfile.Exists(tb_logdir_path):
                    tf.gfile.MakeDirs(tb_logdir_path)
                tf.summary.scalar('loss', loss)

                for n in range(0, model_config.num_of_hgstacking):
                    summary.scalar('mid_loss_head' + str(n),
                                   total_mid_losssum_list[n])
                    summary.scalar('mid_loss_neck' + str(n),
                                   total_mid_losssum_list[n])
                    summary.scalar('mid_loss_Rshoulder' + str(n),
                                   total_mid_losssum_list[n])
                    summary.scalar('mid_loss_Lshoulder' + str(n),
                                   total_mid_losssum_list[n])

                    tf.summary.scalar('learning_rate', learning_rate)
                    tf.summary.scalar('current_epoch', current_epoch)

                    tf.logging.info('Create SummarySaveHook.')
                    summary_hook = tf.train.SummarySaverHook(
                        save_steps=FLAGS.summary_step,
                        output_dir=tb_logdir,
                        summary_op=tf.summary.merge_all())

    if FLAGS.use_tpu:
        # in case of TPUEstimator metric_ops must be in a form of tuple
        metric_ops = (metric_fn, [labels, logits_out_heatmap])
        tfestimator = tpu_estimator.TPUEstimatorSpec(mode=mode,
                                                     loss=loss,
                                                     train_op=train_op,
                                                     host_call=host_call,
                                                     eval_metrics=metric_ops)
    else:
        # in case of Estimator metric_ops must be in a form of dictionary
        metric_ops = metric_fn(labels, logits_out_heatmap)
        tfestimator = tf.estimator.EstimatorSpec(mode=mode,
                                                 loss=loss,
                                                 train_op=train_op,
                                                 eval_metric_ops=metric_ops,
                                                 training_hooks=[summary_hook])
    return tfestimator

コード例 #14

ファイルを表示

ファイル: inception_v2_bfloat16.py プロジェクト: vinhngx/tpu

def inception_model_fn(features, labels, mode, params):
    """Inception v2 model using Estimator API.

    Builds the Inception v2 network inside a bfloat16 scope, casts its outputs
    back to float32, and assembles the loss, train op, training host call and
    evaluation metrics.

    Args:
        features: Input tensor; it is passed through `tensor_transform_fn`
            together with `params['input_perm']` before reaching the network.
        labels: Label tensor; one-hot encoded here with `FLAGS.num_classes`.
        mode: A `tf.estimator.ModeKeys` value.
        params: Dict of parameters; only `'input_perm'` is read here.

    Returns:
        A `tf.estimator.EstimatorSpec` holding the predictions in PREDICT
        mode, otherwise a `tpu_estimator.TPUEstimatorSpec`.

    Raises:
        ValueError: If training is requested and `FLAGS.optimizer` is not one
            of 'sgd', 'momentum' or 'RMS'.
    """
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)
    features = tensor_transform_fn(features, params['input_perm'])

    with bfloat16.bfloat16_scope():
        arg_scope_kwargs = {
            'batch_norm_decay': BATCH_NORM_DECAY,
            'batch_norm_epsilon': BATCH_NORM_EPSILON,
        }
        if FLAGS.clear_update_collections:
            # updates_collections must be set to None in order to use fused
            # batchnorm
            arg_scope_kwargs['updates_collections'] = None

        with arg_scope(inception.inception_v2_arg_scope(**arg_scope_kwargs)):
            logits, end_points = inception.inception_v2(
                features,
                num_classes,
                is_training=is_training,
                replace_separable_convolution=True)

        # Everything downstream of the network (loss, metrics) works on
        # float32 tensors.
        logits = tf.cast(logits, tf.float32)
        for k in list(end_points):
            end_points[k] = tf.cast(end_points[k], tf.float32)

    # NOTE: `predictions` aliases `end_points`, so the update below also adds
    # the 'classes' and 'probabilities' entries to `end_points`.
    predictions = end_points
    predictions.update({
        'classes':
        tf.argmax(input=logits, axis=1),
        'probabilities':
        tf.nn.softmax(logits, name='softmax_tensor')
    })

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        # Route `labels` through Print ops so predictions and labels are
        # printed whenever the labels are evaluated.
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                           logits=logits,
                                           weights=1.0,
                                           label_smoothing=0.1)
    # L2 weight decay on every trainable variable whose name does not contain
    # 'batch_normalization'.
    loss += WEIGHT_DECAY * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    if FLAGS.use_learning_rate_warmup:
        # Adjust initial learning rate to match final warmup rate
        warmup_decay = FLAGS.learning_rate_decay**(
            (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
            FLAGS.learning_rate_decay_epochs)
        adj_initial_learning_rate = initial_learning_rate * warmup_decay

    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=int(FLAGS.learning_rate_decay_epochs *
                            batches_per_epoch),
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        if FLAGS.use_learning_rate_warmup:
            # Piecewise schedule: constant `wlr` before `cold_epochs`, a
            # linear ramp until `exp_decay_start`, then the exponential decay
            # computed above.
            wlr = 0.1 * adj_initial_learning_rate
            wlr_height = tf.cast(
                0.9 * adj_initial_learning_rate /
                (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
                tf.float32)
            epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
            exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                               FLAGS.learning_rate_decay_epochs)
            lin_inc_lr = tf.add(
                wlr,
                tf.multiply(
                    tf.cast(tf.subtract(current_epoch, epoch_offset),
                            tf.float32), wlr_height))
            learning_rate = tf.where(
                tf.greater_equal(current_epoch, FLAGS.cold_epochs),
                (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                          learning_rate, lin_inc_lr)), wlr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            # The logging call takes printf-style arguments, so the message
            # needs a placeholder for the optimizer name. Raise explicitly so
            # an unsupported value never reaches the code below with
            # `optimizer` unbound.
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)
            raise ValueError('Unknown optimizer: %s' % FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Run the ops collected under UPDATE_OPS before each training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly concatenated to
        # [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `host_call`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `host_call`.

            Args:
                gs: `Tensor` with shape `[batch]` for the global_step.
                loss: `Tensor` with shape `[batch]` for the training loss.
                lr: `Tensor` with shape `[batch]` for the learning_rate.
                ce: `Tensor` with shape `[batch]` for the current_epoch.

            Returns:
                List of summary ops to run on the CPU host.
            """
            gs = gs[0]
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate',
                                   tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch',
                                   tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()

        host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
                labels: `Tensor` with shape `[batch, ]`.
                logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
                A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          host_call=host_call,
                                          eval_metrics=eval_metrics)

コード例 #15

ファイルを表示

ファイル: mobilenet_bfloat16_2.py プロジェクト: vinhngx/tpu

def model_fn(features, labels, mode, params):
    """Mobilenet v1 model using Estimator API.

    Builds MobileNet v1 inside a bfloat16 scope, casts its outputs back to
    float32, and assembles the loss, train op and evaluation metrics.

    Args:
        features: Input tensor; it is passed through `tensor_transform_fn`
            together with `params['input_perm']` before reaching the network.
        labels: Label tensor; one-hot encoded here with `FLAGS.num_classes`.
        mode: A `tf.estimator.ModeKeys` value.
        params: Dict of parameters; only `'input_perm'` is read here.

    Returns:
        A `tf.estimator.EstimatorSpec` holding the predictions in PREDICT
        mode, otherwise a `tpu_estimator.TPUEstimatorSpec`.

    Raises:
        ValueError: If training is requested and `FLAGS.optimizer` is not one
            of 'sgd', 'momentum' or 'RMS'.
    """
    num_classes = FLAGS.num_classes
    training_active = (mode == tf.estimator.ModeKeys.TRAIN)
    eval_active = (mode == tf.estimator.ModeKeys.EVAL)

    features = tensor_transform_fn(features, params['input_perm'])

    with bfloat16.bfloat16_scope():
        # NOTE(review): FLAGS.clear_update_collections used to select between
        # two byte-identical branches here (neither passed
        # updates_collections=None to the arg scope), so the flag had no
        # effect on the graph and a single construction is kept. Confirm
        # whether the flag was meant to change the arg scope.
        with arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
            logits, end_points = mobilenet_v1.mobilenet_v1(
                features,
                num_classes,
                is_training=training_active,
                depth_multiplier=FLAGS.depth_multiplier)

        # Everything downstream of the network (loss, metrics) works on
        # float32 tensors.
        logits = tf.cast(logits, tf.float32)
        for k in list(end_points):
            end_points[k] = tf.cast(end_points[k], tf.float32)

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        # Route `labels` through Print ops so predictions and labels are
        # printed whenever the labels are evaluated.
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                           logits=logits,
                                           weights=1.0,
                                           label_smoothing=0.1)
    # L2 weight decay on every trainable variable whose name does not contain
    # 'batch_normalization'.
    loss += WEIGHT_DECAY * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    final_learning_rate = 0.0001 * initial_learning_rate

    train_op = None
    if training_active:
        batches_per_epoch = _NUM_TRAIN_IMAGES // FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=FLAGS.learning_rate_decay_epochs * batches_per_epoch,
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            # The logging call takes printf-style arguments, so the message
            # needs a placeholder for the optimizer name. Raise explicitly so
            # an unsupported value never reaches the code below with
            # `optimizer` unbound.
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)
            raise ValueError('Unknown optimizer: %s' % FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Run the ops collected under UPDATE_OPS before each training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

    eval_metrics = None
    if eval_active:

        def metric_fn(labels, predictions):
            """Return the accuracy metric for argmax of `predictions`."""
            accuracy = tf.metrics.accuracy(
                labels, tf.argmax(input=predictions, axis=1))
            return {'accuracy': accuracy}

        # Accuracy is computed either from the raw logits or from the
        # network's 'Predictions' end point, depending on the flag.
        if FLAGS.use_logits:
            eval_predictions = logits
        else:
            eval_predictions = end_points['Predictions']

        eval_metrics = (metric_fn, [labels, eval_predictions])

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          eval_metrics=eval_metrics)

コード例 #16

ファイルを表示

def model_fn(features, labels, mode, params):
    """Build the TPUEstimatorSpec for training or evaluating `shufflenet`.

    Only TRAIN and EVAL modes are handled; PREDICT is rejected by an assert.

    Args:
        features: Image tensor; transposed with permutation [3, 0, 1, 2]
            before being fed to the network.
        labels: Label tensor; one-hot encoded with `params['num_classes']`.
        mode: A `tf.estimator.ModeKeys` value (TRAIN or EVAL).
        params: Dict read for 'num_classes', 'depth_multiplier',
            'use_bfloat16', 'weight_decay', 'model_dir' and
            'iterations_per_loop'; it is also forwarded to
            `get_learning_rate`.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` with `eval_metrics` in EVAL mode,
        or with `train_op` and `host_call` in TRAIN mode.
    """
    # Inference is handled elsewhere, so PREDICT must never reach this point.
    assert mode != tf.estimator.ModeKeys.PREDICT

    training = mode == tf.estimator.ModeKeys.TRAIN

    def build_logits(batch):
        """Run `shufflenet` on `batch` with the configured hyperparameters."""
        return shufflenet(batch,
                          training,
                          num_classes=params['num_classes'],
                          depth_multiplier=params['depth_multiplier'])

    # Per the original author's note, `features` is a half precision tensor of
    # shape [height, width, 3, batch_size] holding RGB images in [0, 1].
    nhwc_images = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    if not params['use_bfloat16']:
        logits = build_logits(nhwc_images)
    else:
        with bfloat16.bfloat16_scope():
            logits = build_logits(nhwc_images)
        # Back to full precision for the loss and metrics.
        logits = tf.to_float(logits)

    with tf.name_scope('weight_decay'):
        add_weight_decay(params['weight_decay'])
        regularization_loss = tf.losses.get_regularization_loss()

    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.losses.softmax_cross_entropy(
            logits=logits,
            onehot_labels=tf.one_hot(labels, params['num_classes']),
            label_smoothing=LABEL_SMOOTHING)

    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [labels, logits])
        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                               loss=total_loss,
                                               eval_metrics=eval_metrics)

    assert mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('learning_rate_schedule'):
        global_step = tf.train.get_global_step()
        learning_rate = get_learning_rate(global_step, params)

    # The ops collected under UPDATE_OPS must run before the optimizer step.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        with tf.variable_scope('optimizer'):
            momentum_opt = tf.train.MomentumOptimizer(
                learning_rate,
                momentum=MOMENTUM,
                use_nesterov=USE_NESTEROV)
            sharded_opt = tpu_optimizer.CrossShardOptimizer(momentum_opt)
            train_op = sharded_opt.minimize(total_loss, global_step)

    # Chain the moving-average update after the optimizer step.
    with tf.control_dependencies([train_op]):
        with tf.name_scope('ema'):
            averager = tf.train.ExponentialMovingAverage(
                decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
            train_op = averager.apply(tf.trainable_variables())

    with tf.name_scope('train_accuracy_calculation'):
        predicted = tf.argmax(logits, axis=1, output_type=tf.int32)
        hits = tf.equal(labels, predicted)
        train_accuracy = tf.reduce_mean(tf.to_float(hits), axis=0)

    # Each tensor is reshaped to [1] before being handed to the host call;
    # the order matches the parameters of `host_call_fn`.
    tensors_to_summarize = [
        tf.reshape(scalar, [1])
        for scalar in (global_step, total_loss, cross_entropy,
                       regularization_loss, learning_rate, train_accuracy)
    ]

    def host_call_fn(gs, loss_value, xent_value, reg_value, lr_value,
                     acc_value):
        """Write scalar training summaries and return the summary ops."""
        step = gs[0]
        writer = summary.create_file_writer(
            params['model_dir'], max_queue=params['iterations_per_loop'])
        with writer.as_default(), summary.always_record_summaries():
            summary.scalar('entire_loss', loss_value[0], step=step)
            summary.scalar('cross_entropy_loss', xent_value[0], step=step)
            summary.scalar('regularization_loss', reg_value[0], step=step)
            summary.scalar('learning_rate', lr_value[0], step=step)
            summary.scalar('train_accuracy', acc_value[0], step=step)
            return summary.all_summary_ops()

    host_call = (host_call_fn, tensors_to_summarize)
    return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                           loss=total_loss,
                                           train_op=train_op,
                                           host_call=host_call)

コード例 #17

ファイルを表示

ファイル: densenet_imagenet_bfloat16.py プロジェクト: vinhngx/tpu

def model_fn(features, labels, mode, params):
    """Estimator model_fn that builds a DenseNet under a bfloat16 scope.

    Args:
        features: Input tensor passed straight to the DenseNet builder.
        labels: One-hot label tensor (used as `onehot_labels` and reduced
            with argmax over axis 1 in the eval metric).
        mode: A `tf.estimator.ModeKeys` value.
        params: Dict read for "batches_per_epoch" and "batch_size".

    Returns:
        A `tpu_estimator.TPUEstimatorSpec`; `train_op` is set only in TRAIN
        mode and `eval_metrics` only in EVAL mode.
    """
    tf.logging.info("model_fn")

    training = mode == tf.estimator.ModeKeys.TRAIN
    depth = FLAGS.network_depth

    with bfloat16.bfloat16_scope():
        # Choose the builder for the requested depth; anything other than
        # 169 or 201 uses the 121-layer variant, with a log line when the
        # request was not 121 itself.
        if depth == 169:
            build_densenet = densenet_model.densenet_imagenet_169
        elif depth == 201:
            build_densenet = densenet_model.densenet_imagenet_201
        else:
            if depth != 121:
                tf.logging.info(
                    "Number of layers not supported, revert to 121")
            build_densenet = densenet_model.densenet_imagenet_121

        raw_logits = build_densenet(features, is_training=training)
        logits = tf.cast(raw_logits, tf.float32)

    # Softmax cross entropy on the float32 logits.
    cross_entropy = tf.losses.softmax_cross_entropy(logits=logits,
                                                    onehot_labels=labels)

    # L2 weight decay over every trainable variable whose name does not
    # contain "batch_normalization".
    l2_terms = [
        tf.nn.l2_loss(var) for var in tf.trainable_variables()
        if "batch_normalization" not in var.name
    ]
    loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(l2_terms)

    global_step = tf.train.get_global_step()
    steps_so_far = tf.cast(global_step, tf.float32)
    current_epoch = steps_so_far / params["batches_per_epoch"]
    learning_rate = learning_rate_schedule(current_epoch)

    batch_size = params["batch_size"]

    def _repeat_per_example(value):
        """Tile `value` `batch_size` times and reshape to [batch_size, 1]."""
        tiled = tf.tile(tf.expand_dims(value, 0), [batch_size])
        return tf.reshape(tiled, [batch_size, 1])

    # TODO(chrisying): this is a hack to get the LR and epoch for Tensorboard.
    # Reimplement this when TPU training summaries are supported.
    lr_repeat = _repeat_per_example(learning_rate)
    ce_repeat = _repeat_per_example(current_epoch)

    train_op = None
    if training:
        base_optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=_MOMENTUM)
        optimizer = tpu_optimizer.CrossShardOptimizer(base_optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.minimize(loss, global_step)

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits, lr_repeat, ce_repeat):
            """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
            predicted_classes = tf.argmax(logits, axis=1)
            true_classes = tf.argmax(labels, axis=1)
            metrics = {}
            metrics["accuracy"] = tf.metrics.accuracy(true_classes,
                                                      predicted_classes)
            metrics["learning_rate"] = tf.metrics.mean(lr_repeat)
            metrics["current_epoch"] = tf.metrics.mean(ce_repeat)
            return metrics

        eval_metrics = (metric_fn, [labels, logits, lr_repeat, ce_repeat])

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          eval_metrics=eval_metrics)