Example #1
def _compute_losses_and_predictions_dicts(model,
                                          features,
                                          labels,
                                          add_regularization_loss=True):
    """Computes the losses dict and predictions dict for a model on inputs."""
    model_lib.provide_groundtruth(model, labels)
    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape],
        **model.get_side_inputs(features))
    prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
    losses_dict = model.loss(prediction_dict,
                             features[fields.InputDataFields.true_image_shape])
    losses = [loss_tensor for loss_tensor in losses_dict.values()]
    if add_regularization_loss:
        regularization_losses = model.regularization_losses()
        if regularization_losses:
            regularization_losses = ops.bfloat16_to_float32_nested(
                regularization_losses)
            regularization_loss = tf.add_n(regularization_losses,
                                           name="regularization_loss")
            losses.append(regularization_loss)
            losses_dict["Loss/regularization_loss"] = regularization_loss
    total_loss = tf.add_n(losses, name="total_loss")
    losses_dict["Loss/total_loss"] = total_loss
    return losses_dict, prediction_dict
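
In the TF2 training binding this helper is typically driven from an eager train step. A minimal sketch of such a step, assuming `detection_model` is a Keras-based DetectionModel and `features`/`labels` come from `inputs.train_input`; the optimizer choice is illustrative:

import tensorflow as tf

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

def train_step(detection_model, features, labels):
    # Run the forward pass under a tape so gradients can be taken
    # against the total loss returned by the helper.
    with tf.GradientTape() as tape:
        losses_dict, _ = _compute_losses_and_predictions_dicts(
            detection_model, features, labels, add_regularization_loss=True)
        total_loss = losses_dict['Loss/total_loss']
    gradients = tape.gradient(total_loss, detection_model.trainable_variables)
    optimizer.apply_gradients(
        zip(gradients, detection_model.trainable_variables))
    return total_loss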
Example #2
def _compute_losses_and_predictions_dicts(model,
                                          features,
                                          labels,
                                          add_regularization_loss=True,
                                          use_tpu=False,
                                          use_bfloat16=False):
    """Computes the losses dict and predictions dict for a model on inputs."""
    model_lib.provide_groundtruth(model, labels)
    preprocessed_images = features[fields.InputDataFields.image]

    # TODO(kaftan): Check how we're supposed to do this mixed precision stuff
    # in TF2 TPUStrategy + Keras.
    if use_tpu and use_bfloat16:
        # tf.tpu.bfloat16_scope replaces the removed tf.contrib.tpu namespace.
        with tf.tpu.bfloat16_scope():
            prediction_dict = model.predict(
                preprocessed_images,
                features[fields.InputDataFields.true_image_shape])
            prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
    else:
        prediction_dict = model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape])

    losses_dict = model.loss(prediction_dict,
                             features[fields.InputDataFields.true_image_shape])
    losses = [loss_tensor for loss_tensor in losses_dict.values()]
    if add_regularization_loss:
        # TODO(kaftan): As we figure out mixed precision & bfloat16, we may
        # need to convert these regularization losses from bfloat16 to float32
        # as well.
        regularization_losses = model.regularization_losses()
        if regularization_losses:
            regularization_loss = tf.add_n(regularization_losses,
                                           name='regularization_loss')
            losses.append(regularization_loss)
            losses_dict['Loss/regularization_loss'] = regularization_loss

    total_loss = tf.add_n(losses, name='total_loss')
    losses_dict['Loss/total_loss'] = total_loss

    return losses_dict, prediction_dict
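
Note that both branches hand `model.loss` float32 tensors: on the bfloat16 path the prediction dict is cast back before the loss is computed. A standalone sketch of what such a nested cast does (an illustration, not the Object Detection API's actual `ops.bfloat16_to_float32_nested`):

import tensorflow as tf

def bfloat16_to_float32_nested(value):
    # Recursively walk dicts and lists, casting any bfloat16 tensor
    # to float32 and passing everything else through unchanged.
    if isinstance(value, dict):
        return {k: bfloat16_to_float32_nested(v) for k, v in value.items()}
    if isinstance(value, list):
        return [bfloat16_to_float32_nested(v) for v in value]
    if isinstance(value, tf.Tensor) and value.dtype == tf.bfloat16:
        return tf.cast(value, tf.float32)
    return value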
Example #3
    def model_fn(features, labels, mode, params=None):
        """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
        params = params or {}
        total_loss, train_op, detections, export_outputs = None, None, None, None
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # Make sure to set the Keras learning phase. True during training,
        # False for inference.
        tf.keras.backend.set_learning_phase(is_training)
        # Set policy for mixed-precision training with Keras-based models.
        if use_tpu and train_config.use_bfloat16:
            from tensorflow.python.keras.engine import base_layer_utils  # pylint: disable=g-import-not-at-top
            # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
            base_layer_utils.enable_v2_dtype_behavior()
            tf2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
        detection_model = detection_model_fn(is_training=is_training,
                                             add_summaries=(not use_tpu))
        scaffold_fn = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            labels = unstack_batch(
                labels,
                unpad_groundtruth_tensors=(
                    train_config.unpad_groundtruth_tensors))
        elif mode == tf.estimator.ModeKeys.EVAL:
            # When evaluating on training data, it is necessary to check whether
            # groundtruth must be unpadded.
            boxes_shape = (labels[fields.InputDataFields.groundtruth_boxes]
                           .get_shape().as_list())
            unpad_groundtruth_tensors = (boxes_shape[1] is not None
                                         and not use_tpu)
            labels = unstack_batch(
                labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            provide_groundtruth(detection_model, labels)

        preprocessed_images = features[fields.InputDataFields.image]

        side_inputs = detection_model.get_side_inputs(features)

        if use_tpu and train_config.use_bfloat16:
            with tf.tpu.bfloat16_scope():
                prediction_dict = detection_model.predict(
                    preprocessed_images,
                    features[fields.InputDataFields.true_image_shape],
                    **side_inputs)
                prediction_dict = ops.bfloat16_to_float32_nested(
                    prediction_dict)
        else:
            prediction_dict = detection_model.predict(
                preprocessed_images,
                features[fields.InputDataFields.true_image_shape],
                **side_inputs)

        def postprocess_wrapper(args):
            return detection_model.postprocess(args[0], args[1])

        if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
            if use_tpu and postprocess_on_cpu:
                detections = tf.tpu.outside_compilation(
                    postprocess_wrapper,
                    (prediction_dict,
                     features[fields.InputDataFields.true_image_shape]))
            else:
                detections = postprocess_wrapper(
                    (prediction_dict,
                     features[fields.InputDataFields.true_image_shape]))

        if mode == tf.estimator.ModeKeys.TRAIN:
            load_pretrained = hparams.load_pretrained if hparams else False
            if train_config.fine_tune_checkpoint and load_pretrained:
                if not train_config.fine_tune_checkpoint_type:
                    # train_config.from_detection_checkpoint field is deprecated. For
                    # backward compatibility, set train_config.fine_tune_checkpoint_type
                    # based on train_config.from_detection_checkpoint.
                    if train_config.from_detection_checkpoint:
                        train_config.fine_tune_checkpoint_type = 'detection'
                    else:
                        train_config.fine_tune_checkpoint_type = 'classification'
                asg_map = detection_model.restore_map(
                    fine_tune_checkpoint_type=(
                        train_config.fine_tune_checkpoint_type),
                    load_all_detection_checkpoint_vars=(
                        train_config.load_all_detection_checkpoint_vars))
                available_var_map = (
                    variables_helper.get_variables_available_in_checkpoint(
                        asg_map,
                        train_config.fine_tune_checkpoint,
                        include_global_step=False))
                if use_tpu:

                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(
                            train_config.fine_tune_checkpoint,
                            available_var_map)
                        return tf.train.Scaffold()

                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            if (mode == tf.estimator.ModeKeys.EVAL
                    and eval_config.use_dummy_loss_in_eval):
                total_loss = tf.constant(1.0)
                losses_dict = {'Loss/total_loss': total_loss}
            else:
                losses_dict = detection_model.loss(
                    prediction_dict,
                    features[fields.InputDataFields.true_image_shape])
                losses = [loss_tensor for loss_tensor in losses_dict.values()]
                if train_config.add_regularization_loss:
                    regularization_losses = (
                        detection_model.regularization_losses())
                    if use_tpu and train_config.use_bfloat16:
                        regularization_losses = ops.bfloat16_to_float32_nested(
                            regularization_losses)
                    if regularization_losses:
                        regularization_loss = tf.add_n(
                            regularization_losses, name='regularization_loss')
                        losses.append(regularization_loss)
                        losses_dict['Loss/regularization_loss'] = (
                            regularization_loss)
                total_loss = tf.add_n(losses, name='total_loss')
                losses_dict['Loss/total_loss'] = total_loss

            if 'graph_rewriter_config' in configs:
                graph_rewriter_fn = graph_rewriter_builder.build(
                    configs['graph_rewriter_config'], is_training=is_training)
                graph_rewriter_fn()

            # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
            # can write learning rate summaries on TPU without host calls.
            global_step = tf.train.get_or_create_global_step()
            training_optimizer, optimizer_summary_vars = optimizer_builder.build(
                train_config.optimizer)

        if mode == tf.estimator.ModeKeys.TRAIN:
            if use_tpu:
                training_optimizer = tf.tpu.CrossShardOptimizer(
                    training_optimizer)

            # Optionally freeze some layers by setting their gradients to be zero.
            trainable_variables = None
            include_variables = (train_config.update_trainable_variables
                                 if train_config.update_trainable_variables
                                 else None)
            exclude_variables = (train_config.freeze_variables
                                 if train_config.freeze_variables else None)
            trainable_variables = slim.filter_variables(
                tf.trainable_variables(),
                include_patterns=include_variables,
                exclude_patterns=exclude_variables)

            clip_gradients_value = None
            if train_config.gradient_clipping_by_norm > 0:
                clip_gradients_value = train_config.gradient_clipping_by_norm

            if not use_tpu:
                for var in optimizer_summary_vars:
                    tf.summary.scalar(var.op.name, var)
            summaries = [] if use_tpu else None
            if train_config.summarize_gradients:
                summaries = [
                    'gradients', 'gradient_norm', 'global_gradient_norm'
                ]
            train_op = slim.optimizers.optimize_loss(
                loss=total_loss,
                global_step=global_step,
                learning_rate=None,
                clip_gradients=clip_gradients_value,
                optimizer=training_optimizer,
                update_ops=detection_model.updates(),
                variables=trainable_variables,
                summaries=summaries,
                name='')  # Preventing scope prefix on all variables.

        if mode == tf.estimator.ModeKeys.PREDICT:
            exported_output = exporter_lib.add_output_tensor_nodes(detections)
            export_outputs = {
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(exported_output)
            }

        eval_metric_ops = None
        scaffold = None
        if mode == tf.estimator.ModeKeys.EVAL:
            class_agnostic = (fields.DetectionResultFields.detection_classes
                              not in detections)
            groundtruth = _prepare_groundtruth_for_eval(
                detection_model, class_agnostic,
                eval_input_config.max_number_of_boxes)
            use_original_images = fields.InputDataFields.original_image in features
            if use_original_images:
                eval_images = features[fields.InputDataFields.original_image]
                true_image_shapes = tf.slice(
                    features[fields.InputDataFields.true_image_shape], [0, 0],
                    [-1, 3])
                original_image_spatial_shapes = features[
                    fields.InputDataFields.original_image_spatial_shape]
            else:
                eval_images = features[fields.InputDataFields.image]
                true_image_shapes = None
                original_image_spatial_shapes = None

            eval_dict = eval_util.result_dict_for_batched_example(
                eval_images,
                features[inputs.HASH_KEY],
                detections,
                groundtruth,
                class_agnostic=class_agnostic,
                scale_to_absolute=True,
                original_image_spatial_shapes=original_image_spatial_shapes,
                true_image_shapes=true_image_shapes)

            if fields.InputDataFields.image_additional_channels in features:
                eval_dict[fields.InputDataFields.image_additional_channels] = (
                    features[fields.InputDataFields.image_additional_channels])

            if class_agnostic:
                category_index = (
                    label_map_util.create_class_agnostic_category_index())
            else:
                category_index = (
                    label_map_util.create_category_index_from_labelmap(
                        eval_input_config.label_map_path))
            vis_metric_ops = None
            if not use_tpu and use_original_images:
                keypoint_edges = [(kp.start, kp.end)
                                  for kp in eval_config.keypoint_edge]

                eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
                    category_index,
                    max_examples_to_draw=eval_config.num_visualizations,
                    max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
                    min_score_thresh=eval_config.min_score_threshold,
                    use_normalized_coordinates=False,
                    keypoint_edges=keypoint_edges or None)
                vis_metric_ops = (
                    eval_metric_op_vis.get_estimator_eval_metric_ops(eval_dict))

            # Eval metrics on a single example.
            eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
                eval_config, list(category_index.values()), eval_dict)
            for loss_key, loss_tensor in losses_dict.items():
                eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
            for var in optimizer_summary_vars:
                eval_metric_ops[var.op.name] = (var, tf.no_op())
            if vis_metric_ops is not None:
                eval_metric_ops.update(vis_metric_ops)
            eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

            if eval_config.use_moving_averages:
                variable_averages = tf.train.ExponentialMovingAverage(0.0)
                variables_to_restore = variable_averages.variables_to_restore()
                keep_checkpoint_every_n_hours = (
                    train_config.keep_checkpoint_every_n_hours)
                saver = tf.train.Saver(
                    variables_to_restore,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
                scaffold = tf.train.Scaffold(saver=saver)

        # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
        if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
            return tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                scaffold_fn=scaffold_fn,
                predictions=detections,
                loss=total_loss,
                train_op=train_op,
                eval_metrics=eval_metric_ops,
                export_outputs=export_outputs)
        else:
            if scaffold is None:
                keep_checkpoint_every_n_hours = (
                    train_config.keep_checkpoint_every_n_hours)
                saver = tf.train.Saver(
                    sharded=True,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
                    save_relative_paths=True)
                tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
                scaffold = tf.train.Scaffold(saver=saver)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=detections,
                                              loss=total_loss,
                                              train_op=train_op,
                                              eval_metric_ops=eval_metric_ops,
                                              export_outputs=export_outputs,
                                              scaffold=scaffold)
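
The `model_fn` above is meant to be handed to an Estimator. A minimal wiring sketch, assuming the function is built by a factory such as `create_model_fn` and that `run_config`, `train_input_fn`, and `train_config` are supplied by the surrounding training binary:

estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
estimator.train(input_fn=train_input_fn, max_steps=train_config.num_steps)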
Example #4
def _compute_losses_and_predictions_dicts(
        model, features, labels,
        add_regularization_loss=True):
    """Computes the losses dict and predictions dict for a model on inputs.
    Args:
      model: a DetectionModel (based on Keras).
      features: Dictionary of feature tensors from the input dataset.
        Should be in the format output by `inputs.train_input` and
        `inputs.eval_input`.
          features[fields.InputDataFields.image] is a [batch_size, H, W, C]
            float32 tensor with preprocessed images.
          features[HASH_KEY] is a [batch_size] int32 tensor representing unique
            identifiers for the images.
          features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
            int32 tensor representing the true image shapes, as preprocessed
            images could be padded.
          features[fields.InputDataFields.original_image] (optional) is a
            [batch_size, H, W, C] float32 tensor with original images.
      labels: A dictionary of groundtruth tensors post-unstacking. The original
        labels are of the form returned by `inputs.train_input` and
        `inputs.eval_input`. The shapes may have been modified by unstacking with
        `model_lib.unstack_batch`. However, the dictionary includes the following
        fields.
          labels[fields.InputDataFields.num_groundtruth_boxes] is an
            int32 tensor indicating the number of valid groundtruth boxes
            per image.
          labels[fields.InputDataFields.groundtruth_boxes] is a float32 tensor
            containing the corners of the groundtruth boxes.
          labels[fields.InputDataFields.groundtruth_classes] is a float32
            one-hot tensor of classes.
          labels[fields.InputDataFields.groundtruth_weights] is a float32 tensor
            containing groundtruth weights for the boxes.
          -- Optional --
          labels[fields.InputDataFields.groundtruth_instance_masks] is a
            float32 tensor containing only binary values, which represent
            instance masks for objects.
          labels[fields.InputDataFields.groundtruth_keypoints] is a
            float32 tensor containing keypoints for each box.
          labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
            tensor with the number of sampled DensePose points per object.
          labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
            tensor with the DensePose part ids (0-indexed) per object.
          labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
            float32 tensor with the DensePose surface coordinates.
          labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
            containing group_of annotations.
          labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
            k-hot tensor of classes.
          labels[fields.InputDataFields.groundtruth_track_ids] is an
            tensor of track IDs.
          labels[fields.InputDataFields.groundtruth_keypoint_depths] is a
            float32 tensor containing keypoint depths information.
          labels[fields.InputDataFields.groundtruth_keypoint_depth_weights] is a
            float32 tensor containing the weights of the keypoint depth feature.
      add_regularization_loss: Whether or not to include the model's
        regularization loss in the losses dictionary.

    Returns:
      A tuple containing the losses dictionary (with the total loss under
      the key 'Loss/total_loss'), and the predictions dictionary produced by
      `model.predict`.
    """
    model_lib.provide_groundtruth(model, labels)
    preprocessed_images = features[fields.InputDataFields.image]

    prediction_dict = model.predict(
        preprocessed_images,
        features[fields.InputDataFields.true_image_shape],
        **model.get_side_inputs(features))
    prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)

    losses_dict = model.loss(
        prediction_dict, features[fields.InputDataFields.true_image_shape])
    losses = [loss_tensor for loss_tensor in losses_dict.values()]
    if add_regularization_loss:
        # TODO(kaftan): As we figure out mixed precision & bfloat 16, we may
        # need to convert these regularization losses from bfloat16 to float32
        # as well.
        regularization_losses = model.regularization_losses()
        if regularization_losses:
            regularization_losses = ops.bfloat16_to_float32_nested(
                regularization_losses)
            regularization_loss = tf.add_n(
                regularization_losses, name='regularization_loss')
            losses.append(regularization_loss)
            losses_dict['Loss/regularization_loss'] = regularization_loss

    total_loss = tf.add_n(losses, name='total_loss')
    losses_dict['Loss/total_loss'] = total_loss

    return losses_dict, prediction_dict
Example #5
def _compute_losses_and_predictions_dicts(model,
                                          features,
                                          labels,
                                          add_regularization_loss=True,
                                          use_tpu=False,
                                          use_bfloat16=False):
    """Computes the losses dict and predictions dict for a model on inputs.

  Args:
    model: a DetectionModel (based on Keras).
    features: Dictionary of feature tensors from the input dataset.
      Should be in the format output by `inputs.train_input` and
      `inputs.eval_input`.
        features[fields.InputDataFields.image] is a [batch_size, H, W, C]
          float32 tensor with preprocessed images.
        features[HASH_KEY] is a [batch_size] int32 tensor representing unique
          identifiers for the images.
        features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
          int32 tensor representing the true image shapes, as preprocessed
          images could be padded.
        features[fields.InputDataFields.original_image] (optional) is a
          [batch_size, H, W, C] float32 tensor with original images.
    labels: A dictionary of groundtruth tensors post-unstacking. The original
      labels are of the form returned by `inputs.train_input` and
      `inputs.eval_input`. The shapes may have been modified by unstacking with
      `model_lib.unstack_batch`. However, the dictionary includes the following
      fields.
        labels[fields.InputDataFields.num_groundtruth_boxes] is an
          int32 tensor indicating the number of valid groundtruth boxes
          per image.
        labels[fields.InputDataFields.groundtruth_boxes] is a float32 tensor
          containing the corners of the groundtruth boxes.
        labels[fields.InputDataFields.groundtruth_classes] is a float32
          one-hot tensor of classes.
        labels[fields.InputDataFields.groundtruth_weights] is a float32 tensor
          containing groundtruth weights for the boxes.
        -- Optional --
        labels[fields.InputDataFields.groundtruth_instance_masks] is a
          float32 tensor containing only binary values, which represent
          instance masks for objects.
        labels[fields.InputDataFields.groundtruth_keypoints] is a
          float32 tensor containing keypoints for each box.
    add_regularization_loss: Whether or not to include the model's
      regularization loss in the losses dictionary.
    use_tpu: Whether computation should happen on a TPU.
    use_bfloat16: Whether computation on a TPU should use bfloat16.

  Returns:
    A tuple containing the losses dictionary (with the total loss under
    the key 'Loss/total_loss'), and the predictions dictionary produced by
    `model.predict`.
  """
    model_lib.provide_groundtruth(model, labels)
    preprocessed_images = features[fields.InputDataFields.image]

    # TODO(kaftan): Check how we're supposed to do this mixed precision stuff
    # in TF2 TPUStrategy + Keras.
    if use_tpu and use_bfloat16:
        # tf.tpu.bfloat16_scope replaces the removed tf.contrib.tpu namespace.
        with tf.tpu.bfloat16_scope():
            prediction_dict = model.predict(
                preprocessed_images,
                features[fields.InputDataFields.true_image_shape])
            prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
    else:
        prediction_dict = model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape])

    losses_dict = model.loss(prediction_dict,
                             features[fields.InputDataFields.true_image_shape])
    losses = [loss_tensor for loss_tensor in losses_dict.values()]
    if add_regularization_loss:
        # TODO(kaftan): As we figure out mixed precision & bfloat16, we may
        # need to convert these regularization losses from bfloat16 to float32
        # as well.
        regularization_losses = model.regularization_losses()
        if regularization_losses:
            regularization_loss = tf.add_n(regularization_losses,
                                           name='regularization_loss')
            losses.append(regularization_loss)
            losses_dict['Loss/regularization_loss'] = regularization_loss

    total_loss = tf.add_n(losses, name='total_loss')
    losses_dict['Loss/total_loss'] = total_loss

    return losses_dict, prediction_dict
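
The extra flags only select the bfloat16 branch; the loss bookkeeping is identical. A hedged call-site sketch (all names are placeholders, and whether `bfloat16_scope` is available depends on the TensorFlow version in use):

losses_dict, prediction_dict = _compute_losses_and_predictions_dicts(
    detection_model, features, labels,
    add_regularization_loss=True,
    use_tpu=True,
    use_bfloat16=True)
total_loss = losses_dict['Loss/total_loss']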