def _make_evaluation_dict(self, resized_groundtruth_masks=False):
        input_data_fields = fields.InputDataFields
        detection_fields = fields.DetectionResultFields

        image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
        key = tf.constant('image1')
        detection_boxes = tf.constant([[[0., 0., 1., 1.]]])
        detection_scores = tf.constant([[0.8]])
        detection_classes = tf.constant([[0]])
        detection_masks = tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32)
        num_detections = tf.constant([1])
        groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
        groundtruth_classes = tf.constant([1])
        groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
        if resized_groundtruth_masks:
            groundtruth_instance_masks = tf.ones(shape=[1, 10, 10],
                                                 dtype=tf.uint8)
        detections = {
            detection_fields.detection_boxes: detection_boxes,
            detection_fields.detection_scores: detection_scores,
            detection_fields.detection_classes: detection_classes,
            detection_fields.detection_masks: detection_masks,
            detection_fields.num_detections: num_detections
        }
        groundtruth = {
            input_data_fields.groundtruth_boxes:
            groundtruth_boxes,
            input_data_fields.groundtruth_classes:
            groundtruth_classes,
            input_data_fields.groundtruth_instance_masks:
            groundtruth_instance_masks
        }
        return eval_util.result_dict_for_single_example(
            image, key, detections, groundtruth)
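A minimal usage sketch for the helper above. Assumptions on my part: TF 1.x with the TensorFlow Object Detection API importable, the helper mixed into a tf.test.TestCase subclass, and a hypothetical test name.

import tensorflow as tf

class EvalUtilTest(tf.test.TestCase):
  # Assumes _make_evaluation_dict from the example above is defined on
  # this class.

  def test_result_dict_contains_detection_fields(self):
    eval_dict = self._make_evaluation_dict()
    with self.test_session() as sess:
      result = sess.run(eval_dict)
    # These keys are the string values of fields.DetectionResultFields.
    self.assertIn('detection_boxes', result)
    self.assertIn('detection_scores', result)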
Example #2
  def _make_evaluation_dict(self, resized_groundtruth_masks=False):
    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields

    image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
    key = tf.constant('image1')
    detection_boxes = tf.constant([[[0., 0., 1., 1.]]])
    detection_scores = tf.constant([[0.8]])
    detection_classes = tf.constant([[0]])
    detection_masks = tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32)
    num_detections = tf.constant([1])
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    if resized_groundtruth_masks:
      groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)
    detections = {
        detection_fields.detection_boxes: detection_boxes,
        detection_fields.detection_scores: detection_scores,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_detections
    }
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
    }
    return eval_util.result_dict_for_single_example(image, key, detections,
                                                    groundtruth)
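A note on the mask shapes used above, with a tiny sketch; the shape comments are my reading of the example, not taken from the API docs: detection masks carry batch and per-detection dimensions, while ground-truth instance masks are a flat stack of per-instance masks.

import tensorflow as tf

detection_masks = tf.ones([1, 1, 20, 20])          # [batch, num_detections, H, W]
groundtruth_instance_masks = tf.ones([1, 20, 20])  # [num_instances, H, W]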
Example #3
  def _make_evaluation_dict(self,
                            resized_groundtruth_masks=False,
                            batch_size=1,
                            max_gt_boxes=None,
                            scale_to_absolute=False):
    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields

    image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
    if batch_size == 1:
      key = tf.constant('image1')
    else:
      key = tf.constant([str(i) for i in range(batch_size)])
    detection_boxes = tf.tile(tf.constant([[[0., 0., 1., 1.]]]),
                              multiples=[batch_size, 1, 1])
    detection_scores = tf.tile(tf.constant([[0.8]]), multiples=[batch_size, 1])
    detection_classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1])
    detection_masks = tf.tile(tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32),
                              multiples=[batch_size, 1, 1, 1])
    num_detections = tf.ones([batch_size])
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    if resized_groundtruth_masks:
      groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)

    if batch_size > 1:
      groundtruth_boxes = tf.tile(tf.expand_dims(groundtruth_boxes, 0),
                                  multiples=[batch_size, 1, 1])
      groundtruth_classes = tf.tile(tf.expand_dims(groundtruth_classes, 0),
                                    multiples=[batch_size, 1])
      groundtruth_instance_masks = tf.tile(
          tf.expand_dims(groundtruth_instance_masks, 0),
          multiples=[batch_size, 1, 1, 1])

    detections = {
        detection_fields.detection_boxes: detection_boxes,
        detection_fields.detection_scores: detection_scores,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_detections
    }
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
    }
    if batch_size > 1:
      return eval_util.result_dict_for_batched_example(
          image, key, detections, groundtruth,
          scale_to_absolute=scale_to_absolute,
          max_gt_boxes=max_gt_boxes)
    else:
      return eval_util.result_dict_for_single_example(
          image, key, detections, groundtruth,
          scale_to_absolute=scale_to_absolute)
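A standalone sketch of the batching pattern used above, assuming TF 1.x: a single ground-truth box of shape [num_boxes, 4] gains a batch dimension via tf.expand_dims and is then replicated with tf.tile.

import tensorflow as tf

batch_size = 2
groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])    # shape [1, 4]
batched_boxes = tf.tile(
    tf.expand_dims(groundtruth_boxes, 0),              # shape [1, 1, 4]
    multiples=[batch_size, 1, 1])                      # shape [2, 1, 4]
with tf.Session() as sess:
  print(sess.run(batched_boxes).shape)                 # (2, 1, 4)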
Example #4
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Restores the model in a tensorflow session.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()
    original_image = tf.expand_dims(input_dict[fields.InputDataFields.image],
                                    0)
    original_audio = tf.expand_dims(input_dict[fields.InputDataFields.audio],
                                    0)
    preprocessed_image = model.preprocess(tf.to_float(original_image), False)
    preprocessed_audio = model.preprocess(tf.to_float(original_audio), True)
    prediction_dict = model.predict(preprocessed_image, preprocessed_audio)
    detections = model.postprocess(prediction_dict)

    groundtruth = None
    if not ignore_groundtruth:
        groundtruth = {
            fields.InputDataFields.groundtruth_boxes:
            input_dict[fields.InputDataFields.groundtruth_boxes],
            fields.InputDataFields.groundtruth_classes:
            input_dict[fields.InputDataFields.groundtruth_classes],
            fields.InputDataFields.groundtruth_area:
            input_dict[fields.InputDataFields.groundtruth_area],
            fields.InputDataFields.groundtruth_is_crowd:
            input_dict[fields.InputDataFields.groundtruth_is_crowd],
            fields.InputDataFields.groundtruth_difficult:
            input_dict[fields.InputDataFields.groundtruth_difficult],
            fields.InputDataFields.groundtruth_image_classes:
            input_dict[fields.InputDataFields.groundtruth_image_classes]
        }
        if fields.InputDataFields.groundtruth_group_of in input_dict:
            groundtruth[fields.InputDataFields.groundtruth_group_of] = (
                input_dict[fields.InputDataFields.groundtruth_group_of])
        if fields.DetectionResultFields.detection_masks in detections:
            groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
                input_dict[fields.InputDataFields.groundtruth_instance_masks])

    return eval_util.result_dict_for_single_example(
        original_image,
        input_dict[fields.InputDataFields.source_id],
        detections,
        groundtruth,
        class_agnostic=(fields.DetectionResultFields.detection_classes
                        not in detections),
        scale_to_absolute=True)
Example #5
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
  """Restores the model in a tensorflow session.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
  input_dict = create_input_dict_fn()
  prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
  input_dict = prefetch_queue.dequeue()
  original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
  preprocessed_image, true_image_shapes = model.preprocess(
      tf.to_float(original_image))
  prediction_dict = model.predict(preprocessed_image, true_image_shapes)
  detections = model.postprocess(prediction_dict, true_image_shapes)

  groundtruth = None
  if not ignore_groundtruth:
    groundtruth = {
        fields.InputDataFields.groundtruth_boxes:
            input_dict[fields.InputDataFields.groundtruth_boxes],
        fields.InputDataFields.groundtruth_classes:
            input_dict[fields.InputDataFields.groundtruth_classes],
        fields.InputDataFields.groundtruth_area:
            input_dict[fields.InputDataFields.groundtruth_area],
        fields.InputDataFields.groundtruth_is_crowd:
            input_dict[fields.InputDataFields.groundtruth_is_crowd],
        fields.InputDataFields.groundtruth_difficult:
            input_dict[fields.InputDataFields.groundtruth_difficult]
    }
    if fields.InputDataFields.groundtruth_group_of in input_dict:
      groundtruth[fields.InputDataFields.groundtruth_group_of] = (
          input_dict[fields.InputDataFields.groundtruth_group_of])
    if fields.DetectionResultFields.detection_masks in detections:
      groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
          input_dict[fields.InputDataFields.groundtruth_instance_masks])

  return eval_util.result_dict_for_single_example(
      original_image,
      input_dict[fields.InputDataFields.source_id],
      detections,
      groundtruth,
      class_agnostic=(
          fields.DetectionResultFields.detection_classes not in detections),
      scale_to_absolute=True)
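The class_agnostic flag computed above reduces to a plain membership test on the postprocessed detections; a minimal sketch (the literal string below is the value of fields.DetectionResultFields.detection_classes):

detections = {'detection_boxes': None, 'detection_scores': None}
class_agnostic = 'detection_classes' not in detections  # True for this dict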
Example #6
def _extract_predictions_and_losses(model,
                                    create_input_dict_fn,
                                    ignore_groundtruth=False):
    """Constructs tensorflow detection graph and returns output tensors.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    prediction_groundtruth_dict: A dictionary with postprocessed tensors (keyed
      by standard_fields.DetectionResultsFields) and optional groundtruth
      tensors (keyed by standard_fields.InputDataFields).
    losses_dict: A dictionary containing detection losses. This is empty when
      ignore_groundtruth is true.
  """
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()
    original_image = tf.expand_dims(input_dict[fields.InputDataFields.image],
                                    0)
    preprocessed_image, true_image_shapes = model.preprocess(
        tf.cast(original_image, dtype=tf.float32))
    prediction_dict = model.predict(preprocessed_image, true_image_shapes)
    detections = model.postprocess(prediction_dict, true_image_shapes)

    groundtruth = None
    losses_dict = {}
    if not ignore_groundtruth:
        groundtruth = {
            fields.InputDataFields.groundtruth_boxes:
            input_dict[fields.InputDataFields.groundtruth_boxes],
            fields.InputDataFields.groundtruth_classes:
            input_dict[fields.InputDataFields.groundtruth_classes],
            fields.InputDataFields.groundtruth_area:
            input_dict[fields.InputDataFields.groundtruth_area],
            fields.InputDataFields.groundtruth_is_crowd:
            input_dict[fields.InputDataFields.groundtruth_is_crowd],
            fields.InputDataFields.groundtruth_difficult:
            input_dict[fields.InputDataFields.groundtruth_difficult]
        }
        if fields.InputDataFields.groundtruth_group_of in input_dict:
            groundtruth[fields.InputDataFields.groundtruth_group_of] = (
                input_dict[fields.InputDataFields.groundtruth_group_of])
        groundtruth_masks_list = None
        if fields.DetectionResultFields.detection_masks in detections:
            groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
                input_dict[fields.InputDataFields.groundtruth_instance_masks])
            groundtruth_masks_list = [
                input_dict[fields.InputDataFields.groundtruth_instance_masks]
            ]
        groundtruth_keypoints_list = None
        if fields.DetectionResultFields.detection_keypoints in detections:
            groundtruth[fields.InputDataFields.groundtruth_keypoints] = (
                input_dict[fields.InputDataFields.groundtruth_keypoints])
            groundtruth_keypoints_list = [
                input_dict[fields.InputDataFields.groundtruth_keypoints]
            ]
        label_id_offset = 1
        model.provide_groundtruth(
            [input_dict[fields.InputDataFields.groundtruth_boxes]], [
                tf.one_hot(
                    input_dict[fields.InputDataFields.groundtruth_classes] -
                    label_id_offset,
                    depth=model.num_classes)
            ],
            groundtruth_masks_list=groundtruth_masks_list,
            groundtruth_keypoints_list=groundtruth_keypoints_list)
        losses_dict.update(model.loss(prediction_dict, true_image_shapes))

    result_dict = eval_util.result_dict_for_single_example(
        original_image,
        input_dict[fields.InputDataFields.source_id],
        detections,
        groundtruth,
        class_agnostic=(fields.DetectionResultFields.detection_classes
                        not in detections),
        scale_to_absolute=True)
    return result_dict, losses_dict
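A standalone sketch of the label-offset one-hot conversion passed to provide_groundtruth above, assuming TF 1.x: class ids coming from the input pipeline are 1-based, so label_id_offset shifts them to the 0-based indices tf.one_hot expects.

import tensorflow as tf

label_id_offset = 1
num_classes = 3
groundtruth_classes = tf.constant([1, 3])  # 1-based class ids
one_hot_classes = tf.one_hot(groundtruth_classes - label_id_offset,
                             depth=num_classes)
with tf.Session() as sess:
  print(sess.run(one_hot_classes))  # [[1. 0. 0.], [0. 0. 1.]]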
Example #7
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
  """Restores the model in a tensorflow session.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.


  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
  input_dict = create_input_dict_fn()
  batch = None
  if 'batch' in input_dict:
    batch = input_dict.pop('batch')
  else:
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()
    # consistent format for images and videos
    for key, value in input_dict.items():
      input_dict[key] = (value,)

  detections = _create_detection_op(model, input_dict, batch)

  # Print out analysis of the model.
  tf.contrib.tfprof.model_analyzer.print_model_analysis(
      tf.get_default_graph(),
      tfprof_options=tf.contrib.tfprof.model_analyzer.
      TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
  tf.contrib.tfprof.model_analyzer.print_model_analysis(
      tf.get_default_graph(),
      tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

  num_frames = len(input_dict[fields.InputDataFields.image])
  ret = []
  for i in range(num_frames):
    original_image = tf.expand_dims(input_dict[fields.InputDataFields.image][i],
                                    0)
    groundtruth = None
    if not ignore_groundtruth:
      groundtruth = {
          fields.InputDataFields.groundtruth_boxes:
              input_dict[fields.InputDataFields.groundtruth_boxes][i],
          fields.InputDataFields.groundtruth_classes:
              input_dict[fields.InputDataFields.groundtruth_classes][i],
      }
      optional_keys = (
          fields.InputDataFields.groundtruth_area,
          fields.InputDataFields.groundtruth_is_crowd,
          fields.InputDataFields.groundtruth_difficult,
          fields.InputDataFields.groundtruth_group_of,
      )
      for opt_key in optional_keys:
        if opt_key in input_dict:
          groundtruth[opt_key] = input_dict[opt_key][i]
      if fields.DetectionResultFields.detection_masks in detections:
        groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
            input_dict[fields.InputDataFields.groundtruth_instance_masks][i])

    detections_frame = {
        key: tf.expand_dims(value[i], 0)
        for key, value in detections.items()
    }

    source_id = (
        batch.key[0] if batch is not None else
        input_dict[fields.InputDataFields.source_id][i])
    ret.append(
        eval_util.result_dict_for_single_example(
            original_image,
            source_id,
            detections_frame,
            groundtruth,
            class_agnostic=(fields.DetectionResultFields.detection_classes
                            not in detections),
            scale_to_absolute=True))
  return ret
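A standalone sketch of the per-frame slicing above, assuming TF 1.x: each batched detection tensor is indexed at frame i and then given back a leading batch dimension of 1, the layout result_dict_for_single_example expects.

import tensorflow as tf

detections = {'detection_scores': tf.constant([[0.9], [0.7]])}  # two frames
i = 0
detections_frame = {
    key: tf.expand_dims(value[i], 0)  # [num_det] -> [1, num_det]
    for key, value in detections.items()
}
with tf.Session() as sess:
  print(sess.run(detections_frame['detection_scores']))  # [[0.9]]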
Example #8
  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
            fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        asg_map = detection_model.restore_map(
            from_detection_checkpoint=train_config.from_detection_checkpoint,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map, train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()
          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      losses = [loss_tensor for loss_tensor in losses_dict.values()]
      total_loss = tf.add_n(losses, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)

      if use_tpu:
        training_optimizer = tpu_optimizer.CrossShardOptimizer(
            training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      if train_config.freeze_variables:
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            exclude_patterns=train_config.freeze_variables)

      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
      # Detection summaries during eval.
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
      eval_dict = eval_util.result_dict_for_single_example(
          tf.expand_dims(features[fields.InputDataFields.original_image][0], 0),
          features[inputs.HASH_KEY][0],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=False)

      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
          eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
      if not use_tpu:
        tf.summary.image('Detections_Left_Groundtruth_Right',
                         detection_and_groundtruth)

      # Eval metrics on a single image.
      detection_fields = fields.DetectionResultFields()
      input_data_fields = fields.InputDataFields()
      coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
          category_index.values())
      eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
          image_id=eval_dict[input_data_fields.key],
          groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
          groundtruth_classes=eval_dict[input_data_fields.groundtruth_classes],
          detection_boxes=eval_dict[detection_fields.detection_boxes],
          detection_scores=eval_dict[detection_fields.detection_scores],
          detection_classes=eval_dict[detection_fields.detection_classes])

    if use_tpu:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)
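A hedged sketch of the categories argument handed to CocoDetectionEvaluator above: the values of a label-map category index are dicts with 'id' and 'name' keys (the class names below are hypothetical). Under Python 3, dict.values() is a view, so wrapping it in list() is the safer call.

category_index = {
    1: {'id': 1, 'name': 'person'},
    2: {'id': 2, 'name': 'car'},
}
categories = list(category_index.values())
# categories == [{'id': 1, 'name': 'person'}, {'id': 2, 'name': 'car'}]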
Example #9
    def model_fn(features, labels, mode, params=None):
        """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
        params = params or {}
        total_loss, train_op, detections, export_outputs = None, None, None, None
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # Make sure to set the Keras learning phase. True during training,
        # False for inference.
        tf.keras.backend.set_learning_phase(is_training)
        detection_model = detection_model_fn(is_training=is_training,
                                             add_summaries=(not use_tpu))
        scaffold_fn = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            labels = unstack_batch(
                labels,
                unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # For evaluating on train data, check whether the groundtruth
            # needs to be unpadded: a statically known num_boxes dimension
            # means the tensors were padded to a fixed maximum.
            boxes_shape = (
                labels[fields.InputDataFields.groundtruth_boxes]
                .get_shape().as_list())
            unpad_groundtruth_tensors = boxes_shape[1] is not None
            labels = unstack_batch(
                labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
            gt_classes_list = labels[
                fields.InputDataFields.groundtruth_classes]
            gt_masks_list = None
            if fields.InputDataFields.groundtruth_instance_masks in labels:
                gt_masks_list = labels[
                    fields.InputDataFields.groundtruth_instance_masks]
            gt_keypoints_list = None
            if fields.InputDataFields.groundtruth_keypoints in labels:
                gt_keypoints_list = labels[
                    fields.InputDataFields.groundtruth_keypoints]
            gt_weights_list = None
            if fields.InputDataFields.groundtruth_weights in labels:
                gt_weights_list = labels[
                    fields.InputDataFields.groundtruth_weights]
            gt_is_crowd_list = None
            if fields.InputDataFields.groundtruth_is_crowd in labels:
                gt_is_crowd_list = labels[
                    fields.InputDataFields.groundtruth_is_crowd]
            detection_model.provide_groundtruth(
                groundtruth_boxes_list=gt_boxes_list,
                groundtruth_classes_list=gt_classes_list,
                groundtruth_masks_list=gt_masks_list,
                groundtruth_keypoints_list=gt_keypoints_list,
                groundtruth_weights_list=gt_weights_list,
                groundtruth_is_crowd_list=gt_is_crowd_list)

        preprocessed_images = features[fields.InputDataFields.image]
        prediction_dict = detection_model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape])
        if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
            detections = detection_model.postprocess(
                prediction_dict,
                features[fields.InputDataFields.true_image_shape])

        if mode == tf.estimator.ModeKeys.TRAIN:
            if train_config.fine_tune_checkpoint and hparams.load_pretrained:
                if not train_config.fine_tune_checkpoint_type:
                    # train_config.from_detection_checkpoint field is deprecated. For
                    # backward compatibility, set train_config.fine_tune_checkpoint_type
                    # based on train_config.from_detection_checkpoint.
                    if train_config.from_detection_checkpoint:
                        train_config.fine_tune_checkpoint_type = 'detection'
                    else:
                        train_config.fine_tune_checkpoint_type = 'classification'
                asg_map = detection_model.restore_map(
                    fine_tune_checkpoint_type=train_config.
                    fine_tune_checkpoint_type,
                    load_all_detection_checkpoint_vars=(
                        train_config.load_all_detection_checkpoint_vars))
                available_var_map = (
                    variables_helper.get_variables_available_in_checkpoint(
                        asg_map,
                        train_config.fine_tune_checkpoint,
                        include_global_step=False))
                if use_tpu:

                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(
                            train_config.fine_tune_checkpoint,
                            available_var_map)
                        return tf.train.Scaffold()

                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            losses_dict = detection_model.loss(
                prediction_dict,
                features[fields.InputDataFields.true_image_shape])
            losses = [loss_tensor for loss_tensor in losses_dict.values()]
            if train_config.add_regularization_loss:
                regularization_losses = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                if regularization_losses:
                    regularization_loss = tf.add_n(regularization_losses,
                                                   name='regularization_loss')
                    losses.append(regularization_loss)
                    losses_dict[
                        'Loss/regularization_loss'] = regularization_loss
            total_loss = tf.add_n(losses, name='total_loss')
            losses_dict['Loss/total_loss'] = total_loss

            if 'graph_rewriter_config' in configs:
                graph_rewriter_fn = graph_rewriter_builder.build(
                    configs['graph_rewriter_config'], is_training=is_training)
                graph_rewriter_fn()

            # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
            # can write learning rate summaries on TPU without host calls.
            global_step = tf.train.get_or_create_global_step()
            training_optimizer, optimizer_summary_vars = optimizer_builder.build(
                train_config.optimizer)

        if mode == tf.estimator.ModeKeys.TRAIN:
            if use_tpu:
                training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
                    training_optimizer)

            # Optionally freeze some layers by setting their gradients to be zero.
            trainable_variables = None
            include_variables = (train_config.update_trainable_variables
                                 if train_config.update_trainable_variables
                                 else None)
            exclude_variables = (train_config.freeze_variables
                                 if train_config.freeze_variables else None)
            trainable_variables = tf.contrib.framework.filter_variables(
                tf.trainable_variables(),
                include_patterns=include_variables,
                exclude_patterns=exclude_variables)

            clip_gradients_value = None
            if train_config.gradient_clipping_by_norm > 0:
                clip_gradients_value = train_config.gradient_clipping_by_norm

            if not use_tpu:
                for var in optimizer_summary_vars:
                    tf.summary.scalar(var.op.name, var)
            summaries = [] if use_tpu else None
            train_op = tf.contrib.layers.optimize_loss(
                loss=total_loss,
                global_step=global_step,
                learning_rate=None,
                clip_gradients=clip_gradients_value,
                optimizer=training_optimizer,
                variables=trainable_variables,
                summaries=summaries,
                name='')  # Preventing scope prefix on all variables.

        if mode == tf.estimator.ModeKeys.PREDICT:
            export_outputs = {
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(detections)
            }

        eval_metric_ops = None
        scaffold = None
        if mode == tf.estimator.ModeKeys.EVAL:
            class_agnostic = (fields.DetectionResultFields.detection_classes
                              not in detections)
            groundtruth = _prepare_groundtruth_for_eval(
                detection_model, class_agnostic)
            use_original_images = fields.InputDataFields.original_image in features
            eval_images = (features[fields.InputDataFields.original_image]
                           if use_original_images else
                           features[fields.InputDataFields.image])
            eval_dict = eval_util.result_dict_for_single_example(
                eval_images[0:1],
                features[inputs.HASH_KEY][0],
                detections,
                groundtruth,
                class_agnostic=class_agnostic,
                scale_to_absolute=True)

            if class_agnostic:
                category_index = (
                    label_map_util.create_class_agnostic_category_index())
            else:
                category_index = label_map_util.create_category_index_from_labelmap(
                    eval_input_config.label_map_path)
            img_summary = None
            if not use_tpu and use_original_images:
                detection_and_groundtruth = (
                    vis_utils.draw_side_by_side_evaluation_image(
                        eval_dict,
                        category_index,
                        max_boxes_to_draw=20,
                        min_score_thresh=0.2,
                        use_normalized_coordinates=False))
                img_summary = tf.summary.image(
                    'Detections_Left_Groundtruth_Right',
                    detection_and_groundtruth)

            # Eval metrics on a single example.
            eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
                eval_config, category_index.values(), eval_dict)
            for loss_key, loss_tensor in losses_dict.items():
                eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
            for var in optimizer_summary_vars:
                eval_metric_ops[var.op.name] = (var, tf.no_op())
            if img_summary is not None:
                eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
                    img_summary, tf.no_op())
            eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

            if eval_config.use_moving_averages:
                variable_averages = tf.train.ExponentialMovingAverage(0.0)
                variables_to_restore = variable_averages.variables_to_restore()
                keep_checkpoint_every_n_hours = (
                    train_config.keep_checkpoint_every_n_hours)
                saver = tf.train.Saver(
                    variables_to_restore,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours
                )
                scaffold = tf.train.Scaffold(saver=saver)

        # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
        if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                scaffold_fn=scaffold_fn,
                predictions=detections,
                loss=total_loss,
                train_op=train_op,
                eval_metrics=eval_metric_ops,
                export_outputs=export_outputs)
        else:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=detections,
                                              loss=total_loss,
                                              train_op=train_op,
                                              eval_metric_ops=eval_metric_ops,
                                              export_outputs=export_outputs,
                                              scaffold=scaffold)
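A standalone sketch of the moving-average restore wiring above, assuming TF 1.x: variables_to_restore() maps the shadow variables' checkpoint names to the live variables, so a Saver built from that mapping loads the averaged weights during eval.

import tensorflow as tf

weights = tf.get_variable('weights', shape=[2],
                          initializer=tf.zeros_initializer())
ema = tf.train.ExponentialMovingAverage(0.0)
update_op = ema.apply([weights])  # creates the shadow variable
variables_to_restore = ema.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)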
Example #10
def _extract_predictions_and_losses(model,
                                    create_input_dict_fn,
                                    ignore_groundtruth=False):
  """Constructs tensorflow detection graph and returns output tensors.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    prediction_groundtruth_dict: A dictionary with postprocessed tensors (keyed
      by standard_fields.DetectionResultsFields) and optional groundtruth
      tensors (keyed by standard_fields.InputDataFields).
    losses_dict: A dictionary containing detection losses. This is empty when
      ignore_groundtruth is true.
  """
  input_dict = create_input_dict_fn()
  prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
  input_dict = prefetch_queue.dequeue()
  original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
  preprocessed_image, true_image_shapes = model.preprocess(
      tf.to_float(original_image))
  prediction_dict = model.predict(preprocessed_image, true_image_shapes)
  detections = model.postprocess(prediction_dict, true_image_shapes)

  groundtruth = None
  losses_dict = {}
  if not ignore_groundtruth:
    groundtruth = {
        fields.InputDataFields.groundtruth_boxes:
            input_dict[fields.InputDataFields.groundtruth_boxes],
        fields.InputDataFields.groundtruth_classes:
            input_dict[fields.InputDataFields.groundtruth_classes],
        fields.InputDataFields.groundtruth_area:
            input_dict[fields.InputDataFields.groundtruth_area],
        fields.InputDataFields.groundtruth_is_crowd:
            input_dict[fields.InputDataFields.groundtruth_is_crowd],
        fields.InputDataFields.groundtruth_difficult:
            input_dict[fields.InputDataFields.groundtruth_difficult]
    }
    if fields.InputDataFields.groundtruth_group_of in input_dict:
      groundtruth[fields.InputDataFields.groundtruth_group_of] = (
          input_dict[fields.InputDataFields.groundtruth_group_of])
    groundtruth_masks_list = None
    if fields.DetectionResultFields.detection_masks in detections:
      groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
          input_dict[fields.InputDataFields.groundtruth_instance_masks])
      groundtruth_masks_list = [
          input_dict[fields.InputDataFields.groundtruth_instance_masks]]
    groundtruth_keypoints_list = None
    if fields.DetectionResultFields.detection_keypoints in detections:
      groundtruth[fields.InputDataFields.groundtruth_keypoints] = (
          input_dict[fields.InputDataFields.groundtruth_keypoints])
      groundtruth_keypoints_list = [
          input_dict[fields.InputDataFields.groundtruth_keypoints]]
    label_id_offset = 1
    model.provide_groundtruth(
        [input_dict[fields.InputDataFields.groundtruth_boxes]],
        [tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes]
                    - label_id_offset, depth=model.num_classes)],
        groundtruth_masks_list, groundtruth_keypoints_list)
    losses_dict.update(model.loss(prediction_dict, true_image_shapes))

  result_dict = eval_util.result_dict_for_single_example(
      original_image,
      input_dict[fields.InputDataFields.source_id],
      detections,
      groundtruth,
      class_agnostic=(
          fields.DetectionResultFields.detection_classes not in detections),
      scale_to_absolute=True)
  return result_dict, losses_dict
Example #11
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Restores the model in a tensorflow session.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.


  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
    input_dict = create_input_dict_fn()
    batch = None
    if 'batch' in input_dict:
        batch = input_dict.pop('batch')
    else:
        prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
        input_dict = prefetch_queue.dequeue()
        # consistent format for images and videos
        for key, value in input_dict.items():
            input_dict[key] = (value,)

    detections = _create_detection_op(model, input_dict, batch)

    # Print out analysis of the model.
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.
        TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

    num_frames = len(input_dict[fields.InputDataFields.image])
    ret = []
    for i in range(num_frames):
        original_image = tf.expand_dims(
            input_dict[fields.InputDataFields.image][i], 0)
        groundtruth = None
        if not ignore_groundtruth:
            groundtruth = {
                fields.InputDataFields.groundtruth_boxes:
                input_dict[fields.InputDataFields.groundtruth_boxes][i],
                fields.InputDataFields.groundtruth_classes:
                input_dict[fields.InputDataFields.groundtruth_classes][i],
            }
            optional_keys = (
                fields.InputDataFields.groundtruth_area,
                fields.InputDataFields.groundtruth_is_crowd,
                fields.InputDataFields.groundtruth_difficult,
                fields.InputDataFields.groundtruth_group_of,
            )
            for opt_key in optional_keys:
                if opt_key in input_dict:
                    groundtruth[opt_key] = input_dict[opt_key][i]
            if fields.DetectionResultFields.detection_masks in detections:
                groundtruth[
                    fields.InputDataFields.groundtruth_instance_masks] = (
                        input_dict[fields.InputDataFields.
                                   groundtruth_instance_masks][i])

        detections_frame = {
            key: tf.expand_dims(value[i], 0)
            for key, value in detections.items()
        }

        source_id = (batch.key[0] if batch is not None else
                     input_dict[fields.InputDataFields.source_id][i])
        ret.append(
            eval_util.result_dict_for_single_example(
                original_image,
                source_id,
                detections_frame,
                groundtruth,
                class_agnostic=(fields.DetectionResultFields.detection_classes
                                not in detections),
                scale_to_absolute=True))
    return ret
Example #12
    def _make_evaluation_dict(self,
                              resized_groundtruth_masks=False,
                              batch_size=1,
                              max_gt_boxes=None,
                              scale_to_absolute=False):
        input_data_fields = standard_fields.InputDataFields
        detection_fields = standard_fields.DetectionResultFields

        image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
        if batch_size == 1:
            key = tf.constant('image1')
        else:
            key = tf.constant([str(i) for i in range(batch_size)])
        detection_boxes = tf.concat(
            [tf.tile(tf.constant([[[0., 0., 1., 1.]]]),
                     multiples=[batch_size - 1, 1, 1]),
             tf.constant([[[0., 0., 0.5, 0.5]]])],
            axis=0)
        detection_scores = tf.concat(
            [tf.tile(tf.constant([[0.5]]), multiples=[batch_size - 1, 1]),
             tf.constant([[0.8]])],
            axis=0)
        detection_classes = tf.tile(tf.constant([[0]]),
                                    multiples=[batch_size, 1])
        detection_masks = tf.tile(tf.ones(shape=[1, 1, 20, 20],
                                          dtype=tf.float32),
                                  multiples=[batch_size, 1, 1, 1])
        groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
        groundtruth_classes = tf.constant([1])
        groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
        num_detections = tf.ones([batch_size])
        if resized_groundtruth_masks:
            groundtruth_instance_masks = tf.ones(shape=[1, 10, 10],
                                                 dtype=tf.uint8)

        if batch_size > 1:
            groundtruth_boxes = tf.tile(tf.expand_dims(groundtruth_boxes, 0),
                                        multiples=[batch_size, 1, 1])
            groundtruth_classes = tf.tile(
                tf.expand_dims(groundtruth_classes, 0),
                multiples=[batch_size, 1])
            groundtruth_instance_masks = tf.tile(
                tf.expand_dims(groundtruth_instance_masks, 0),
                multiples=[batch_size, 1, 1, 1])

        detections = {
            detection_fields.detection_boxes: detection_boxes,
            detection_fields.detection_scores: detection_scores,
            detection_fields.detection_classes: detection_classes,
            detection_fields.detection_masks: detection_masks,
            detection_fields.num_detections: num_detections
        }

        groundtruth = {
            input_data_fields.groundtruth_boxes:
            groundtruth_boxes,
            input_data_fields.groundtruth_classes:
            groundtruth_classes,
            input_data_fields.groundtruth_instance_masks:
            groundtruth_instance_masks,
        }
        if batch_size > 1:
            return eval_util.result_dict_for_batched_example(
                image,
                key,
                detections,
                groundtruth,
                scale_to_absolute=scale_to_absolute,
                max_gt_boxes=max_gt_boxes)
        else:
            return eval_util.result_dict_for_single_example(
                image,
                key,
                detections,
                groundtruth,
                scale_to_absolute=scale_to_absolute)
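A standalone sketch of the concat pattern above, assuming TF 1.x: the first batch_size - 1 examples share one score row while the final example gets a distinct one.

import tensorflow as tf

batch_size = 3
detection_scores = tf.concat(
    [tf.tile(tf.constant([[0.5]]), multiples=[batch_size - 1, 1]),
     tf.constant([[0.8]])],
    axis=0)
with tf.Session() as sess:
  print(sess.run(detection_scores))  # [[0.5], [0.5], [0.8]]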
Example #13
File: model.py Project: Toyben/models
  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
            fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        if not train_config.fine_tune_checkpoint_type:
          # train_config.from_detection_checkpoint field is deprecated. For
          # backward compatibility, set train_config.fine_tune_checkpoint_type
          # based on train_config.from_detection_checkpoint.
          if train_config.from_detection_checkpoint:
            train_config.fine_tune_checkpoint_type = 'detection'
          else:
            train_config.fine_tune_checkpoint_type = 'classification'
        asg_map = detection_model.restore_map(
            fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map, train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()
          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      losses = [loss_tensor for loss_tensor in losses_dict.values()]
      if train_config.add_regularization_loss:
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        if regularization_losses:
          regularization_loss = tf.add_n(regularization_losses,
                                         name='regularization_loss')
          losses.append(regularization_loss)
          if not use_tpu:
            tf.summary.scalar('regularization_loss', regularization_loss)
      total_loss = tf.add_n(losses, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)

      if use_tpu:
        training_optimizer = tpu_optimizer.CrossShardOptimizer(
            training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      if train_config.freeze_variables:
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            exclude_patterns=train_config.freeze_variables)

      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
      # Detection summaries during eval.
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
      use_original_images = fields.InputDataFields.original_image in features
      eval_images = (
          features[fields.InputDataFields.original_image] if use_original_images
          else features[fields.InputDataFields.image])
      eval_dict = eval_util.result_dict_for_single_example(
          eval_images[0:1],
          features[inputs.HASH_KEY][0],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=False)

      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      if not use_tpu and use_original_images:
        detection_and_groundtruth = (
            vis_utils.draw_side_by_side_evaluation_image(
                eval_dict, category_index, max_boxes_to_draw=20,
                min_score_thresh=0.2))
        tf.summary.image('Detections_Left_Groundtruth_Right',
                         detection_and_groundtruth)

      # Eval metrics on a single image.
      eval_metrics = eval_config.metrics_set
      if not eval_metrics:
        eval_metrics = ['coco_detection_metrics']
      eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
          eval_metrics, category_index.values(), eval_dict,
          include_metrics_per_category=False)

    if use_tpu:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)
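
A minimal usage sketch (not from the original source) of how a `model_fn` like the one above plugs into `tf.estimator`; `train_input_fn` and `eval_input_fn` are assumed, hypothetical input builders for the same pipeline:

import tensorflow as tf

# Assumptions: `model_fn` is the closure built above; `train_input_fn` and
# `eval_input_fn` are hypothetical input builders for the same pipeline.
run_config = tf.estimator.RunConfig(model_dir='/tmp/object_detection_model')
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

# The TRAIN branch builds total_loss and train_op; the EVAL branch builds
# the eval_metric_ops returned above.
estimator.train(input_fn=train_input_fn, max_steps=10000)
metrics = estimator.evaluate(input_fn=eval_input_fn)
print(metrics['loss'])  # Estimator.evaluate always reports the loss.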
Example #14
  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # When evaluating on training data, check whether the groundtruth
      # tensors need to be unpadded.
      boxes_shape = (
          labels[fields.InputDataFields.groundtruth_boxes].get_shape()
          .as_list())
      unpad_groundtruth_tensors = boxes_shape[1] is not None
      labels = unstack_batch(
          labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
            fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      gt_is_crowd_list = None
      if fields.InputDataFields.groundtruth_is_crowd in labels:
        gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list,
          groundtruth_weights_list=labels[
              fields.InputDataFields.groundtruth_weights],
          groundtruth_is_crowd_list=gt_is_crowd_list)
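      # provide_groundtruth caches these tensors on the model so that the
      # detection_model.loss() call below can compare them against the
      # predictions.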

    preprocessed_images = features[fields.InputDataFields.image]
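    # predict() runs the forward pass on the preprocessed images;
    # postprocess() then converts the raw prediction_dict into final
    # detections (boxes, scores, classes).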
    prediction_dict = detection_model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        if not train_config.fine_tune_checkpoint_type:
          # train_config.from_detection_checkpoint field is deprecated. For
          # backward compatibility, set train_config.fine_tune_checkpoint_type
          # based on train_config.from_detection_checkpoint.
          if train_config.from_detection_checkpoint:
            train_config.fine_tune_checkpoint_type = 'detection'
          else:
            train_config.fine_tune_checkpoint_type = 'classification'
        asg_map = detection_model.restore_map(
            fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
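        # Restrict the assignment map to variables actually present in the
        # fine-tune checkpoint, so initialization does not fail on variables
        # the checkpoint lacks.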
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map, train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:
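          # On TPU, checkpoint initialization is deferred into the Scaffold
          # so it runs on the host when the session is created.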
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()
          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      losses = [loss_tensor for loss_tensor in losses_dict.values()]
      if train_config.add_regularization_loss:
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        if regularization_losses:
          regularization_loss = tf.add_n(regularization_losses,
                                         name='regularization_loss')
          losses.append(regularization_loss)
          losses_dict['Loss/regularization_loss'] = regularization_loss
      total_loss = tf.add_n(losses, name='total_loss')
      losses_dict['Loss/total_loss'] = total_loss

      if 'graph_rewriter_config' in configs:
        graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=is_training)
        graph_rewriter_fn()

      # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
      # can write learning rate summaries on TPU without host calls.
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)

    if mode == tf.estimator.ModeKeys.TRAIN:
      if use_tpu:
        training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
            training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      if train_config.freeze_variables:
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            exclude_patterns=train_config.freeze_variables)

      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Empty name prevents a scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    scaffold = None
    if mode == tf.estimator.ModeKeys.EVAL:
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _prepare_groundtruth_for_eval(
          detection_model, class_agnostic)
      use_original_images = fields.InputDataFields.original_image in features
      eval_images = (
          features[fields.InputDataFields.original_image] if use_original_images
          else features[fields.InputDataFields.image])
      eval_dict = eval_util.result_dict_for_single_example(
          eval_images[0:1],
          features[inputs.HASH_KEY][0],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=True)

      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      img_summary = None
      if not use_tpu and use_original_images:
        detection_and_groundtruth = (
            vis_utils.draw_side_by_side_evaluation_image(
                eval_dict, category_index, max_boxes_to_draw=20,
                min_score_thresh=0.2,
                use_normalized_coordinates=False))
        img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
                                       detection_and_groundtruth)

      # Eval metrics on a single example.
      eval_metrics = eval_config.metrics_set
      if not eval_metrics:
        eval_metrics = ['coco_detection_metrics']
      eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
          eval_metrics,
          category_index.values(),
          eval_dict,
          include_metrics_per_category=eval_config.include_metrics_per_category)
      for loss_key, loss_tensor in losses_dict.items():
        eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
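      # Every eval_metric_ops value must be a (value_tensor, update_op) pair:
      # tf.metrics.mean returns one, and the bare tensors below are paired
      # with tf.no_op() to satisfy the same contract.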
      for var in optimizer_summary_vars:
        eval_metric_ops[var.op.name] = (var, tf.no_op())
      if img_summary is not None:
        eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
            img_summary, tf.no_op())
      eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

      if eval_config.use_moving_averages:
        variable_averages = tf.train.ExponentialMovingAverage(0.0)
        variables_to_restore = variable_averages.variables_to_restore()
        keep_checkpoint_every_n_hours = (
            train_config.keep_checkpoint_every_n_hours)
        saver = tf.train.Saver(
            variables_to_restore,
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
        scaffold = tf.train.Scaffold(saver=saver)

    if use_tpu:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs,
          scaffold=scaffold)
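
For context on the `use_moving_averages` branch above, a standalone sketch (illustrative, not from the source): `ExponentialMovingAverage.variables_to_restore()` maps each variable's shadow-average name to the live variable, so a `Saver` built from that mapping loads the averaged weights in place of the raw ones at eval time.

import tensorflow as tf

# Standalone illustration of the EMA restore pattern used in the EVAL branch.
w = tf.get_variable('w', shape=[2], initializer=tf.zeros_initializer())
ema = tf.train.ExponentialMovingAverage(decay=0.0)
maintain_averages_op = ema.apply([w])  # creates the shadow variable for w

# Maps 'w/ExponentialMovingAverage' -> w, so restoring from a checkpoint
# writes the averaged value directly into `w`.
variables_to_restore = ema.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)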