Example #1
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                image_root,
                                ignore_groundtruth=False):
    """Restores the model in a tensorflow session.
    
    Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.
    
    Returns:
    tensor_dict: A tensor dictionary with evaluations.
    """
    input_queue = _create_input_queue(batch_size_per_clone=1,
                                      create_tensor_dict_fn=create_input_dict_fn,
                                      detection_model=model,
                                      batch_queue_capacity=10,
                                      num_batch_queue_threads=8,
                                      prefetch_queue_capacity=10,
                                      image_path=image_root)

    (images, groundtruth_boxes, groundtruth_classes,
     original_image) = _get_inputs(input_queue)
    model.provide_groundtruth(groundtruth_boxes,
                              groundtruth_classes,
                              None)
    prediction_dict = model.predict(images)
    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[2], original_image_shape[3])
    absolute_groundtruth_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(groundtruth_boxes, axis=0)),
        original_image_shape[2], original_image_shape[3])

    label_id_offset = 1
    tensor_dict = {
        'original_image': original_image,
        'detection_boxes': absolute_detection_boxlist.get(),
        'groundtruth_boxes': absolute_groundtruth_boxlist.get(),
        'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
        'detection_classes': (
            tf.squeeze(detections['detection_classes'], axis=0) +
            label_id_offset),
    }

    return tensor_dict
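
The conversion all of these examples rely on multiplies normalized
[ymin, xmin, ymax, xmax] boxes by the image height and width. A minimal,
self-contained sketch, assuming TF1.x and the TensorFlow Object Detection API
are installed:

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops

# One box covering the top-left quadrant of a 200x400 image.
normalized = tf.constant([[0.0, 0.0, 0.5, 0.5]], tf.float32)
absolute = box_list_ops.to_absolute_coordinates(
    box_list.BoxList(normalized), 200, 400)
with tf.Session() as sess:
    print(sess.run(absolute.get()))  # [[0., 0., 100., 200.]]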
Example #2
    def _get_instance_embeddings(self, boxes, instance_embedding):
        """Return the instance embeddings from bounding box centers.

    Args:
      boxes: A [num_instances, 4] float tensor holding bounding boxes. The
        coordinates are in normalized input space.
      instance_embedding: A [height, width, embedding_size] float tensor
        containing the instance embeddings.

    Returns:
      instance_embeddings: A [num_instances, embedding_size] shaped float tensor
        containing the center embedding for each instance.
    """
        blist = box_list.BoxList(boxes)
        output_height = tf.shape(instance_embedding)[0]
        output_width = tf.shape(instance_embedding)[1]

        blist_output = box_list_ops.to_absolute_coordinates(blist,
                                                            output_height,
                                                            output_width,
                                                            check_range=False)
        (y_center_output, x_center_output, _,
         _) = blist_output.get_center_coordinates_and_sizes()
        center_coords_output = tf.stack([y_center_output, x_center_output],
                                        axis=1)
        center_coords_output_int = tf.cast(center_coords_output, tf.int32)
        center_latents = tf.gather_nd(instance_embedding,
                                      center_coords_output_int)

        return center_latents
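
The key step above is the tf.gather_nd lookup at integer (y, x) box centers.
A stand-alone sketch of just that lookup (shapes are illustrative
assumptions, not taken from the snippet):

import tensorflow as tf

embedding = tf.reshape(tf.range(64 * 64 * 16, dtype=tf.float32),
                       [64, 64, 16])                  # [height, width, embedding_size]
centers = tf.constant([[10, 20], [31, 7]], tf.int32)  # [num_instances, 2] as (y, x)
center_latents = tf.gather_nd(embedding, centers)     # [num_instances, 16]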
Example #3
def build_test_graph(model, model_scope, reuse=None, weights_dict=None):
    input_init_gt_box = tf.constant(np.zeros((1, 4)), dtype=tf.float32)
    input_init_image = tf.placeholder(dtype=tf.uint8, shape=[128, 128, 3])
    input_cur_image = tf.placeholder(dtype=tf.uint8, shape=[300, 300, 3])

    init_gt_box = tf.reshape(input_init_gt_box, shape=[1, 1, 4])
    groundtruth_classes = tf.ones(dtype=tf.float32, shape=[1, 1, 1])
    float_init_image = tf.to_float(input_init_image)
    float_init_image = tf.expand_dims(tf.expand_dims(float_init_image, axis=0),
                                      axis=0)
    preprocessed_init_image = model.preprocess(float_init_image, [128, 128])
    images = tf.expand_dims(input_cur_image, axis=0)
    float_images = tf.to_float(images)
    preprocessed_images = model.preprocess(float_images)
    preprocessed_images = tf.expand_dims(preprocessed_images, axis=0)
    model.provide_groundtruth(init_gt_box, groundtruth_classes, None)
    with tf.variable_scope(model_scope, reuse=reuse):
        prediction_dict = model.predict(preprocessed_init_image,
                                        preprocessed_images,
                                        istraining=False,
                                        reuse=reuse)
    detections = model.postprocess(prediction_dict)
    original_image_shape = tf.shape(preprocessed_images)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[2], original_image_shape[3])
    return (absolute_detection_boxlist.get(), detections['detection_scores'],
            input_cur_image, input_init_image)
Example #4
 def _absolute_boxes(normalized_boxes):
     # original_image_shape and k_shot are captured from the enclosing scope.
     absolute_detection_boxlist_list = [
         box_list_ops.to_absolute_coordinates(
             box_list.BoxList(tf.squeeze(k, axis=0)),
             original_image_shape[1], original_image_shape[2])
         for k in tf.split(normalized_boxes, k_shot)
     ]
     return tf.stack([db.get() for db in absolute_detection_boxlist_list])
Example #5
 def graph_fn():
   coordinates = tf.constant([[0, 0, 100, 100],
                              [25, 25, 75, 75]], tf.float32)
   img = tf.ones((128, 100, 100, 3))
   boxlist = box_list.BoxList(coordinates)
   absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
                                                           tf.shape(img)[1],
                                                           tf.shape(img)[2])
   return absolute_boxlist.get()
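
These graph_fn snippets come from unit tests, where a test harness builds and
runs them. A minimal TF1-style stand-in for such a harness (an assumption,
not the test framework's actual runner):

def run_graph_fn(graph_fn):
    # Build the graph in isolation and evaluate its outputs once.
    with tf.Graph().as_default():
        outputs = graph_fn()
        with tf.Session() as sess:
            return sess.run(outputs)

Note that running the graph_fn above raises the range assertion: its
coordinates are already absolute (up to 100), far beyond the default
maximum_normalized_coordinate of 1.1.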
Example #6
 def graph_fn():
   # `coordinates` comes from the enclosing test (cf. Example #15).
   img = tf.ones((128, 202, 202, 3))
   boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
   boxlist = box_list_ops.to_absolute_coordinates(boxlist,
                                                  tf.shape(img)[1],
                                                  tf.shape(img)[2])
   boxlist = box_list_ops.to_normalized_coordinates(boxlist,
                                                    tf.shape(img)[1],
                                                    tf.shape(img)[2])
   return boxlist.get()
Example #7
 def graph_fn():
   coordinates = tf.constant([[0, 0, 1.2, 1.2],
                              [0.25, 0.25, 0.75, 0.75]], tf.float32)
   img = tf.ones((128, 100, 100, 3))
   boxlist = box_list.BoxList(coordinates)
   # ymax/xmax of 1.2 exceed maximum_normalized_coordinate=1.1, so evaluating
   # this graph raises the range assertion (see Example #10).
   absolute_boxlist = box_list_ops.to_absolute_coordinates(
       boxlist,
       tf.shape(img)[1],
       tf.shape(img)[2],
       maximum_normalized_coordinate=1.1)
   return absolute_boxlist.get()
Example #8
  def test_to_absolute_coordinates_already_absolute(self):
    coordinates = tf.constant([[0, 0, 100, 100],
                               [25, 25, 75, 75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    boxlist = box_list.BoxList(coordinates)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
                                                            tf.shape(img)[1],
                                                            tf.shape(img)[2])

    with self.test_session() as sess:
      with self.assertRaisesOpError('assertion failed'):
        sess.run(absolute_boxlist.get())
Example #10
  def test_to_absolute_coordinates_maximum_coordinate_check(self):
    coordinates = tf.constant([[0, 0, 1.2, 1.2],
                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    boxlist = box_list.BoxList(coordinates)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        boxlist,
        tf.shape(img)[1],
        tf.shape(img)[2],
        maximum_normalized_coordinate=1.1)

    with self.test_session() as sess:
      with self.assertRaisesOpError('assertion failed'):
        sess.run(absolute_boxlist.get())
Example #11
    def test_to_absolute_coordinates(self):
        coordinates = tf.constant([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]],
                                  tf.float32)
        img = tf.ones((128, 100, 100, 3))
        boxlist = box_list.BoxList(coordinates)
        absolute_boxlist = box_list_ops.to_absolute_coordinates(
            boxlist,
            tf.shape(img)[1],
            tf.shape(img)[2])
        expected_boxes = [[0, 0, 100, 100], [25, 25, 75, 75]]

        with self.test_session() as sess:
            absolute_boxes = sess.run(absolute_boxlist.get())
            self.assertAllClose(absolute_boxes, expected_boxes)
Example #12
  def test_to_absolute_coordinates(self):
    coordinates = tf.constant([[0, 0, 1, 1],
                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    boxlist = box_list.BoxList(coordinates)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
                                                            tf.shape(img)[1],
                                                            tf.shape(img)[2])
    expected_boxes = [[0, 0, 100, 100],
                      [25, 25, 75, 75]]

    with self.test_session() as sess:
      absolute_boxes = sess.run(absolute_boxlist.get())
      self.assertAllClose(absolute_boxes, expected_boxes)
Example #14
 def transform_boxes(elems):
     boxes_per_image, true_image_shape = elems
     blist = box_list.BoxList(boxes_per_image)
     # First transform boxes from image space to resized-image space, since
     # the resized images may contain padding.
     blist = box_list_ops.scale(
         blist, true_image_shape[0] / resized_image_height,
         true_image_shape[1] / resized_image_width)
     # Then transform boxes from resized image space (normalized) to the
     # feature map space (absolute).
     blist = box_list_ops.to_absolute_coordinates(blist,
                                                  height,
                                                  width,
                                                  check_range=False)
     return blist.get()
Example #15
  def test_convert_to_absolute_and_back(self):
    coordinates = np.random.uniform(size=(100, 4))
    coordinates = np.sort(coordinates)
    coordinates[99, :] = [0, 0, 1, 1]
    img = tf.ones((128, 202, 202, 3))

    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
    boxlist = box_list_ops.to_absolute_coordinates(boxlist,
                                                   tf.shape(img)[1],
                                                   tf.shape(img)[2])
    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
                                                     tf.shape(img)[1],
                                                     tf.shape(img)[2])

    with self.test_session() as sess:
      out = sess.run(boxlist.get())
      self.assertAllClose(out, coordinates)
Example #17
def fill_boxes(boxes, height, width):
    """Fills the area included in the box."""
    blist = box_list.BoxList(boxes)
    blist = box_list_ops.to_absolute_coordinates(blist, height, width)
    boxes = blist.get()
    ymin, xmin, ymax, xmax = tf.unstack(boxes[:, tf.newaxis, tf.newaxis, :],
                                        4,
                                        axis=3)

    ygrid, xgrid = tf.meshgrid(tf.range(height),
                               tf.range(width),
                               indexing='ij')
    ygrid, xgrid = tf.cast(ygrid, tf.float32), tf.cast(xgrid, tf.float32)
    ygrid, xgrid = ygrid[tf.newaxis, :, :], xgrid[tf.newaxis, :, :]

    filled_boxes = tf.logical_and(tf.logical_and(ygrid >= ymin, ygrid <= ymax),
                                  tf.logical_and(xgrid >= xmin, xgrid <= xmax))

    return tf.cast(filled_boxes, tf.float32)
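
A hypothetical usage sketch for fill_boxes (values are assumptions for
illustration): one normalized box covering the top-left quadrant, rasterized
onto a 4x4 grid.

boxes = tf.constant([[0.0, 0.0, 0.5, 0.5]], tf.float32)
masks = fill_boxes(boxes, height=4, width=4)
# masks has shape [1, 4, 4]; the top-left 3x3 block is 1.0 because the
# absolute box spans y and x in [0, 2] inclusive.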
Example #18
    def _format_groundtruth_data(self, image_shape, to_absolute=True):
        """Helper function for preparing groundtruth data for target assignment.

    In order to be consistent with the model.DetectionModel interface,
    groundtruth boxes are specified in normalized coordinates and classes are
    specified as label indices with no assumed background category.  To prepare
    for target assignment, we:
    1) convert boxes to absolute coordinates,
    2) add a background class at class index 0

    Args:
      image_shape: A 1-D int32 tensor of shape [4] representing the shape of the
        input image batch.

    Returns:
      groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates
        of the groundtruth boxes.
      groundtruth_classes_with_background_list: A list of 2-D one-hot
        (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
        class targets with the 0th index assumed to map to the background class.
    """
        if to_absolute:
            groundtruth_boxlists = [
                box_list_ops.to_absolute_coordinates(box_list.BoxList(boxes),
                                                     image_shape[1],
                                                     image_shape[2])
                for boxes in self.groundtruth_lists(fields.BoxListFields.boxes)
            ]
        else:
            groundtruth_boxlists = tf.concat(
                self.groundtruth_lists(fields.BoxListFields.boxes), 0)
        groundtruth_classes_with_background_list = [
            tf.to_float(one_hot_encoding)
            #tf.to_float(
            #    tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT'))
            for one_hot_encoding in self.groundtruth_lists(
                fields.BoxListFields.classes)
        ]
        return groundtruth_boxlists, groundtruth_classes_with_background_list
Example #19
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False,
                                provide_groundtruth_to_model=False,
                                calc_loss=False):
    """Builds prediction tensors for evaluation.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.
    provide_groundtruth_to_model: whether to use model.provide_groundtruth().
    calc_loss: whether to compute losses and add them to the output.

  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
    mtl = model._mtl  # multi-task learning (MTL) options of the model.
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()

    if calc_loss or mtl.window or mtl.edgemask:
        provide_groundtruth_to_model = True

    # Get groundtruth information
    if provide_groundtruth_to_model:
        (_, groundtruth_boxes_list, groundtruth_ignore_list,
         groundtruth_classes_list, groundtruth_masks_list, _,
         window_boxes_list, window_classes_list, groundtruth_closeness_list,
         groundtruth_edgemask_list) = _get_inputs([input_dict],
                                                  model.num_classes,
                                                  with_filename=False)

        if any(mask is None for mask in groundtruth_masks_list):
            groundtruth_masks_list = None
        model.provide_groundtruth(groundtruth_boxes_list,
                                  groundtruth_classes_list,
                                  groundtruth_closeness_list,
                                  groundtruth_ignore_list,
                                  groundtruth_masks_list)
        model.provide_window(window_boxes_list, window_classes_list)
        model.provide_edgemask(groundtruth_edgemask_list)

    original_image = tf.expand_dims(input_dict[fields.InputDataFields.image],
                                    0)
    preprocessed_image = model.preprocess(tf.to_float(original_image))
    prediction_dict = model.predict(preprocessed_image)

    if mtl.window:
        prediction_dict = model.predict_with_window(prediction_dict)
    if mtl.edgemask:
        prediction_dict = model.predict_edgemask(prediction_dict)
    if mtl.refine:
        prediction_dict = model.predict_with_mtl_results(prediction_dict)

    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[1], original_image_shape[2])
    label_id_offset = 1
    tensor_dict = {
        'original_image':
        original_image,
        'image_id':
        input_dict[fields.InputDataFields.source_id],
        'detection_boxes':
        absolute_detection_boxlist.get(),
        'detection_scores':
        tf.squeeze(detections['detection_scores'], axis=0),
        'detection_classes':
        (tf.squeeze(detections['detection_classes'], axis=0) +
         label_id_offset),
    }

    if 'detection_thresholds' in detections:
        tensor_dict['detection_thresholds'] = tf.squeeze(
            detections['detection_thresholds'], axis=0)
    if 'detection_masks' in detections:
        detection_masks = tf.squeeze(detections['detection_masks'], axis=0)
        detection_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, original_image_shape[1],
            original_image_shape[2])
        detection_masks_reframed = tf.to_float(
            tf.greater(detection_masks_reframed, 0.5))

        tensor_dict['detection_masks'] = detection_masks_reframed
    # load groundtruth fields into tensor_dict
    if not ignore_groundtruth:
        normalized_gt_boxlist = box_list.BoxList(
            input_dict[fields.InputDataFields.groundtruth_boxes])
        gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                        tf.shape(original_image)[1],
                                        tf.shape(original_image)[2])
        groundtruth_boxes = gt_boxlist.get()
        groundtruth_classes = input_dict[
            fields.InputDataFields.groundtruth_classes]
        tensor_dict['groundtruth_boxes'] = groundtruth_boxes
        tensor_dict['groundtruth_classes'] = groundtruth_classes
        tensor_dict['area'] = input_dict[
            fields.InputDataFields.groundtruth_area]
        tensor_dict['difficult'] = input_dict[
            fields.InputDataFields.groundtruth_difficult]
        if 'detection_masks' in tensor_dict:
            tensor_dict['groundtruth_instance_masks'] = input_dict[
                fields.InputDataFields.groundtruth_instance_masks]

        # Subset annotations
        if fields.InputDataFields.groundtruth_subset in input_dict:
            tensor_dict['groundtruth_subset'] = input_dict[
                fields.InputDataFields.groundtruth_subset]

    if calc_loss:
        losses_dict = model.loss(prediction_dict)

        for loss_name, loss_tensor in losses_dict.items():
            loss_tensor = tf.check_numerics(loss_tensor,
                                            '%s is inf or nan.' % loss_name,
                                            name='Loss/' + loss_name)
            tensor_dict['Loss/' + loss_name] = loss_tensor

    # mtl groundtruth
    if mtl.window:
        tensor_dict['window_classes_gt'] = input_dict[
            fields.InputDataFields.window_classes]
        tensor_dict['window_classes_dt'] = prediction_dict[
            'window_class_predictions']
    if mtl.closeness:
        tensor_dict['closeness_gt'] = input_dict[
            fields.InputDataFields.groundtruth_closeness]
        tensor_dict['closeness_dt'] = prediction_dict['closeness_predictions']
    if mtl.edgemask:
        tensor_dict['edgemask_gt'] = input_dict[
            fields.InputDataFields.groundtruth_edgemask_masks]
        tensor_dict['edgemask_dt'] = prediction_dict['edgemask_predictions']

    return tensor_dict
Example #20
 def _format_groundtruth_data(self, image_shape):
   groundtruth_boxlists = [
       box_list_ops.to_absolute_coordinates(
           box_list.BoxList(boxes), image_shape[1], image_shape[2],
           check_range=False)
       for boxes in self.groundtruth_lists(fields.BoxListFields.boxes)]
   return groundtruth_boxlists
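
check_range=False (used above) skips the maximum-coordinate assertion, which
matters when nominally normalized boxes can legitimately exceed 1.0, e.g.
after padding. A minimal sketch:

boxes = tf.constant([[0.0, 0.0, 1.3, 1.3]], tf.float32)
blist = box_list_ops.to_absolute_coordinates(
    box_list.BoxList(boxes), 100, 100, check_range=False)
# blist.get() evaluates to [[0., 0., 130., 130.]] without raising; with the
# default check_range=True the same graph would fail, since 1.3 > 1.1.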
Example #21
def _create_losses(input_queue, create_model_fn):
  """Creates loss function for a DetectionModel.

  Args:
    input_queue: BatchQueue object holding enqueued tensor_dicts.
    create_model_fn: A function to create the DetectionModel.
  """
  detection_model = create_model_fn()
  (original_images, filenames, groundtruth_boxes_list,
   groundtruth_classes_list, groundtruth_transcriptions_list,
   groundtruth_masks_list) = _get_inputs(input_queue,
                                         detection_model.num_classes)

  images = [detection_model.preprocess(image) for image in original_images]
  images = tf.concat(images, 0)
  if any(mask is None for mask in groundtruth_masks_list):
    groundtruth_masks_list = None

  tf.summary.image('InputImage', images, max_outputs=99999)

  print('')
  print('_create_losses')
  print(original_images)
  print(images)
  print(groundtruth_boxes_list)
  print(groundtruth_classes_list)
  print(groundtruth_transcriptions_list)
  sys.stdout.flush()

  detection_model.provide_groundtruth(
      groundtruth_boxes_list,
      groundtruth_classes_list,
      groundtruth_masks_list,
      groundtruth_transcriptions_list=groundtruth_transcriptions_list)
  prediction_dict = detection_model.predict(images)
  losses_dict = detection_model.loss(prediction_dict)
  for name, loss_tensor in losses_dict.items():
    tf.summary.scalar(name, loss_tensor)
    tf.losses.add_loss(loss_tensor)
  print(losses_dict)
  sys.stdout.flush()

  # Metrics for sequence accuracy
  if prediction_dict['transcriptions'] is not None:
    tf.summary.scalar('CharAccuracy', metrics.char_accuracy(
        prediction_dict['transcriptions'],
        prediction_dict['transcriptions_groundtruth']))
    tf.summary.scalar('SequenceAccuracy', metrics.sequence_accuracy(
        prediction_dict['transcriptions'],
        prediction_dict['transcriptions_groundtruth']))

  return

  # Everything below is unreachable; it was kept for debugging and testing
  # during training.

  # Metrics for detection
  detections = detection_model.postprocess(prediction_dict)

  original_images = original_images[0]
  filenames = filenames[0]

  original_image_shape = tf.shape(original_images)
  absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
      box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
      original_image_shape[1], original_image_shape[2])
  label_id_offset = 1
  det_boxes = absolute_detection_boxlist.get()

  det_scores = tf.squeeze(detections['detection_scores'], axis=0)
  det_classes = tf.ones_like(det_scores)
  det_transcriptions = tf.squeeze(detections['detection_transcriptions'], axis=0)

  print('')
  print('Metrics printing')
  print(groundtruth_boxes_list)
  print(groundtruth_classes_list)
  print(groundtruth_transcriptions_list)

  normalized_gt_boxlist = box_list.BoxList(groundtruth_boxes_list[0])
  gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                  original_image_shape[1],
                                  original_image_shape[2])
  gt_boxes = gt_boxlist.get()
  gt_classes = groundtruth_classes_list[0]
  gt_transcriptions = groundtruth_transcriptions_list[0]

  print(original_images)
  print(filenames)
  print(det_boxes)
  print(det_scores)
  print(det_classes)
  print(det_transcriptions)
  print(gt_boxes)
  print(gt_classes)
  print(gt_transcriptions)
  sys.stdout.flush()

  mAP = tf.py_func(
      eval_wrapper,
      [original_images, filenames, det_boxes, det_scores, det_classes,
       det_transcriptions, gt_boxes, gt_classes, gt_transcriptions,
       tf.train.get_global_step()],
      tf.float64,
      stateful=False)
  tf.summary.scalar('mAP', mAP)
Example #22
 def _scale_box_to_absolute(args):
     boxes, height, width = args
     return box_list_ops.to_absolute_coordinates(box_list.BoxList(boxes),
                                                 height, width).get()
Example #23
def _scale_box_to_absolute(args):
    boxes, image_shape = args
    return box_list_ops.to_absolute_coordinates(box_list.BoxList(boxes),
                                                image_shape[0],
                                                image_shape[1]).get()
Example #24
def _scale_box_to_absolute(args):
  boxes, image_shape = args
  return box_list_ops.to_absolute_coordinates(
      box_list.BoxList(boxes), image_shape[0], image_shape[1]).get()
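
The variants above that take (boxes, image_shape) are written as tf.map_fn
callbacks, so a whole batch can be converted at once. A hedged sketch (shapes
and values are assumptions):

boxes_batch = tf.zeros([8, 100, 4], tf.float32)  # normalized boxes per image
image_shapes = tf.tile([[480, 640]], [8, 1])     # [height, width] per image
absolute_boxes = tf.map_fn(
    _scale_box_to_absolute,
    elems=(boxes_batch, image_shapes),
    dtype=tf.float32)                            # [8, 100, 4] in pixels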
Example #25
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
    """Restores the model in a tensorflow session.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)  # TODO
    input_dict = prefetch_queue.dequeue()
    original_image = tf.expand_dims(input_dict[fields.InputDataFields.image],
                                    0)

    next_image = input_dict.get(fields.InputDataFields.next_image)
    image_input = tf.to_float(original_image)
    if next_image is not None:
        next_image = tf.to_float(next_image)
        image_input = tf.concat(
            [image_input, tf.expand_dims(next_image, 0)], 3)
        depth = input_dict.get(fields.InputDataFields.groundtruth_depth)
        next_depth = input_dict.get(
            fields.InputDataFields.groundtruth_next_depth)
        image_input.set_shape([1, None, None, 6])
        if depth is not None and next_depth is not None:
            camera_intrinsics = input_dict[
                fields.InputDataFields.camera_intrinsics]
            coords = motion_util.get_3D_coords(tf.expand_dims(depth, 0),
                                               camera_intrinsics)
            next_coords = motion_util.get_3D_coords(
                tf.expand_dims(next_depth, 0), camera_intrinsics)
            image_input = tf.concat([image_input, coords, next_coords], 3)
            image_input.set_shape([1, None, None, 12])

    preprocessed_image = model.preprocess(image_input)
    prediction_dict = model.predict(preprocessed_image)
    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[1], original_image_shape[2])
    label_id_offset = 1
    tensor_dict = {
        'original_image':
        original_image,
        'image_id':
        input_dict[fields.InputDataFields.source_id],
        'detection_boxes':
        absolute_detection_boxlist.get(),
        'detection_scores':
        tf.squeeze(detections['detection_scores'], axis=0),
        'detection_classes':
        (tf.squeeze(detections['detection_classes'], axis=0) +
         label_id_offset),
    }
    if 'detection_masks' in detections:
        detection_masks = tf.squeeze(detections['detection_masks'], axis=0)
        detection_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, original_image_shape[1],
            original_image_shape[2])
        detection_masks_reframed = tf.to_float(
            tf.greater(detection_masks_reframed, 0.5))

        tensor_dict['detection_masks'] = detection_masks_reframed

    if 'detection_motions' in detections:
        detection_motions = tf.squeeze(detections['detection_motions'], axis=0)
        detection_motions_with_matrices = (
            motion_util.postprocess_detection_motions(detection_motions,
                                                      keep_logits=False))
        tensor_dict['detection_motions'] = detection_motions_with_matrices

    if 'camera_motion' in detections:
        camera_motion_with_matrices = tf.squeeze(
            motion_util.postprocess_camera_motion(detections['camera_motion']),
            axis=0)
        tensor_dict['camera_motion'] = camera_motion_with_matrices
        tensor_dict['groundtruth_camera_motion'] = input_dict[
            fields.InputDataFields.groundtruth_camera_motion]

    # load groundtruth fields into tensor_dict
    if not ignore_groundtruth:
        normalized_gt_boxlist = box_list.BoxList(
            input_dict[fields.InputDataFields.groundtruth_boxes])
        gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                        tf.shape(original_image)[1],
                                        tf.shape(original_image)[2])
        groundtruth_boxes = gt_boxlist.get()
        groundtruth_classes = input_dict[
            fields.InputDataFields.groundtruth_classes]
        tensor_dict['groundtruth_boxes'] = groundtruth_boxes
        tensor_dict['groundtruth_classes'] = groundtruth_classes
        tensor_dict['area'] = input_dict[
            fields.InputDataFields.groundtruth_area]
        tensor_dict['is_crowd'] = input_dict[
            fields.InputDataFields.groundtruth_is_crowd]
        tensor_dict['difficult'] = input_dict[
            fields.InputDataFields.groundtruth_difficult]
        if 'detection_masks' in tensor_dict:
            tensor_dict['groundtruth_instance_masks'] = input_dict[
                fields.InputDataFields.groundtruth_instance_masks]

        if 'detection_motions' in tensor_dict:
            tensor_dict['groundtruth_camera_motion'] = input_dict[
                fields.InputDataFields.groundtruth_camera_motion]
            tensor_dict['groundtruth_instance_motions'] = input_dict[
                fields.InputDataFields.groundtruth_instance_motions]
            tensor_dict['camera_intrinsics'] = input_dict[
                fields.InputDataFields.camera_intrinsics]
            if fields.InputDataFields.groundtruth_flow in input_dict:
                tensor_dict['groundtruth_flow'] = input_dict[
                    fields.InputDataFields.groundtruth_flow]
            if 'depth' not in tensor_dict:
                tensor_dict['depth'] = input_dict[
                    fields.InputDataFields.groundtruth_depth]
            else:
                tensor_dict['groundtruth_depth'] = input_dict[
                    fields.InputDataFields.groundtruth_depth]
    return tensor_dict
Example #26
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
  """Builds prediction tensors for evaluation.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    tensor_dict: A tensor dictionary with evaluations.
  """
  input_dict = create_input_dict_fn()
  prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
  input_dict = prefetch_queue.dequeue()
  original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
  preprocessed_image = model.preprocess(tf.to_float(original_image))
  prediction_dict = model.predict(preprocessed_image)
  detections = model.postprocess(prediction_dict)

  original_image_shape = tf.shape(original_image)
  absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
      box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
      original_image_shape[1], original_image_shape[2])
  label_id_offset = 1
  tensor_dict = {
      'original_image': original_image,
      'image_id': input_dict[fields.InputDataFields.source_id],
      'detection_boxes': absolute_detection_boxlist.get(),
      'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
      'detection_classes': (
          tf.squeeze(detections['detection_classes'], axis=0) +
          label_id_offset),
  }
  if 'detection_masks' in detections:
    detection_masks = tf.squeeze(detections['detection_masks'],
                                 axis=0)
    detection_boxes = tf.squeeze(detections['detection_boxes'],
                                 axis=0)
    # TODO: This should be done in model's postprocess function ideally.
    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
        detection_masks,
        detection_boxes,
        original_image_shape[1], original_image_shape[2])
    detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
                                                      0.5))

    tensor_dict['detection_masks'] = detection_masks_reframed
  # load groundtruth fields into tensor_dict
  if not ignore_groundtruth:
    normalized_gt_boxlist = box_list.BoxList(
        input_dict[fields.InputDataFields.groundtruth_boxes])
    gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                    tf.shape(original_image)[1],
                                    tf.shape(original_image)[2])
    groundtruth_boxes = gt_boxlist.get()
    groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
    tensor_dict['groundtruth_boxes'] = groundtruth_boxes
    tensor_dict['groundtruth_classes'] = groundtruth_classes
    tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
    tensor_dict['is_crowd'] = input_dict[
        fields.InputDataFields.groundtruth_is_crowd]
    tensor_dict['difficult'] = input_dict[
        fields.InputDataFields.groundtruth_difficult]
    if 'detection_masks' in tensor_dict:
      tensor_dict['groundtruth_instance_masks'] = input_dict[
          fields.InputDataFields.groundtruth_instance_masks]
  return tensor_dict
Example #27
 def _to_absolute_coordinates(normalized_boxes):
   # image_shape is captured from the enclosing scope.
   return box_list_ops.to_absolute_coordinates(
       box_list.BoxList(normalized_boxes),
       image_shape[1], image_shape[2], check_range=False).get()
Example #28
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
  """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    image: A single 4D uint8 image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, H, W] float32 tensor of binarized
      masks, reframed to full image masks.
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).

  """
  label_id_offset = 1  # Applying label id offset (b/63711816)

  input_data_fields = fields.InputDataFields
  output_dict = {
      input_data_fields.original_image: image,
      input_data_fields.key: key,
  }

  detection_fields = fields.DetectionResultFields
  detection_boxes = detections[detection_fields.detection_boxes][0]
  image_shape = tf.shape(image)
  detection_scores = detections[detection_fields.detection_scores][0]

  if class_agnostic:
    detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
  else:
    detection_classes = (
        tf.to_int64(detections[detection_fields.detection_classes][0]) +
        label_id_offset)

  num_detections = tf.to_int32(detections[detection_fields.num_detections][0])
  detection_boxes = tf.slice(
      detection_boxes, begin=[0, 0], size=[num_detections, -1])
  detection_classes = tf.slice(
      detection_classes, begin=[0], size=[num_detections])
  detection_scores = tf.slice(
      detection_scores, begin=[0], size=[num_detections])

  if scale_to_absolute:
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
    output_dict[detection_fields.detection_boxes] = (
        absolute_detection_boxlist.get())
  else:
    output_dict[detection_fields.detection_boxes] = detection_boxes
  output_dict[detection_fields.detection_classes] = detection_classes
  output_dict[detection_fields.detection_scores] = detection_scores

  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks][0]
    # TODO(rathodv): This should be done in model's postprocess
    # function ideally.
    detection_masks = tf.slice(
        detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_shape[1], image_shape[2])
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    output_dict[detection_fields.detection_masks] = detection_masks_reframed
  if detection_fields.detection_keypoints in detections:
    detection_keypoints = detections[detection_fields.detection_keypoints][0]
    output_dict[detection_fields.detection_keypoints] = detection_keypoints
    if scale_to_absolute:
      absolute_detection_keypoints = keypoint_ops.scale(
          detection_keypoints, image_shape[1], image_shape[2])
      output_dict[detection_fields.detection_keypoints] = (
          absolute_detection_keypoints)

  if groundtruth:
    if input_data_fields.groundtruth_instance_masks in groundtruth:
      groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast(
          groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8)
    output_dict.update(groundtruth)
    if scale_to_absolute:
      groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
      absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
          box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2])
      output_dict[input_data_fields.groundtruth_boxes] = (
          absolute_gt_boxlist.get())
    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes

  return output_dict
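
A hedged usage sketch for result_dict_for_single_example; the detections dict
below only mimics the structure of DetectionModel.postprocess() output, with
placeholder values:

image = tf.zeros([1, 480, 640, 3], tf.uint8)
key = tf.constant('image_0')
detections = {
    fields.DetectionResultFields.detection_boxes: tf.zeros([1, 100, 4]),
    fields.DetectionResultFields.detection_scores: tf.zeros([1, 100]),
    fields.DetectionResultFields.detection_classes: tf.zeros([1, 100]),
    fields.DetectionResultFields.num_detections: tf.constant([10.0]),
}
eval_dict = result_dict_for_single_example(
    image, key, detections, scale_to_absolute=True)
# eval_dict['detection_boxes'] then holds the first 10 boxes in pixels.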
Example #30
 def _to_absolute_coordinates(normalized_boxes):
     return box_list_ops.to_absolute_coordinates(
         box_list.BoxList(normalized_boxes),
         image_shape[1],
         image_shape[2],
         check_range=False).get()