Code example #1
File: roi_ops.py Project: zzm422/tpu
def _propose_rois_tpu(scores, boxes, anchor_boxes, height, width, scale,
                      rpn_pre_nms_topn, rpn_post_nms_topn, rpn_nms_threshold,
                      rpn_min_size, bbox_reg_weights):
    """Proposes RoIs giva group of candidates (TPU version).

  Args:
    scores: a tensor with a shape of [batch_size, num_boxes].
    boxes: a tensor with a shape of [batch_size, num_boxes, 4],
      in the encoded form.
    anchor_boxes: a tensor with a shape of [batch_size, num_boxes, 4]
      containing the anchors.
    height: a tensor of shape [batch_size, 1, 1] representing the image height.
    width: a tensor of shape [batch_size, 1, 1] representing the image width.
    scale: a tensor of shape [batch_size, 1, 1] representing the image scale.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and width;
      both need to be greater than this number. Note that this number is at the
      original image scale, not the scale used during training or inference.
    bbox_reg_weights: None or a list of four integers specifying the weights
      used when decoding the box.

  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals. It has the same dtype as the
      input scores.
    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax]. It has the same
      dtype as the input boxes.

  """
    _, num_boxes = scores.get_shape().as_list()

    topk_limit = min(num_boxes, rpn_pre_nms_topn)
    scores, boxes_list = box_utils.top_k(scores,
                                         k=topk_limit,
                                         boxes_list=[boxes, anchor_boxes])
    boxes = boxes_list[0]
    anchor_boxes = boxes_list[1]

    # Decode boxes w.r.t. anchors and transform to absolute coordinates.
    boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)

    # Clip boxes that exceed the boundary.
    boxes = box_utils.clip_boxes(boxes, height, width)

    # Filter out boxes whose height or width is less than the rpn_min_size
    # threshold.
    boxes, scores = box_utils.filter_boxes(boxes,
                                           tf.expand_dims(scores, axis=-1),
                                           rpn_min_size, height, width, scale)
    scores = tf.squeeze(scores, axis=-1)

    post_nms_topk_limit = min(topk_limit, rpn_post_nms_topn)
    # NMS.
    if rpn_nms_threshold > 0:
        scores, boxes = box_utils.sorted_non_max_suppression_padded(
            scores,
            boxes,
            max_output_size=post_nms_topk_limit,
            iou_threshold=rpn_nms_threshold)

    # Pick top-K post NMS'ed boxes.
    scores, boxes = box_utils.top_k(scores,
                                    k=post_nms_topk_limit,
                                    boxes_list=[boxes])
    boxes = boxes[0]
    return scores, boxes
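
A minimal usage sketch for this helper, assuming graph-mode TF1 and that box_utils from this project is importable; the shapes and hyperparameter values below are illustrative placeholders, not taken from the original repository.

import tensorflow.compat.v1 as tf

batch_size, num_boxes = 2, 1000
scores = tf.random.uniform([batch_size, num_boxes])       # already in [0, 1]
boxes = tf.random.normal([batch_size, num_boxes, 4])      # encoded box deltas
anchors = tf.random.uniform([batch_size, num_boxes, 4])   # anchors per image
# Per-image height/width/scale enter broadcast-ready as [batch_size, 1, 1].
height = tf.fill([batch_size, 1, 1], 800.0)
width = tf.fill([batch_size, 1, 1], 1024.0)
scale = tf.fill([batch_size, 1, 1], 1.25)

rpn_scores, rpn_rois = _propose_rois_tpu(
    scores, boxes, anchors, height, width, scale,
    rpn_pre_nms_topn=2000, rpn_post_nms_topn=1000,
    rpn_nms_threshold=0.7, rpn_min_size=0, bbox_reg_weights=None)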
Code example #2
File: roi_ops.py Project: zzm422/tpu
def multilevel_propose_rois(scores_outputs,
                            box_outputs,
                            all_anchors,
                            image_info,
                            rpn_pre_nms_topn,
                            rpn_post_nms_topn,
                            rpn_nms_threshold,
                            rpn_min_size,
                            bbox_reg_weights,
                            use_batched_nms=False):
    """Proposes RoIs given a group of candidates from different FPN levels.

  Args:
    scores_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    all_anchors: an Anchors object that contains all the anchors.
    image_info: a tensor of shape [batch_size, 5] where each row encodes the
      input image's [height, width, scale, original_height, original_width].
      Height and width are for the input to the network, not the original
      image; scale is the factor used to scale the network input size to the
      original image size. See dataloader.DetectionInputProcessor for details.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and width;
      both need to be greater than this number. Note that this number is at the
      original image scale, not the scale used during training or inference.
    bbox_reg_weights: None or a list of four integers specifying the weights
      used when decoding the box.
    use_batched_nms: whether to use batched NMS. Batched NMS uses
      tf.image.combined_non_max_suppression, which is only available on
      CPU/GPU.

  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals.
    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax].
  """
    with tf.name_scope('multilevel_propose_rois'):
        levels = scores_outputs.keys()
        scores = []
        rois = []
        anchor_boxes = all_anchors.get_unpacked_boxes()

        height = tf.expand_dims(image_info[:, 0:1], axis=-1)
        width = tf.expand_dims(image_info[:, 1:2], axis=-1)
        scale = tf.expand_dims(image_info[:, 2:3], axis=-1)

        for level in levels:
            with tf.name_scope('level_%d' % level):
                batch_size, feature_h, feature_w, num_anchors_per_location = (
                    scores_outputs[level].get_shape().as_list())
                num_boxes = feature_h * feature_w * num_anchors_per_location

                this_level_scores = tf.reshape(scores_outputs[level],
                                               [batch_size, num_boxes])
                this_level_scores = tf.sigmoid(this_level_scores)
                this_level_boxes = tf.reshape(box_outputs[level],
                                              [batch_size, num_boxes, 4])
                this_level_anchors = tf.cast(
                    tf.reshape(
                        tf.expand_dims(anchor_boxes[level], axis=0) *
                        tf.ones([batch_size, 1, 1, 1]),
                        [batch_size, num_boxes, 4]),
                    dtype=this_level_scores.dtype)

                if use_batched_nms:
                    propose_rois_fn = _propose_rois_gpu
                else:
                    propose_rois_fn = _propose_rois_tpu
                this_level_scores, this_level_boxes = propose_rois_fn(
                    this_level_scores, this_level_boxes, this_level_anchors,
                    height, width, scale, rpn_pre_nms_topn, rpn_post_nms_topn,
                    rpn_nms_threshold, rpn_min_size, bbox_reg_weights)

                scores.append(this_level_scores)
                rois.append(this_level_boxes)
        scores = tf.concat(scores, axis=1)
        rois = tf.concat(rois, axis=1)

        with tf.name_scope('roi_post_nms_topk'):
            post_nms_num_anchors = scores.shape[1]
            post_nms_topk_limit = min(post_nms_num_anchors, rpn_post_nms_topn)
            top_k_scores, top_k_rois = box_utils.top_k(scores,
                                                       k=post_nms_topk_limit,
                                                       boxes_list=[rois])
            top_k_rois = top_k_rois[0]
        return top_k_scores, top_k_rois
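
A sketch of how the inputs might be assembled, under the assumption that FPN levels 2-6 are used; the feature-map sizes are hypothetical, and all_anchors would come from this project's Anchors class (not constructed here).

from collections import OrderedDict
import tensorflow.compat.v1 as tf

batch_size, num_anchors = 2, 3
scores_outputs, box_outputs = OrderedDict(), OrderedDict()
for level, (h, w) in zip(range(2, 7),
                         [(256, 256), (128, 128), (64, 64), (32, 32), (16, 16)]):
  scores_outputs[level] = tf.random.normal([batch_size, h, w, num_anchors])
  box_outputs[level] = tf.random.normal([batch_size, h, w, num_anchors * 4])
# image_info rows: [height, width, scale, original_height, original_width].
image_info = tf.constant([[1024., 1024., 1.25, 819., 819.]] * batch_size)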
Code example #3
File: roi_ops.py Project: zzm422/tpu
def _propose_rois_gpu(scores, boxes, anchor_boxes, height, width, scale,
                      rpn_pre_nms_topn, rpn_post_nms_topn, rpn_nms_threshold,
                      rpn_min_size, bbox_reg_weights):
    """Proposes RoIs giva group of candidates (GPU version).

  Args:
    scores: a tensor with a shape of [batch_size, num_boxes].
    boxes: a tensor with a shape of [batch_size, num_boxes, 4],
      in the encoded form.
    anchor_boxes: a tensor with a shape of [batch_size, num_boxes, 4]
      containing the anchors.
    height: a tensor of shape [batch_size, 1, 1] representing the image height.
    width: a tensor of shape [batch_size, 1, 1] representing the image width.
    scale: a tensor of shape [batch_size, 1, 1] representing the image scale.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and width;
      both need to be greater than this number. Note that this number is at the
      original image scale, not the scale used during training or inference.
    bbox_reg_weights: None or a list of four integers specifying the weights
      used when decoding the box.

  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals. It has the same dtype as the
      input scores.
    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax]. It has the same
      dtype as the input boxes.
  """
    batch_size, num_boxes = scores.get_shape().as_list()

    topk_limit = min(num_boxes, rpn_pre_nms_topn)
    boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)
    boxes = box_utils.clip_boxes(boxes, height, width)

    if rpn_min_size > 0.0:
        boxes, scores = box_utils.filter_boxes(boxes,
                                               tf.expand_dims(scores, axis=-1),
                                               rpn_min_size, height, width,
                                               scale)
        scores = tf.squeeze(scores, axis=-1)

    post_nms_topk_limit = min(topk_limit, rpn_post_nms_topn)
    if rpn_nms_threshold > 0:
        # Normalize coordinates as combined_non_max_suppression currently
        # only supports normalized coordinates.
        pre_nms_boxes = box_utils.to_normalized_coordinates(
            boxes, height, width)
        pre_nms_boxes = tf.reshape(pre_nms_boxes,
                                   [batch_size, num_boxes, 1, 4])
        pre_nms_scores = tf.reshape(scores, [batch_size, num_boxes, 1])
        boxes, scores, _, _ = tf.image.combined_non_max_suppression(
            pre_nms_boxes,
            pre_nms_scores,
            max_output_size_per_class=topk_limit,
            max_total_size=post_nms_topk_limit,
            iou_threshold=rpn_nms_threshold,
            score_threshold=0.0,
            pad_per_class=False)
        boxes = box_utils.to_absolute_coordinates(boxes, height, width)
    else:
        scores, boxes = box_utils.top_k(scores,
                                        k=post_nms_topk_limit,
                                        boxes_list=[boxes])
        boxes = boxes[0]

    return scores, boxes
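
The GPU path delegates to tf.image.combined_non_max_suppression; a small self-contained example of that op (toy values, q=1 for class-agnostic NMS, boxes already in normalized [ymin, xmin, ymax, xmax] form) shows the score-sorted, zero-padded output the code above relies on.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

# [batch=1, num_boxes=4, q=1, 4]; boxes 1 and 3 heavily overlap boxes 0 and 2.
boxes = tf.constant([[[[0.0, 0.0, 0.5, 0.5]],
                      [[0.0, 0.0, 0.48, 0.48]],
                      [[0.5, 0.5, 1.0, 1.0]],
                      [[0.5, 0.5, 0.9, 0.9]]]])
scores = tf.constant([[[0.9], [0.8], [0.7], [0.6]]])  # [batch, num_boxes, 1]

nmsed_boxes, nmsed_scores, _, valid = tf.image.combined_non_max_suppression(
    boxes, scores, max_output_size_per_class=4, max_total_size=4,
    iou_threshold=0.5, score_threshold=0.0, pad_per_class=False)

with tf.Session() as sess:
  print(sess.run([nmsed_scores, valid]))  # [[0.9, 0.7, 0.0, 0.0]], [2]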
Code example #4
def proposal_op(scores_outputs, box_outputs, all_anchors, image_info,
                rpn_pre_nms_topn, rpn_post_nms_topn, rpn_nms_threshold,
                rpn_min_size):
    """Proposes RoIs for the second stage nets.

  This proposal op performs the following operations.
    1. propose rois at each level.
    2. collect all proposals.
    3. keep rpn_post_nms_topn proposals by their sorted scores from the highest
       to the lowest.
  Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/ops/collect_and_distribute_fpn_rpn_proposals.py  # pylint: disable=line-too-long

  Args:
    scores_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    all_anchors: an Anchors object that contains all the anchors.
    image_info: a tensor of shape [batch_size, 5] where each row encodes the
      input image's [height, width, scale, original_height, original_width].
      Height and width are for the input to the network, not the original
      image; scale is the factor used to scale the network input size to the
      original image size. See dataloader.DetectionInputProcessor for details.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and width;
      both need to be greater than this number. Note that this number is at the
      original image scale, not the scale used during training or inference.
  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals.
    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax].
  """
    with tf.name_scope('proposal'):
        levels = scores_outputs.keys()
        scores = []
        rois = []
        anchor_boxes = all_anchors.get_unpacked_boxes()
        for level in levels:
            # Expands the batch dimension for anchors as anchors do not have batch
            # dimension. Note that batch_size is invariant across levels.
            batch_size = scores_outputs[level].shape[0]
            anchor_boxes_batch = tf.cast(
                tf.tile(tf.expand_dims(anchor_boxes[level], axis=0),
                        [batch_size, 1, 1, 1]),
                dtype=scores_outputs[level].dtype)
            scores_per_level, boxes_per_level = _proposal_op_per_level(
                scores_outputs[level], box_outputs[level], anchor_boxes_batch,
                image_info, rpn_pre_nms_topn, rpn_post_nms_topn,
                rpn_nms_threshold, rpn_min_size, level)
            scores.append(scores_per_level)
            rois.append(boxes_per_level)
        scores = tf.concat(scores, axis=1)
        rois = tf.concat(rois, axis=1)

        with tf.name_scope('post_nms_topk'):
            # Selects the top-k RoIs, k being rpn_post_nms_topn or the total
            # number of anchors after non-max suppression, whichever is
            # smaller.
            post_nms_num_anchors = scores.shape[1]
            post_nms_topk_limit = min(post_nms_num_anchors, rpn_post_nms_topn)

            top_k_scores, top_k_rois = box_utils.top_k(scores,
                                                       k=post_nms_topk_limit,
                                                       boxes_list=[rois])
            top_k_rois = top_k_rois[0]
        top_k_scores = tf.stop_gradient(top_k_scores)
        top_k_rois = tf.stop_gradient(top_k_rois)
        return top_k_scores, top_k_rois
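
One step worth isolating is the anchor batching at the top of the loop: per-level anchors carry no batch dimension, so they are tiled across it before the per-level call. A standalone sketch with made-up shapes:

import tensorflow.compat.v1 as tf

anchors_one_level = tf.random.uniform([64, 64, 3 * 4])  # [H, W, A * 4]
batch_size = 8
anchors_batched = tf.tile(tf.expand_dims(anchors_one_level, axis=0),
                          [batch_size, 1, 1, 1])
print(anchors_batched.shape)  # (8, 64, 64, 12)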
Code example #5
def _proposal_op_per_level(scores, boxes, anchor_boxes, image_info,
                           rpn_pre_nms_topn, rpn_post_nms_topn,
                           rpn_nms_threshold, rpn_min_size, level):
    """Proposes RoIs for the second stage nets.

  This proposal op performs the following operations.
    1. for each location i in a (H, W) grid:
         generate A anchor boxes centered on cell i
         apply predicted bbox deltas to each of the A anchors at cell i
    2. clip predicted boxes to image
    3. remove predicted boxes with either height or width < threshold
    4. sort all (proposal, score) pairs by score from highest to lowest
    5. take the top rpn_pre_nms_topn proposals before NMS
    6. apply NMS with a loose threshold (0.7) to the remaining proposals
    7. take after_nms_topN proposals after NMS
    8. return the top proposals
  Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/ops/generate_proposals.py  # pylint: disable=line-too-long

  Args:
    scores: a tensor with a shape of
      [batch_size, height, width, num_anchors].
    boxes: a tensor with a shape of
      [batch_size, height, width, num_anchors * 4], in the encoded form.
    anchor_boxes: a tensor with a shape of
      [batch_size, height, width, num_anchors * 4] containing the anchors.
    image_info: a tensor of shape [batch_size, 5] where each row encodes the
      input image's [height, width, scale, original_height, original_width].
      Height and width are for the input to the network, not the original
      image; scale is the factor used to scale the network input size to the
      original image size. See dataloader.DetectionInputProcessor for details.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and width;
      both need to be greater than this number. Note that this number is at the
      original image scale, not the scale used during training or inference.
    level: an integer number for the level that the function operates on.
  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals. It has the same dtype as the
      input scores.
    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax]. It has the same
      dtype as the input boxes.

  """
    with tf.name_scope('proposal-l%d' % level):
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take the top rpn_pre_nms_topn proposals before NMS
        batch_size, h, w, num_anchors = scores.get_shape().as_list()
        scores = tf.reshape(scores, [batch_size, -1])
        boxes = tf.reshape(boxes, [batch_size, -1, 4])
        # Map scores to [0, 1] for convenience of setting min score.
        scores = tf.sigmoid(scores)

        topk_limit = min(h * w * num_anchors, rpn_pre_nms_topn)
        anchor_boxes = tf.reshape(anchor_boxes, [batch_size, -1, 4])
        scores, boxes_list = box_utils.top_k(scores,
                                             k=topk_limit,
                                             boxes_list=[boxes, anchor_boxes])
        boxes = boxes_list[0]
        anchor_boxes = boxes_list[1]

        # Transforms anchors into proposals via bbox transformations.
        boxes = box_utils.batch_decode_box_outputs_op(anchor_boxes, boxes)

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        boxes = box_utils.clip_boxes(boxes, image_info[:, :2])

        # 3. remove predicted boxes with either height or width < min_size
        scores, boxes = box_utils.filter_boxes(scores, boxes, rpn_min_size,
                                               image_info)

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        post_nms_topk_limit = min(topk_limit, rpn_post_nms_topn)
        if rpn_nms_threshold > 0:
            scores, boxes = box_utils.sorted_non_max_suppression_padded(
                scores,
                boxes,
                max_output_size=post_nms_topk_limit,
                iou_threshold=rpn_nms_threshold)

        scores, boxes = box_utils.top_k(scores,
                                        k=post_nms_topk_limit,
                                        boxes_list=[boxes])
        boxes = boxes[0]
        return scores, boxes
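
Steps 4-5 above (sort by score, keep the top rpn_pre_nms_topn) amount to a sigmoid followed by a batched top-k plus gather; a minimal sketch with plain TF ops, on the assumption that box_utils.top_k wraps this same pattern:

import tensorflow.compat.v1 as tf

batch_size, num_boxes, k = 2, 6, 3
logits = tf.random.normal([batch_size, num_boxes])
boxes = tf.random.uniform([batch_size, num_boxes, 4])

probs = tf.sigmoid(logits)                     # map logits to [0, 1]
top_scores, top_idx = tf.nn.top_k(probs, k=k)  # sorted, [batch_size, k]
top_boxes = tf.batch_gather(boxes, top_idx)    # matching boxes, [batch_size, k, 4]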
Code example #6
def generate_detections_per_image_op(
    cls_outputs, box_outputs, anchor_boxes, image_id, image_info,
    num_detections=100, pre_nms_num_detections=1000, nms_threshold=0.3,
    bbox_reg_weights=(10., 10., 5., 5.)):
  """Generates detections with model outputs and anchors.

  Args:
    cls_outputs: a Tensor with shape [N, num_classes], which stacks class
      score outputs on all feature levels. N is the total number of anchors on
      all levels. num_classes is the number of classes predicted by the model.
      Note that cls_outputs should be the output of softmax().
    box_outputs: a Tensor with shape [N, num_classes * 4], which stacks box
      regression outputs on all feature levels. N is the total number of
      anchors on all levels.
    anchor_boxes: a Tensor with shape [N, 4], which stacks anchors on all
      feature levels. N is the total number of anchors on all levels.
    image_id: an integer number to specify the image id.
    image_info: a tensor of shape [5] which encodes the input image's [height,
      width, scale, original_height, original_width].
    num_detections: Number of detections after NMS.
    pre_nms_num_detections: Number of candidates before NMS.
    nms_threshold: a float number to specify the threshold of NMS.
    bbox_reg_weights: a list of 4 float scalars, which are default weights on
      (dx, dy, dw, dh) for normalizing bbox regression targets.
  Returns:
    detections: detection results in a tensor with each row representing
      [image_id, ymin, xmin, ymax, xmax, score, class]
  """
  num_boxes, num_classes = cls_outputs.get_shape().as_list()

  # Removes background class scores.
  cls_outputs = cls_outputs[:, 1:num_classes]
  top_k_scores, top_k_indices_with_classes = tf.nn.top_k(
      tf.reshape(cls_outputs, [-1]),
      k=pre_nms_num_detections,
      sorted=True)
  classes = tf.mod(top_k_indices_with_classes, num_classes - 1)
  top_k_indices = tf.floordiv(top_k_indices_with_classes, num_classes - 1)

  anchor_boxes = tf.gather(anchor_boxes, top_k_indices)
  box_outputs = tf.reshape(
      box_outputs, [num_boxes, num_classes, 4])[:, 1:num_classes, :]
  box_outputs = tf.gather_nd(box_outputs,
                             tf.stack([top_k_indices, classes], axis=1))

  # Applies bounding box regression to anchors.
  boxes = box_utils.batch_decode_box_outputs_op(
      tf.expand_dims(anchor_boxes, axis=0),
      tf.expand_dims(box_outputs, axis=0),
      bbox_reg_weights)[0]
  boxes = box_utils.clip_boxes(
      tf.expand_dims(boxes, axis=0), tf.expand_dims(image_info[:2], axis=0))[0]

  classes = tf.tile(tf.reshape(classes, [1, pre_nms_num_detections]),
                    [num_classes - 1, 1])
  scores = tf.tile(tf.reshape(top_k_scores, [1, pre_nms_num_detections]),
                   [num_classes - 1, 1])
  boxes = tf.tile(tf.reshape(boxes, [1, pre_nms_num_detections, 4]),
                  [num_classes - 1, 1, 1])

  class_bitmask = tf.tile(
      tf.reshape(tf.range(num_classes - 1), [num_classes - 1, 1]),
      [1, pre_nms_num_detections])
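  # Keep each row's scores only where the detection's class matches that
  # row's class id; the NMS below then runs independently per class in one
  # batched call.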
  scores = tf.where(tf.equal(classes, class_bitmask), scores,
                    tf.zeros_like(scores))
  scores = tf.where(tf.greater(scores, 0.05), scores, tf.zeros_like(scores))
  # Reshape classes to be compatible with the top_k function.
  classes = tf.reshape(classes, [num_classes - 1, pre_nms_num_detections, 1])
  scores, sorted_tensors = box_utils.top_k(
      scores, k=pre_nms_num_detections, tensors=[boxes, classes])
  boxes = sorted_tensors[0]
  classes = tf.reshape(sorted_tensors[1],
                       [num_classes - 1, pre_nms_num_detections])

  (post_nms_scores,
   post_nms_boxes, idx) = non_max_suppression.non_max_suppression_padded(
       scores, boxes, max_output_size=num_detections,
       iou_threshold=nms_threshold, level=0)

  # Sorts all results.
  sorted_scores, sorted_indices = tf.nn.top_k(
      tf.to_float(tf.reshape(post_nms_scores, [-1])),
      k=num_detections,
      sorted=True)
  post_nms_boxes = tf.gather(tf.reshape(post_nms_boxes, [-1, 4]),
                             sorted_indices)
  classes = tf.batch_gather(classes, idx)
  post_nms_classes = tf.gather(tf.reshape(classes, [-1]), sorted_indices) + 1

  if isinstance(image_id, int):
    image_id = tf.constant(image_id)
  image_id = tf.reshape(image_id, [])
  detections_result = tf.stack(
      [
          tf.to_float(tf.fill(tf.shape(sorted_scores), image_id)),
          post_nms_boxes[:, 0],
          post_nms_boxes[:, 1],
          post_nms_boxes[:, 2],
          post_nms_boxes[:, 3],
          sorted_scores,
          tf.to_float(post_nms_classes),
      ],
      axis=1)
  return detections_result
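
The flat top-k index arithmetic near the top of this function (tf.mod and tf.floordiv by num_classes - 1) recovers the (anchor, class) pair from an index into the row-major flattened [N, num_classes - 1] score matrix; a tiny standalone check with toy sizes:

import tensorflow.compat.v1 as tf

num_boxes, num_classes = 4, 3  # two foreground classes after dropping class 0
flat = tf.range(num_boxes * (num_classes - 1))   # flat indices 0..7
anchor_idx = tf.floordiv(flat, num_classes - 1)  # 0, 0, 1, 1, 2, 2, 3, 3
class_idx = tf.mod(flat, num_classes - 1)        # 0, 1, 0, 1, 0, 1, 0, 1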