Exemple #1
0
def _propose_rois_gpu(scores, boxes, anchor_boxes, height, width, scale,
                      rpn_pre_nms_topn, rpn_post_nms_topn, rpn_nms_threshold,
                      rpn_min_size, bbox_reg_weights):
    """Turns encoded RPN outputs into a capped set of RoIs (GPU version).

    All candidates are decoded against their anchors and clipped to the
    image. Small boxes are optionally filtered, and the survivors are then
    reduced either by `tf.image.combined_non_max_suppression` (when
    `rpn_nms_threshold > 0`) or by a plain top-k on the scores. Unlike a
    "top-k first" pipeline, no pre-NMS top-k op is run here; the pre-NMS
    limit is only handed to the NMS op as `max_output_size_per_class`.

    Args:
      scores: a rank-2 tensor, [batch_size, num_boxes]; the static shape is
        read, so both dimensions must be known.
      boxes: a tensor of [batch_size, num_boxes, 4] holding the encoded box
        predictions.
      anchor_boxes: the anchors the predictions are relative to, with a
        shape of [batch_size, num_boxes, 4].
      height: a tensor of shape [batch_size, 1, 1] with the image height.
      width: a tensor of shape [batch_size, 1, 1] with the image width.
      scale: a tensor of shape [batch_size, 1, 1] with the image scale.
      rpn_pre_nms_topn: an integer, the number of top scoring proposals
        considered before NMS. This is *per FPN level* (not total).
      rpn_post_nms_topn: an integer, the number of top scoring proposals
        kept after NMS.
      rpn_nms_threshold: a float between 0 and 1 used as the NMS IoU
        threshold. A value that is not greater than 0 disables NMS and a
        top-k selection is used instead.
      rpn_min_size: the minimum proposal height and width, expressed at the
        original image scale (not the training/inference scale). Filtering
        is skipped unless this is greater than 0.
      bbox_reg_weights: None or a list of four numbers, the weights used
        when decoding the boxes.

    Returns:
      scores: the scores of the selected proposals, at most
        min(num_boxes, rpn_pre_nms_topn, rpn_post_nms_topn) per image.
      boxes: the matching boxes, 4 values each.
        NOTE(review): the previous docstring described these as normalized
        [ymin, xmin, ymax, xmax], but the NMS branch ends with
        `to_absolute_coordinates` and the top-k branch never normalizes,
        so the visible code returns absolute coordinates -- confirm with
        callers.
    """
    batch_size, num_boxes = scores.get_shape().as_list()

    # Never ask for more pre-NMS candidates than there are boxes.
    pre_nms_limit = (rpn_pre_nms_topn
                     if rpn_pre_nms_topn < num_boxes else num_boxes)

    decoded = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)
    boxes = box_utils.clip_boxes(decoded, height, width)

    if rpn_min_size > 0.0:
        # filter_boxes is fed scores with a trailing singleton axis, which
        # is dropped again right afterwards.
        expanded_scores = tf.expand_dims(scores, axis=-1)
        boxes, expanded_scores = box_utils.filter_boxes(
            boxes, expanded_scores, rpn_min_size, height, width, scale)
        scores = tf.squeeze(expanded_scores, axis=-1)

    # The post-NMS budget can never exceed the pre-NMS one.
    post_nms_limit = min(rpn_post_nms_topn, pre_nms_limit)

    if rpn_nms_threshold > 0:
        # combined_non_max_suppression only supports normalized
        # coordinates, so normalize before the call and undo it afterwards.
        # The singleton axes added by the reshapes present the proposals as
        # a single class sharing one set of boxes.
        normalized = box_utils.to_normalized_coordinates(boxes, height, width)
        nms_boxes, nms_scores, _, _ = tf.image.combined_non_max_suppression(
            tf.reshape(normalized, [batch_size, num_boxes, 1, 4]),
            tf.reshape(scores, [batch_size, num_boxes, 1]),
            max_output_size_per_class=pre_nms_limit,
            max_total_size=post_nms_limit,
            iou_threshold=rpn_nms_threshold,
            score_threshold=0.0,
            pad_per_class=False)
        return nms_scores, box_utils.to_absolute_coordinates(
            nms_boxes, height, width)

    # NMS disabled: simply keep the highest scoring proposals.
    top_scores, top_boxes = box_utils.top_k(scores,
                                            k=post_nms_limit,
                                            boxes_list=[boxes])
    return top_scores, top_boxes[0]
Exemple #2
0
def _propose_rois_tpu(scores, boxes, anchor_boxes, height, width, scale,
                      rpn_pre_nms_topn, rpn_post_nms_topn, rpn_nms_threshold,
                      rpn_min_size, bbox_reg_weights):
    """Turns encoded RPN outputs into a capped set of RoIs (TPU version).

    The pipeline is: keep the top scoring candidates (together with their
    anchors), decode them, clip them to the image, filter out boxes that are
    too small, optionally run `sorted_non_max_suppression_padded`, and
    finally take the top-k of whatever is left.

    Args:
      scores: a rank-2 tensor, [batch_size, num_boxes]; the static shape is
        read, so `num_boxes` must be known.
      boxes: a tensor of [batch_size, num_boxes, 4] holding the encoded box
        predictions.
      anchor_boxes: the anchors the predictions are relative to, with a
        shape of [batch_size, num_boxes, 4].
      height: a tensor of shape [batch_size, 1, 1] with the image height.
      width: a tensor of shape [batch_size, 1, 1] with the image width.
      scale: a tensor of shape [batch_size, 1, 1] with the image scale.
      rpn_pre_nms_topn: an integer, the number of top scoring proposals kept
        before NMS. This is *per FPN level* (not total).
      rpn_post_nms_topn: an integer, the number of top scoring proposals
        kept after NMS.
      rpn_nms_threshold: a float between 0 and 1 used as the NMS IoU
        threshold. NMS is skipped when this is not greater than 0.
      rpn_min_size: the minimum proposal height and width, expressed at the
        original image scale (not the training/inference scale). Unlike
        the NMS step, the size filter is always applied here.
      bbox_reg_weights: None or a list of four numbers, the weights used
        when decoding the boxes.

    Returns:
      scores: the scores of the selected proposals,
        min(num_boxes, rpn_pre_nms_topn, rpn_post_nms_topn) requested per
        image from the final top-k.
      boxes: the matching boxes, 4 values each.
        NOTE(review): the previous docstring described these as normalized
        [ymin, xmin, ymax, xmax]; no normalization step is visible in this
        function, so they appear to stay in the frame produced by
        `decode_boxes`/`clip_boxes` -- confirm with callers.
    """
    _, num_boxes = scores.get_shape().as_list()

    # Never ask top_k for more candidates than there are boxes.
    pre_nms_limit = min(rpn_pre_nms_topn, num_boxes)
    scores, selected = box_utils.top_k(scores,
                                       k=pre_nms_limit,
                                       boxes_list=[boxes, anchor_boxes])
    boxes, anchor_boxes = selected[0], selected[1]

    # Decode against the matching anchors, then clip to the image bounds.
    boxes = box_utils.clip_boxes(
        box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights),
        height, width)

    # Drop proposals with a side below rpn_min_size. filter_boxes is fed
    # scores with a trailing singleton axis, removed again afterwards.
    boxes, scores = box_utils.filter_boxes(
        boxes, tf.expand_dims(scores, axis=-1), rpn_min_size, height, width,
        scale)
    scores = tf.squeeze(scores, axis=-1)

    # The post-NMS budget can never exceed the pre-NMS one.
    post_nms_limit = min(rpn_post_nms_topn, pre_nms_limit)

    if rpn_nms_threshold > 0:
        scores, boxes = box_utils.sorted_non_max_suppression_padded(
            scores,
            boxes,
            max_output_size=post_nms_limit,
            iou_threshold=rpn_nms_threshold)

    # Final selection of the best remaining proposals.
    top_scores, top_boxes = box_utils.top_k(scores,
                                            k=post_nms_limit,
                                            boxes_list=[boxes])
    return top_scores, top_boxes[0]
Exemple #3
0
def _proposal_op_per_level(scores, boxes, anchor_boxes, image_info,
                           rpn_pre_nms_topn, rpn_post_nms_topn,
                           rpn_nms_threshold, rpn_min_size, level):
    """Proposes RoIs for the second stage nets from one feature level.

    The ops are built inside a `proposal-l<level>` name scope and run in
    this order:
      1. flatten scores, box deltas and anchors to one row per image and
         squash the scores with a sigmoid;
      2. keep the top `rpn_pre_nms_topn` candidates (or all of them if
         there are fewer) together with their anchors;
      3. decode the kept deltas against their anchors;
      4. clip the proposals to the image (may produce zero-area boxes,
         which the next step is meant to remove);
      5. filter proposals whose height or width is below `rpn_min_size`;
      6. if `rpn_nms_threshold > 0`, apply NMS (e.g. threshold = 0.7);
      7. take the top `rpn_post_nms_topn` (e.g. 300) of what remains.
    Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/ops/generate_proposals.py  # pylint: disable=line-too-long

    Args:
      scores: a tensor with a shape of
        [batch_size, height, width, num_anchors]. The static shape is read
        and multiplied out, so height, width and num_anchors must be known.
      boxes: a tensor with a shape of
        [batch_size, height, width, num_anchors * 4], in the encoded form.
      anchor_boxes: the anchors, with a shape of
        [batch_size, height, width, num_anchors * 4].
      image_info: a tensor of shape [batch_size, 5] whose five columns
        encode [height, width, scale, original_height, original_width].
        Height and width are those of the network input, not of the
        original image; scale is the factor relating the network input size
        to the original image size. Only the first two columns are used
        for clipping; the whole tensor is passed to the size filter. See
        dataloader.DetectionInputProcessor for details.
      rpn_pre_nms_topn: an integer, the number of top scoring proposals kept
        before NMS. This is *per FPN level* (not total).
      rpn_post_nms_topn: an integer, the number of top scoring proposals
        kept after NMS.
      rpn_nms_threshold: a float between 0 and 1 used as the NMS IoU
        threshold. NMS is skipped when this is not greater than 0.
      rpn_min_size: the minimum proposal height and width, expressed at the
        original image scale (not the training/inference scale).
      level: an integer, the feature level being processed; only used to
        build the name scope.

    Returns:
      scores: the (sigmoid) scores of the selected proposals,
        min(height * width * num_anchors, rpn_pre_nms_topn,
        rpn_post_nms_topn) requested per image from the final top-k.
      boxes: the matching boxes, 4 values each.
        NOTE(review): the previous docstring described these as normalized
        [ymin, xmin, ymax, xmax]; no normalization step is visible in this
        function -- confirm the coordinate frame with callers.
    """
    with tf.name_scope('proposal-l%d' % level):
        batch_size, h, w, num_anchors = scores.get_shape().as_list()

        # One row of candidates per image.
        scores = tf.reshape(scores, [batch_size, -1])
        boxes = tf.reshape(boxes, [batch_size, -1, 4])
        # Map scores to [0, 1] for convenience of setting a min score.
        scores = tf.sigmoid(scores)

        # Never ask top_k for more candidates than this level provides.
        candidates_per_image = h * w * num_anchors
        pre_nms_limit = min(rpn_pre_nms_topn, candidates_per_image)

        anchor_boxes = tf.reshape(anchor_boxes, [batch_size, -1, 4])
        scores, selected = box_utils.top_k(scores,
                                           k=pre_nms_limit,
                                           boxes_list=[boxes, anchor_boxes])
        boxes, anchor_boxes = selected[0], selected[1]

        # Turn the kept anchors into proposals via the predicted deltas.
        boxes = box_utils.batch_decode_box_outputs_op(anchor_boxes, boxes)

        # Clip to the network input size (first two image_info columns).
        boxes = box_utils.clip_boxes(boxes, image_info[:, :2])

        # Remove proposals with either height or width < rpn_min_size.
        scores, boxes = box_utils.filter_boxes(scores, boxes, rpn_min_size,
                                               image_info)

        # The post-NMS budget can never exceed the pre-NMS one.
        post_nms_limit = min(rpn_post_nms_topn, pre_nms_limit)

        if rpn_nms_threshold > 0:
            scores, boxes = box_utils.sorted_non_max_suppression_padded(
                scores,
                boxes,
                max_output_size=post_nms_limit,
                iou_threshold=rpn_nms_threshold)

        # Final selection of the best remaining proposals (-> RoIs).
        top_scores, top_boxes = box_utils.top_k(scores,
                                                k=post_nms_limit,
                                                boxes_list=[boxes])
        return top_scores, top_boxes[0]