Example #1
def generate_fpn_proposals(multilevel_anchor_boxes, multilevel_box_logits,
                           multilevel_label_logits, orig_image_dims,
                           batch_size):
    """
    Generating the rois from the box logits and pick K with top label scores as
    the box proposals.

    Args:
        multilevel_box_logits:      #lvl [ BS x (NA * 4) x H_feature x W_feature ] boxes
        multilevel_label_logits:    #lvl [ BS x H_feature x W_feature x NA ] tensors
        orig_image_dimensions: Original (prepadding) image dimensions (h,w,c)   BS x 3
    Returns:
        boxes: K x 5 float
        scores:  1-D, K (logits)
    """
    prefix = "model_fpn.generate_fpn_proposals"
    bug_prefix = "GEN_PROPOSALS_BUG fpn"
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK * batch_size if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
        for lvl in range(num_lvl):
            with tf.name_scope(f'Lvl{lvl}'):
                im_info = tf.cast(orig_images_hw, tf.float32)

                scores = multilevel_label_logits[lvl]  # BS x H_feature x W_feature x NA
                bbox_deltas = tf.transpose(
                    multilevel_box_logits[lvl],
                    [0, 2, 3, 1])  #BS x H_feature x W_feature x (NA * 4)

                single_level_anchor_boxes = multilevel_anchor_boxes[lvl]
                single_level_anchor_boxes = tf.reshape(
                    single_level_anchor_boxes, (-1, 4))

                # This is a custom TensorFlow op that translates the bbox deltas into
                # bounding box coordinates and then runs NMS. See CODEBASE.md for more info.
                #
                # rois: (# boxes for a single level) x 5, the 5 columns arranged as:
                #       batch_index, x_1, y_1, x_2, y_2
                # rois_probs: 1-D, # boxes for a single level
                rois, rois_probs = tf.generate_bounding_box_proposals(
                    scores,
                    bbox_deltas,
                    im_info,
                    single_level_anchor_boxes,
                    spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                    pre_nms_topn=fpn_nms_topk,
                    post_nms_topn=fpn_nms_topk,
                    nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                    min_size=cfg.RPN.MIN_SIZE)
                # rois_probs = print_runtime_shape(f'rois_probs, lvl {lvl}', rois_probs, prefix=bug_prefix)
                all_boxes.append(rois)
                all_scores.append(rois_probs)

        proposal_boxes = tf.concat(all_boxes, axis=0)  # Num_all_rois x 5
        proposal_boxes = tf.reshape(proposal_boxes,
                                    [-1, 5])  # Num_all_rois x 5

        proposal_scores = tf.concat(all_scores, axis=0)  # 1-D Num_all_rois
        proposal_scores = tf.reshape(proposal_scores, [-1])  # 1-D Num_all_rois

        proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                    k=proposal_topk,
                                                    sorted=False)
        proposal_boxes = tf.gather(proposal_boxes, topk_indices)  # K x 5

    else:
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    return tf.stop_gradient(proposal_boxes, name='boxes'), \
        tf.stop_gradient(proposal_scores, name='scores')
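
After per-level NMS, the level-wise path reduces to a simple merge-and-select: concatenate the per-level proposals, then keep the top K across all levels by score. The sketch below reproduces only that final step, assuming TensorFlow 2.x; the per-level outputs and fpn_nms_topk are stand-in values invented here, since the real ones come from cfg and the custom proposal op.

import tensorflow as tf

fpn_nms_topk = 1000                                  # stand-in for cfg.RPN.*_PER_LEVEL_NMS_TOPK
all_boxes, all_scores = [], []
for num_rois in (300, 200, 100):                     # pretend per-level NMS outputs
    all_boxes.append(tf.random.uniform((num_rois, 5)))    # batch_index, x_1, y_1, x_2, y_2
    all_scores.append(tf.random.uniform((num_rois,)))

proposal_boxes = tf.concat(all_boxes, axis=0)        # Num_all_rois x 5
proposal_scores = tf.concat(all_scores, axis=0)      # 1-D, Num_all_rois

# Keep at most fpn_nms_topk proposals across all levels, ranked by score.
proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
proposal_scores, topk_indices = tf.nn.top_k(proposal_scores, k=proposal_topk, sorted=False)
proposal_boxes = tf.gather(proposal_boxes, topk_indices)   # K x 5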
Example #2
def generate_fpn_proposals_topk_per_image(multilevel_anchor_boxes,
                                          multilevel_box_logits,
                                          multilevel_label_logits,
                                          orig_image_dims, batch_size):
    """
    Args:
        multilevel_box_logits:      #lvl [ BS x (NAx4) x H x W ] boxes
        multilevel_label_logits:    #lvl [ BS x H x W x A ] tensors
        orig_image_dimensions: Original (prepadding) image dimensions (h,w,c)   BS x 3
    Returns:
        boxes: K x 5 float
        scores:  (#lvl x BS x K) vector       (logits)
    """

    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
        boxes_list = []
        scores_list = []

        bs = batch_size if training else 1

        for i in range(bs):
            all_boxes = []
            all_scores = []
            for lvl in range(num_lvl):
                with tf.name_scope(f'Lvl{lvl}'):
                    im_info = tf.cast(orig_images_hw[i:(i + 1)],
                                      tf.float32)  # 1 x 2 (h, w)

                    scores = multilevel_label_logits[lvl][i:(i + 1)]
                    bbox_deltas = tf.transpose(
                        multilevel_box_logits[lvl][i:(i + 1)], [0, 2, 3, 1])

                    single_level_anchor_boxes = multilevel_anchor_boxes[lvl]
                    single_level_anchor_boxes = tf.reshape(
                        single_level_anchor_boxes, (-1, 4))

                    # https://caffe2.ai/docs/operators-catalogue.html#generateproposals
                    rois, rois_probs = tf.generate_bounding_box_proposals(
                        scores,
                        bbox_deltas,
                        im_info,
                        single_level_anchor_boxes,
                        spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                        pre_nms_topn=fpn_nms_topk,
                        post_nms_topn=fpn_nms_topk,
                        nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                        min_size=cfg.RPN.MIN_SIZE)

                    # rois_probs = print_runtime_shape(f'rois_probs, lvl {lvl}', rois_probs, prefix=bug_prefix)
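                    # The op ran on a single image, so the batch-index column it
                    # emits is always 0; stamp it with this image's index i so
                    # proposals can be told apart after concatenation.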
                    all_boxes.append(
                        tf.concat((i + rois[:, :1], rois[:, 1:]), axis=1))
                    all_scores.append(rois_probs)

            proposal_boxes = tf.concat(all_boxes,
                                       axis=0)  # (#lvl * K) x 5
            proposal_boxes = tf.reshape(proposal_boxes,
                                        [-1, 5])  # (#lvl * K) x 5

            proposal_scores = tf.concat(all_scores, axis=0)  # 1-D, #lvl * K
            proposal_scores = tf.reshape(proposal_scores,
                                         [-1])  # 1-D, #lvl * K

            topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
            topk_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                    k=topk,
                                                    sorted=False)

            boxes_list.append(tf.gather(proposal_boxes, topk_indices))
            scores_list.append(topk_scores)

        boxes = tf.concat(boxes_list, axis=0)
        scores = tf.concat(scores_list, axis=0)

    else:
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    return tf.stop_gradient(boxes, name='boxes'), \
        tf.stop_gradient(scores, name='scores')
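
The per-image variant depends on that batch-index stamping: each call to the proposal op sees one image, so its batch-index column is rewritten before the per-image results are concatenated. A minimal sketch of just the stamping, assuming TensorFlow 2.x and a hand-made rois tensor in place of the op output:

import tensorflow as tf

i = 2                                            # hypothetical image index within the batch
rois = tf.constant([[0., 10., 10., 50., 60.],
                    [0., 20., 15., 80., 90.]])   # batch_index, x_1, y_1, x_2, y_2

stamped = tf.concat((i + rois[:, :1], rois[:, 1:]), axis=1)
# stamped[:, 0] is now 2.0 for every row, so after tf.concat over the batch the
# proposals belonging to each image remain identifiable.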
def custom_multilevel_propose_rois(scores_outputs, box_outputs, all_anchors,
                                   image_info, rpn_pre_nms_topn,
                                   rpn_post_nms_topn, rpn_nms_threshold,
                                   rpn_min_size):
    """Proposes RoIs for the second stage nets.

    This proposal op performs the following operations.
    1. propose rois at each level.
    2. collect all proposals.
    3. keep rpn_post_nms_topn proposals by their sorted scores from the highest
       to the lowest.

    Reference:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/ops/collect_and_distribute_fpn_rpn_proposals.py

    Args:
    scores_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    all_anchors: an Anchors object that contains all the anchors.
    image_info: a tensor of shape [batch_size, 5] where the five columns
      encode the input image's [height, width, scale,
      original_height, original_width]. Height and width are for
      the input to the network, not the original image; scale is the scale
      factor used to scale the network input size to the original image size.
      The last two are the original height and width. See
      dataloader.DetectionInputProcessor for details.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and width;
      both need to be greater than this number. Note that this number is at
      the original image scale, not the scale used during training or inference.
    Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals.
    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax].
    """

    with tf.name_scope('proposal'):
        levels = scores_outputs.keys()
        scores = []
        rois = []
        anchor_boxes = all_anchors.get_unpacked_boxes()
        for level in levels:
            logging.debug(
                "[ROI OPs] Using GenerateBoxProposals op... Scope: proposal_%s"
                % level)

            boxes_per_level, scores_per_level = tf.generate_bounding_box_proposals(
                scores=tf.reshape(tf.sigmoid(scores_outputs[level]),
                                  scores_outputs[level].shape),
                bbox_deltas=box_outputs[level],
                image_info=image_info,
                anchors=anchor_boxes[level],
                pre_nms_topn=rpn_pre_nms_topn,
                post_nms_topn=rpn_post_nms_topn,
                nms_threshold=rpn_nms_threshold,
                min_size=rpn_min_size,
                name="proposal_%s" % level)

            scores.append(scores_per_level)
            rois.append(boxes_per_level)

        scores = tf.concat(scores, axis=1)
        rois = tf.concat(rois, axis=1)

        with tf.name_scope('post_nms_topk'):
            # Select the top-k rois, k being rpn_post_nms_topn or the total number
            # of anchors remaining after non-max suppression, whichever is smaller.
            post_nms_num_anchors = scores.shape[1]

            post_nms_topk_limit = (post_nms_num_anchors
                                   if post_nms_num_anchors < rpn_post_nms_topn
                                   else rpn_post_nms_topn)

            top_k_scores, top_k_rois = box_utils.top_k(scores,
                                                       k=post_nms_topk_limit,
                                                       boxes_list=[rois])
            top_k_rois = top_k_rois[0]

        top_k_scores = tf.stop_gradient(top_k_scores)
        top_k_rois = tf.stop_gradient(top_k_rois)

        return top_k_scores, top_k_rois
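
box_utils.top_k is defined elsewhere; the stand-in below is reconstructed from the call site above (batched top-k over the scores, gathering the matching rows from each boxes tensor) and is an assumption, not the library's actual implementation. It assumes TensorFlow 2.x.

import tensorflow as tf

def top_k(scores, k, boxes_list):
    """Batched top-k on scores [batch, n]; gathers the matching rows from each
    boxes tensor in boxes_list ([batch, n, 4]). Stand-in for box_utils.top_k."""
    top_k_scores, top_k_indices = tf.nn.top_k(scores, k=k)           # [batch, k]
    gathered = [tf.gather(boxes, top_k_indices, batch_dims=1)        # [batch, k, 4]
                for boxes in boxes_list]
    return top_k_scores, gathered

# Example: 2 images, 6 candidate boxes each, keep the best 3 per image.
scores = tf.random.uniform((2, 6))
rois = tf.random.uniform((2, 6, 4))
top_k_scores, top_k_rois = top_k(scores, k=3, boxes_list=[rois])
top_k_rois = top_k_rois[0]                                           # [2, 3, 4]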