def test_ones():
    """`mge.ones` must accept both a tuple shape and a tensor shape."""
    expected = np.ones((2, 2), dtype=np.int32)

    # shape given as a plain tuple
    out = mge.ones((2, 2), dtype=np.int32)
    assertTensorClose(out.numpy(), expected)

    # shape given as a tensor
    shape_tensor = mge.tensor([2, 2], dtype=np.int32)
    out = mge.ones(shape_tensor, dtype=np.int32)
    assertTensorClose(out.numpy(), expected)
Beispiel #2
0
def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align',
             labels=None, bbox_targets=None):
    """Pool RoI features from an FPN feature pyramid.

    Each RoI is assigned to one pyramid level via the FPN heuristic
    (level = 4 + log2(sqrt(area) / 224)), pooled from that level's
    feature map, and the per-level results are concatenated back into a
    single tensor.

    Args:
        rpn_fms: list of feature maps, one per pyramid level; ordering
            must match ``stride``.
        rois: (N, 5) boxes; column 0 is presumably a batch index and
            columns 1-4 are x1, y1, x2, y2 -- TODO confirm with caller.
        stride: per-level feature strides, same length as ``rpn_fms``.
        pool_shape: output spatial size of each pooled feature.
        roi_type: 'roi_align' or 'roi_pool'.
        labels: optional per-RoI labels, padded/filtered alongside rois.
        bbox_targets: optional per-RoI regression targets, handled like
            ``labels``.

    Returns:
        (pool_feature, rois, labels, bbox_targets); the last two are
        ``None`` when ``labels`` is not given, otherwise gradient-blocked.

    Raises:
        ValueError: if ``roi_type`` is not a supported mode.
    """
    assert len(stride) == len(rpn_fms)
    # FPN canonical-assignment constants (Lin et al., "Feature Pyramid
    # Networks for Object Detection").
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    # sqrt(w * h): characteristic size used to pick a pyramid level.
    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    level_assignments = F.floor(
        canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2)
    )
    # Clamp to available levels, then shift so level 0 == rpn_fms[0].
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level
    # Append one dummy RoI per level so every level pools at least one
    # box (avoids empty-tensor ops).  available_masks marks real RoIs
    # with 1 and dummies with 0, so the dummies can be dropped at the end.
    available_masks = F.concat(
        [mge.ones(level_assignments.shapeof()[0]), mge.zeros(num_fms)], axis=0)
    level_assignments = F.concat([level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))], axis=0)
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof()[-1]))], axis=0)
    if labels is not None:
        labels = F.concat([labels, mge.ones((num_fms, labels.shapeof()[-1]))], axis=0)
        bbox_targets = F.concat([bbox_targets, mge.zeros((num_fms, bbox_targets.shapeof()[-1]))], axis=0)
    pool_list, inds_list = [], []
    for i in range(len(rpn_fms)):
        # Select the RoIs assigned to level i and pool from its feature map.
        mask = level_assignments == i
        inds = mask_to_inds(mask)
        rois_fm = rois.ai[inds]
        if roi_type == 'roi_pool':
            pool_fm = F.roi_pooling(
                    rpn_fms[i], rois_fm, pool_shape, mode='max', scale=1.0/stride[i])
        elif roi_type == 'roi_align':
            pool_fm = F.roi_align(
                    rpn_fms[i], rois_fm, pool_shape, mode='average',
                    spatial_scale=1.0/stride[i], sample_points=2, aligned=True)
        else:
            # BUG FIX: previously an unknown roi_type left pool_fm
            # undefined and crashed with NameError further down.
            raise ValueError("unsupported roi_type: {}".format(roi_type))
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # fm_order lists the original RoI indices in pooled (per-level)
    # order; it is used to reorder rois/labels to match pool_feature.
    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)

    # Drop the dummy RoIs appended above.
    ordered_available_masks = available_masks.ai[fm_order]
    available_inds = mask_to_inds(ordered_available_masks)
    pool_feature = pool_feature.ai[available_inds]
    rois = rois.ai[fm_order, :].ai[available_inds, :]
    if labels is not None:
        labels = labels.ai[fm_order].ai[available_inds]
        bbox_targets = bbox_targets.ai[fm_order, :].ai[available_inds, :]
        # Targets are constants w.r.t. the network -- block gradients.
        return pool_feature, rois, F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return pool_feature, rois, None, None
Beispiel #3
0
def test_dropout_dynamic_same_result():
    """Dropout under an identical RNG seed must sample identical masks."""
    inp = mge.ones(10)

    R.manual_seed(0)
    first = F.dropout(inp, 0.5)
    R.manual_seed(0)
    second = F.dropout(inp, 0.5)

    assert np.all(first.numpy() == second.numpy())
Beispiel #4
0
def make_grid(
        tensor: megengine.Tensor,  # [N,C,H,W]
        nrow: int = 8,
        padding: int = 2,
        background: float = 0,
        normalize: bool = False,
) -> megengine.Tensor:
    """align [N, C, H, W] image tensor to [H, W, 3] image grids, for visualization

    Tiles the N images into a grid of at most ``nrow`` columns, with a
    ``padding``-pixel gap filled with ``background`` around each tile,
    and returns a single [H, W, 3] tensor.
    """
    if normalize:
        tensor = normalize_image(tensor, scale=255)  # normalize to 0-255 scale

    c = tensor.shape[1]
    assert c in (1, 3), "only support color/grayscale images, got channel = {}".format(c)
    nmaps = tensor.shape[0]
    xmaps = min(nrow, nmaps)                       # columns in the grid
    ymaps = int(math.ceil(float(nmaps) / xmaps))   # rows in the grid
    # Per-tile cell size; one extra ``padding`` on the grid border below.
    height, width = int(tensor.shape[2] + padding), int(tensor.shape[3] + padding)
    num_channels = tensor.shape[1]
    grid = megengine.ones((num_channels, height * ymaps + padding, width * xmaps + padding), "float32") * background
    k = 0
    for y in range(ymaps):
        for x in range(xmaps):
            if k >= nmaps:
                break
            # Legacy MegEngine write-into-slice idiom:
            # ``t.set_subtensor(value)[slices]`` presumably returns a copy
            # of ``t`` with the sliced region replaced by ``value``
            # -- TODO confirm against the MegEngine 0.x API docs.
            grid = grid.set_subtensor(tensor[k])[:,
                                                 y * height + padding: (y + 1) * height,
                                                 x * width + padding: (x + 1) * width]
            k = k + 1
    # NOTE: this ``c`` shadows the channel count read above; only h/w are used.
    c, h, w = grid.shape
    grid = grid.dimshuffle(1, 2, 0)  # [C,H,W] -> [H,W,C]
    grid = grid.broadcast(h, w, 3)   # [H,W,C] -> [H,W,3]
    return grid
def fpn_anchor_target_opr_core_impl(gt_boxes,
                                    im_info,
                                    anchors,
                                    allow_low_quality_matches=True):
    """Assign RPN classification labels and box-regression targets to anchors.

    Labels per anchor: positive (1) when IoU with some gt box reaches
    ``config.rpn_positive_overlap``, negative (0) below
    ``config.rpn_negative_overlap``, otherwise ``config.ignore_label``.

    Args:
        gt_boxes: ground-truth boxes for one image; column 4 appears to be
            a class label where <= 0 marks ignored boxes -- TODO confirm.
        im_info: per-image metadata; ``im_info[5]`` is presumably the
            number of valid gt boxes -- verify against the caller.
        anchors: (A, 4) anchor boxes.
        allow_low_quality_matches: if True, additionally mark as positive
            the best-overlapping anchor of every gt box.

    Returns:
        (labels, bbox_targets) for all A anchors.
    """
    ignore_label = config.ignore_label
    # get the gt boxes
    valid_gt_boxes = gt_boxes[:im_info[5], :]
    non_ignore_mask = valid_gt_boxes[:, -1] > 0
    non_ignore_inds = mask_to_inds(non_ignore_mask)
    valid_gt_boxes = valid_gt_boxes.ai[non_ignore_inds]
    # compute the iou matrix
    overlaps = box_overlap_opr(anchors, valid_gt_boxes[:, :4])
    # match the dtboxes
    a_shp0 = anchors.shape[0]
    max_overlaps = F.max(overlaps, axis=1)
    argmax_overlaps = F.argmax(overlaps, axis=1)
    # all ignore
    labels = mge.ones(a_shp0).astype(np.int32) * ignore_label
    # set negative ones: anchors below the negative threshold get label 0
    # (multiplying by the bool mask zeroes out the ignore_label entries).
    labels = labels * (max_overlaps >= config.rpn_negative_overlap)
    # set positive ones
    fg_mask = (max_overlaps >= config.rpn_positive_overlap)
    const_one = mge.tensor(1.0)
    if allow_low_quality_matches:
        # match the max gt: force each gt box's best anchor to be
        # positive, even if its IoU is below the positive threshold.
        gt_max_overlaps = F.max(overlaps, axis=0)
        gt_argmax_overlaps = F.argmax(overlaps, axis=0)
        g_shp0 = valid_gt_boxes.shapeof()[0]
        gt_id = F.linspace(0, g_shp0 - 1, g_shp0).astype(np.int32)
        argmax_overlaps = argmax_overlaps.set_ai(gt_id)[gt_argmax_overlaps]
        max_overlaps = max_overlaps.set_ai(
            const_one.broadcast(g_shp0))[gt_argmax_overlaps]
        fg_mask = (max_overlaps >= config.rpn_positive_overlap)
    # set positive ones
    fg_mask_ind = mask_to_inds(fg_mask)
    labels = labels.set_ai(const_one.broadcast(
        fg_mask_ind.shapeof()))[fg_mask_ind]
    # compute the targets: regression deltas from each anchor to its
    # matched gt box.
    bbox_targets = bbox_transform_opr(anchors,
                                      valid_gt_boxes.ai[argmax_overlaps, :4])
    if config.rpn_bbox_normalize_targets:
        std_opr = mge.tensor(config.bbox_normalize_stds[None, :])
        mean_opr = mge.tensor(config.bbox_normalize_means[None, :])
        minus_opr = mean_opr / std_opr
        bbox_targets = bbox_targets / std_opr - minus_opr
    return labels, bbox_targets
Beispiel #6
0
 def per_level_gt(self,
                  gt_boxes,
                  im_info,
                  anchors,
                  allow_low_quality_matches=True):
     """Assign labels and regression targets to one pyramid level's anchors.

     Labels: positive (1) at IoU >= ``cfg.rpn_positive_overlap``,
     negative (0) below ``cfg.rpn_negative_overlap``, otherwise
     ``cfg.ignore_label``.

     Args:
         gt_boxes: ground-truth boxes; column 4 is read as the class label.
         im_info: per-image metadata; ``im_info[4]`` is presumably the
             number of valid gt boxes -- TODO confirm with caller.
         anchors: (A, 4) anchors of this level.
         allow_low_quality_matches: also mark each gt box's best anchor
             as positive regardless of its IoU.

     Returns:
         (labels, bbox_targets) for all A anchors.
     """
     ignore_label = self.cfg.ignore_label
     # get the gt boxes
     valid_gt_boxes = gt_boxes[:im_info[4], :]
     # compute the iou matrix
     overlaps = layers.get_iou(anchors, valid_gt_boxes[:, :4])
     # match the dtboxes
     a_shp0 = anchors.shape[0]
     max_overlaps = F.max(overlaps, axis=1)
     argmax_overlaps = F.argmax(overlaps, axis=1)
     # all ignore
     labels = mge.ones(a_shp0).astype("int32") * ignore_label
     # set negative ones (bool mask zeroes out entries below threshold)
     labels = labels * (max_overlaps >= self.cfg.rpn_negative_overlap)
     # set positive ones
     fg_mask = max_overlaps >= self.cfg.rpn_positive_overlap
     const_one = mge.tensor(1.0)
     if allow_low_quality_matches:
         # make sure that max iou of gt matched
         gt_argmax_overlaps = F.argmax(overlaps, axis=0)
         num_valid_boxes = valid_gt_boxes.shapeof(0)
         gt_id = F.linspace(0, num_valid_boxes - 1,
                            num_valid_boxes).astype("int32")
         argmax_overlaps = argmax_overlaps.set_ai(gt_id)[gt_argmax_overlaps]
         max_overlaps = max_overlaps.set_ai(
             const_one.broadcast(num_valid_boxes))[gt_argmax_overlaps]
         fg_mask = max_overlaps >= self.cfg.rpn_positive_overlap
     # set positive ones
     _, fg_mask_ind = F.cond_take(fg_mask == 1, fg_mask)
     labels = labels.set_ai(const_one.broadcast(
         fg_mask_ind.shapeof(0)))[fg_mask_ind]
     # compute the targets: deltas from each anchor to its matched gt box
     bbox_targets = self.box_coder.encode(
         anchors, valid_gt_boxes.ai[argmax_overlaps, :4])
     return labels, bbox_targets
Beispiel #7
0
def get_padded_tensor(
    array: Tensor, multiple_number: int = 32, pad_value: float = 0
) -> Tensor:
    """Pad a tensor so its height and width are multiples of ``multiple_number``.

    Args:
        array (Tensor):
            the tensor with the shape of [batch, channel, height, width]
        multiple_number (int):
            make the height and width can be divided by multiple_number
        pad_value (float): the value to be padded

    Returns:
        padded_array (Tensor)
    """
    # NOTE(review): this unpack assumes a 4-d input, which makes the
    # ``ndim == 3`` branch below unreachable -- confirm intended usage.
    batch, chl, t_height, t_width = array.shape
    # Round height/width up to the next multiple of multiple_number.
    padded_height = (
        (t_height + multiple_number - 1) // multiple_number * multiple_number
    )
    padded_width = (t_width + multiple_number - 1) // multiple_number * multiple_number

    # Allocate the padded canvas filled with pad_value.
    padded_array = (
        mge.ones(
            F.concat([batch, chl, padded_height, padded_width], axis=0),
            dtype=np.float32,
        )
        * pad_value
    )

    ndim = array.ndim
    if ndim == 4:
        # Legacy write-into-slice idiom: copy the original into the
        # top-left corner of the padded canvas.
        padded_array = padded_array.set_subtensor(array)[:, :, :t_height, :t_width]
    elif ndim == 3:
        padded_array = padded_array.set_subtensor(array)[:, :t_height, :t_width]
    else:
        raise Exception("Not supported tensor dim: %d" % ndim)
    return padded_array
Beispiel #8
0
    def find_top_rpn_proposals(
        self, rpn_bbox_offsets_list, rpn_cls_prob_list,
        all_anchors_list, im_info
    ):
        """Select the top-scoring RPN proposals per image across all FPN levels.

        For each image: decode box offsets against the anchors of every
        level, keep the per-level top ``prev_nms_top_n`` by score, clip
        to the image, drop degenerate boxes, then run a batched
        (per-level) NMS keeping ``post_nms_top_n`` proposals.

        Args:
            rpn_bbox_offsets_list: per-level offset maps, indexed [level][batch].
            rpn_cls_prob_list: per-level class-probability maps; channel 1
                is read as the foreground score.
            all_anchors_list: per-level anchor tensors.
            im_info: per-image metadata used for box clipping.

        Returns:
            gradient-blocked (R, 5) rois laid out [batch_id, x1, y1, x2, y2].
        """
        prev_nms_top_n = self.cfg.train_prev_nms_top_n \
            if self.training else self.cfg.test_prev_nms_top_n
        post_nms_top_n = self.cfg.train_post_nms_top_n \
            if self.training else self.cfg.test_post_nms_top_n

        batch_per_gpu = self.cfg.batch_per_gpu if self.training else 1
        nms_threshold = self.cfg.rpn_nms_threshold

        list_size = len(rpn_bbox_offsets_list)

        return_rois = []

        for bid in range(batch_per_gpu):
            batch_proposals_list = []
            batch_probs_list = []
            batch_level_list = []
            for l in range(list_size):
                # get proposals and probs: flatten (C,H,W) offsets to (A,4)
                offsets = rpn_bbox_offsets_list[l][bid].dimshuffle(2, 3, 0, 1).reshape(-1, 4)
                all_anchors = all_anchors_list[l]
                proposals = self.box_coder.decode(all_anchors, offsets)

                probs = rpn_cls_prob_list[l][bid, 1].dimshuffle(1, 2, 0).reshape(1, -1)
                # prev nms top n: keep the highest-scoring proposals per level
                probs, order = F.argsort(probs, descending=True)
                num_proposals = F.minimum(probs.shapeof(1), prev_nms_top_n)
                probs = probs.reshape(-1)[:num_proposals]
                order = order.reshape(-1)[:num_proposals]
                proposals = proposals.ai[order, :]

                batch_proposals_list.append(proposals)
                batch_probs_list.append(probs)
                # Remember each proposal's level so NMS can be batched per level.
                batch_level_list.append(mge.ones(probs.shapeof(0)) * l)

            proposals = F.concat(batch_proposals_list, axis=0)
            scores = F.concat(batch_probs_list, axis=0)
            level = F.concat(batch_level_list, axis=0)

            proposals = layers.get_clipped_box(proposals, im_info[bid, :])
            # filter empty
            keep_mask = layers.filter_boxes(proposals)
            _, keep_inds = F.cond_take(keep_mask == 1, keep_mask)
            proposals = proposals.ai[keep_inds, :]
            scores = scores.ai[keep_inds]
            level = level.ai[keep_inds]

            # gather the proposals and probs
            # sort nms by scores
            scores, order = F.argsort(scores.reshape(1, -1), descending=True)
            order = order.reshape(-1)
            proposals = proposals.ai[order, :]
            level = level.ai[order]

            # apply total level nms
            rois = F.concat([proposals, scores.reshape(-1, 1)], axis=1)
            keep_inds = batched_nms(proposals, scores, level, nms_threshold, post_nms_top_n)
            rois = rois.ai[keep_inds]

            # rois shape (N, 5), info [batch_id, x1, y1, x2, y2]
            batch_inds = mge.ones((rois.shapeof(0), 1)) * bid
            batch_rois = F.concat([batch_inds, rois[:, :4]], axis=1)
            return_rois.append(batch_rois)

        # Proposals are treated as constants downstream -- block gradients.
        return F.zero_grad(F.concat(return_rois, axis=0))
Beispiel #9
0
def get_focal_loss(
    score: Tensor,
    label: Tensor,
    ignore_label: int = -1,
    background: int = 0,
    alpha: float = 0.5,
    gamma: float = 0,
    norm_type: str = "fg",
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:
    <https://arxiv.org/pdf/1708.02002.pdf>

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        score (Tensor):
            the predicted score with the shape of :math:`(B, A, C)`
        label (Tensor):
            the assigned label of boxes with shape of :math:`(B, A)`
        ignore_label (int):
            the value of ignore class. Default: -1
        background (int):
            the value of background class. Default: 0
        alpha (float):
            parameter to mitigate class imbalance. Default: 0.5
        gamma (float):
            parameter to mitigate easy/hard loss imbalance. Default: 0
        norm_type (str): current support 'fg', 'none':
            'fg': loss will be normalized by number of fore-ground samples
            'none': no normalization

    Returns:
        the calculated focal loss.
    """
    # mask is 0 at ignored anchors, 1 elsewhere; it both sanitizes the
    # labels for one-hot encoding and zeroes their loss contribution.
    mask = 1 - (label == ignore_label)
    valid_label = label * mask

    score_shp = score.shape
    # Build one-hot over C+1 classes (index 0 = background), then slice
    # off the background column so one_hot is all-zero for background.
    zero_mat = mge.zeros(
        F.concat([score_shp[0], score_shp[1], score_shp[2] + 1], axis=0),
        dtype=np.float32,
    )
    one_mat = mge.ones(
        F.concat([score_shp[0], score_shp[1], tensor(1)], axis=0), dtype=np.float32,
    )

    one_hot = basic.indexing_set_one_hot(
        zero_mat, 2, valid_label.astype(np.int32), one_mat
    )[:, :, 1:]
    # Focal-weighted log-likelihood of positives and negatives.
    pos_part = F.power(1 - score, gamma) * one_hot * F.log(score)
    neg_part = F.power(score, gamma) * (1 - one_hot) * F.log(1 - score)
    loss = -(alpha * pos_part + (1 - alpha) * neg_part).sum(axis=2) * mask

    if norm_type == "fg":
        positive_mask = label > background
        # Clamp the divisor at 1 to avoid division by zero when there
        # are no foreground samples.
        return loss.sum() / F.maximum(positive_mask.sum(), 1)
    elif norm_type == "none":
        return loss.sum()
    else:
        raise NotImplementedError
Beispiel #10
0
def test_dropout_dynamic_diff_result():
    """Two dropout calls without reseeding should sample different masks."""
    inp = mge.ones(10)
    first = F.dropout(inp, 0.5)
    second = F.dropout(inp, 0.5)
    assert np.any(first.numpy() != second.numpy())
Beispiel #11
0
def ones_like(inp):
    """Return a tensor of ones with the same shape and dtype as ``inp``."""
    shape, dtype = inp.shape, inp.dtype
    return mge.ones(shape, dtype=dtype)
Beispiel #12
0
def cascade_roi_target(rpn_rois, im_info, gt_boxes, pos_threshold=0.5, top_k=1):
    """Assign classification labels and regression targets to proposals
    for one cascade R-CNN stage.

    For each image the gt boxes are appended to the proposals, every
    proposal is matched to its ``top_k`` best-overlapping gt boxes
    (with special handling of ignored gt boxes), and per-proposal
    labels and (top_k * 4) box targets are produced.

    Args:
        rpn_rois: (R, 5) proposals laid out [batch_id, x1, y1, x2, y2].
        im_info: per-image metadata; ``im_info[bid, 5]`` is presumably
            the number of valid gt boxes -- TODO confirm with caller.
        gt_boxes: (B, G, >=5) gt boxes; column 4 is read as the class label.
        pos_threshold: unused in the visible body -- kept for interface
            compatibility.
        top_k: number of gt matches kept per proposal.

    Returns:
        gradient-blocked (rois, labels, bbox_targets).
    """
    return_rois = []
    return_labels = []
    return_bbox_targets = []
    # get per image proposals and gt_boxes
    for bid in range(config.batch_per_gpu):
        gt_boxes_perimg = gt_boxes[bid, :im_info[bid, 5], :]
        batch_inds = mge.ones((gt_boxes_perimg.shapeof()[0], 1)) * bid
        #if config.proposal_append_gt:
        gt_rois = F.concat([batch_inds, gt_boxes_perimg[:, :4]], axis=1)
        batch_roi_mask = rpn_rois[:, 0] == bid
        batch_roi_inds = mask_to_inds(batch_roi_mask)
        # Append the gt boxes to this image's proposals.
        all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois], axis=0)
        overlaps_normal, overlaps_ignore = box_overlap_ignore_opr(
                all_rois[:, 1:5], gt_boxes_perimg)
        overlaps_normal, overlaps_normal_indices = F.argsort(overlaps_normal, descending=True)
        overlaps_ignore, overlaps_ignore_indices = F.argsort(overlaps_ignore, descending=True)
        # gt max and indices, ignore max and indices
        max_overlaps_normal = overlaps_normal[:, :top_k].reshape(-1)
        gt_assignment_normal = overlaps_normal_indices[:, :top_k].reshape(-1)
        max_overlaps_ignore = overlaps_ignore[:, :top_k].reshape(-1)
        gt_assignment_ignore = overlaps_ignore_indices[:, :top_k].reshape(-1)
        # cons masks: prefer the ignore match only when the normal match
        # is below fg_threshold and the ignore overlap is larger.
        ignore_assign_mask = (max_overlaps_normal < config.fg_threshold) * (
                max_overlaps_ignore > max_overlaps_normal)
        max_overlaps = max_overlaps_normal * (1 - ignore_assign_mask) + \
                max_overlaps_ignore * ignore_assign_mask
        gt_assignment = gt_assignment_normal * (1- ignore_assign_mask) + \
                gt_assignment_ignore * ignore_assign_mask
        gt_assignment = gt_assignment.astype(np.int32)
        labels = gt_boxes_perimg.ai[gt_assignment, 4]
        # Foreground: enough overlap and not matched to an ignored label.
        fg_mask = (max_overlaps >= config.fg_threshold) * (1 - F.equal(labels, config.ignore_label))
        bg_mask = (max_overlaps < config.bg_threshold_high) * (
                max_overlaps >= config.bg_threshold_low)
        fg_mask = fg_mask.reshape(-1, top_k)
        bg_mask = bg_mask.reshape(-1, top_k)
        #pos_max = config.num_rois * config.fg_ratio
        #fg_inds_mask = _bernoulli_sample_masks(fg_mask[:, 0], pos_max, 1)
        #neg_max = config.num_rois - fg_inds_mask.sum()
        #bg_inds_mask = _bernoulli_sample_masks(bg_mask[:, 0], neg_max, 1)
        # Background proposals get label 0.
        labels = labels * fg_mask.reshape(-1)
        #keep_mask = fg_inds_mask + bg_inds_mask
        #keep_inds = mask_to_inds(keep_mask)
        #keep_inds = keep_inds[:F.minimum(config.num_rois, keep_inds.shapeof()[0])]
        # labels
        labels = labels.reshape(-1, top_k)
        gt_assignment = gt_assignment.reshape(-1, top_k).reshape(-1)
        target_boxes = gt_boxes_perimg.ai[gt_assignment, :4]
        #rois = all_rois.ai[keep_inds]
        # Repeat each roi top_k times so it pairs with each of its matches.
        target_shape = (all_rois.shapeof()[0], top_k, all_rois.shapeof()[-1])
        target_rois = F.add_axis(all_rois, 1).broadcast(target_shape).reshape(-1, all_rois.shapeof()[-1])
        bbox_targets = bbox_transform_opr(target_rois[:, 1:5], target_boxes)
        if config.rcnn_bbox_normalize_targets:
            std_opr = mge.tensor(config.bbox_normalize_stds[None, :])
            mean_opr = mge.tensor(config.bbox_normalize_means[None, :])
            minus_opr = mean_opr / std_opr
            bbox_targets = bbox_targets / std_opr - minus_opr
        bbox_targets = bbox_targets.reshape(-1, top_k * 4)
        return_rois.append(all_rois)
        return_labels.append(labels)
        return_bbox_targets.append(bbox_targets)
    if config.batch_per_gpu == 1:
        # Single-image batch: the loop ran once, so the loop variables
        # hold that image's results directly.
        return F.zero_grad(all_rois), F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return_rois = F.concat(return_rois, axis=0)
        return_labels = F.concat(return_labels, axis=0)
        return_bbox_targets = F.concat(return_bbox_targets, axis=0)
        return F.zero_grad(return_rois), F.zero_grad(return_labels), F.zero_grad(return_bbox_targets)
Beispiel #13
0
    def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
        """Sample proposals and assign labels / box targets for R-CNN training.

        For each image: append gt boxes to the proposals, match every
        proposal to its best gt box (preferring non-ignored matches),
        Bernoulli-sample foreground and background proposals up to
        ``cfg.num_rois``, and encode regression targets for the kept rois.

        Args:
            rpn_rois: (R, 5) proposals [batch_id, x1, y1, x2, y2].
            im_info: per-image metadata; ``im_info[bid, 4]`` is read as
                the number of valid gt boxes.
            gt_boxes: (B, G, >=5) gt boxes; column 4 is the class label.

        Returns:
            At inference: (rpn_rois, None, None).
            At training: gradient-blocked (rois, labels, bbox_targets).
        """
        if not self.training:
            return rpn_rois, None, None

        return_rois = []
        return_labels = []
        return_bbox_targets = []

        # get per image proposals and gt_boxes
        for bid in range(self.cfg.batch_per_gpu):
            num_valid_boxes = im_info[bid, 4]
            gt_boxes_per_img = gt_boxes[bid, :num_valid_boxes, :]
            batch_inds = mge.ones((gt_boxes_per_img.shapeof(0), 1)) * bid
            # if config.proposal_append_gt:
            gt_rois = F.concat([batch_inds, gt_boxes_per_img[:, :4]], axis=1)
            batch_roi_mask = rpn_rois[:, 0] == bid
            _, batch_roi_inds = F.cond_take(batch_roi_mask == 1, batch_roi_mask)
            # all_rois : [batch_id, x1, y1, x2, y2]
            all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois])

            overlaps_normal, overlaps_ignore = layers.get_iou(
                all_rois[:, 1:5], gt_boxes_per_img, return_ignore=True,
            )

            max_overlaps_normal = overlaps_normal.max(axis=1)
            gt_assignment_normal = F.argmax(overlaps_normal, axis=1)

            max_overlaps_ignore = overlaps_ignore.max(axis=1)
            gt_assignment_ignore = F.argmax(overlaps_ignore, axis=1)

            # Prefer the ignore match only when the normal match is below
            # fg_threshold and the ignore overlap is larger.
            ignore_assign_mask = (max_overlaps_normal < self.cfg.fg_threshold) * (
                max_overlaps_ignore > max_overlaps_normal
            )
            max_overlaps = (
                max_overlaps_normal * (1 - ignore_assign_mask)
                + max_overlaps_ignore * ignore_assign_mask
            )
            gt_assignment = (
                gt_assignment_normal * (1 - ignore_assign_mask)
                + gt_assignment_ignore * ignore_assign_mask
            )
            gt_assignment = gt_assignment.astype("int32")
            labels = gt_boxes_per_img.ai[gt_assignment, 4]

            # ---------------- get the fg/bg labels for each roi ---------------#
            fg_mask = (max_overlaps >= self.cfg.fg_threshold) * (
                labels != self.cfg.ignore_label
            )
            bg_mask = (max_overlaps < self.cfg.bg_threshold_high) * (
                max_overlaps >= self.cfg.bg_threshold_low
            )

            num_fg_rois = self.cfg.num_rois * self.cfg.fg_ratio

            # Randomly keep at most num_fg_rois foreground samples; fill
            # the remainder of the budget with background samples.
            fg_inds_mask = self._bernoulli_sample_masks(fg_mask, num_fg_rois, 1)
            num_bg_rois = self.cfg.num_rois - fg_inds_mask.sum()
            bg_inds_mask = self._bernoulli_sample_masks(bg_mask, num_bg_rois, 1)

            # Non-sampled-foreground proposals get label 0 (background).
            labels = labels * fg_inds_mask

            keep_mask = fg_inds_mask + bg_inds_mask
            _, keep_inds = F.cond_take(keep_mask == 1, keep_mask)
            # Add next line to avoid memory exceed
            keep_inds = keep_inds[: F.minimum(self.cfg.num_rois, keep_inds.shapeof(0))]
            # labels
            labels = labels.ai[keep_inds].astype("int32")
            rois = all_rois.ai[keep_inds]
            target_boxes = gt_boxes_per_img.ai[gt_assignment.ai[keep_inds], :4]
            bbox_targets = self.box_coder.encode(rois[:, 1:5], target_boxes)
            bbox_targets = bbox_targets.reshape(-1, 4)

            return_rois.append(rois)
            return_labels.append(labels)
            return_bbox_targets.append(bbox_targets)

        # Targets are constants w.r.t. the network -- block gradients.
        return (
            F.zero_grad(F.concat(return_rois, axis=0)),
            F.zero_grad(F.concat(return_labels, axis=0)),
            F.zero_grad(F.concat(return_bbox_targets, axis=0)),
        )
def find_top_rpn_proposals(is_train, rpn_bbox_offsets_list, rpn_cls_prob_list,
                           all_anchors_list, im_info):
    """Select the top-scoring RPN proposals per image across all FPN levels.

    For each image: decode the per-level box offsets against their
    anchors, softmax the class scores, keep the ``prev_nms_top_n``
    highest-scoring proposals, run a total-level NMS keeping
    ``post_nms_top_n``, and prepend the batch index to each kept box.

    Args:
        is_train: selects train vs. test top-n limits and batch size.
        rpn_bbox_offsets_list: per-level offset maps, indexed [level][batch].
        rpn_cls_prob_list: per-level 2-class score maps, indexed [level][batch].
        all_anchors_list: per-level anchor tensors.
        im_info: per-image metadata used for clipping / min-size filtering.

    Returns:
        (rois, probs): rois laid out [batch_id, x1, y1, x2, y2] with the
        matching objectness probabilities.
    """
    prev_nms_top_n = config.train_prev_nms_top_n \
        if is_train else config.test_prev_nms_top_n
    post_nms_top_n = config.train_post_nms_top_n \
        if is_train else config.test_post_nms_top_n
    batch_per_gpu = config.batch_per_gpu if is_train else 1
    nms_threshold = config.rpn_nms_threshold
    box_min_size = config.rpn_min_box_size
    bbox_normalize_targets = config.rpn_bbox_normalize_targets
    bbox_normalize_means = config.bbox_normalize_means
    bbox_normalize_stds = config.bbox_normalize_stds

    list_size = len(rpn_bbox_offsets_list)

    return_rois = []
    return_probs = []
    for bid in range(batch_per_gpu):
        batch_proposals_list = []
        batch_probs_list = []
        for l in range(list_size):
            # get proposals and probs: flatten (C,H,W) offsets to (A,4)
            offsets = rpn_bbox_offsets_list[l][bid] \
                .dimshuffle(1, 2, 0).reshape(-1, 4)
            if bbox_normalize_targets:
                # Undo target normalization before decoding.
                # BUG FIX: the original referenced an undefined name
                # ``pred_offsets`` here, raising NameError whenever
                # rpn_bbox_normalize_targets was enabled.
                std_opr = tensor(config.bbox_normalize_stds[None, :])
                mean_opr = tensor(config.bbox_normalize_means[None, :])
                offsets = offsets * std_opr
                offsets = offsets + mean_opr
            all_anchors = all_anchors_list[l]
            proposals = bbox_transform_inv_opr(all_anchors, offsets)
            if config.anchor_within_border:
                proposals = clip_boxes_opr(proposals, im_info[bid, :])
            probs = rpn_cls_prob_list[l][bid] \
                    .dimshuffle(1,2,0).reshape(-1, 2)
            # Foreground probability is column 1 of the softmax output.
            probs = F.softmax(probs)[:, 1]
            # gather the proposals and probs
            batch_proposals_list.append(proposals)
            batch_probs_list.append(probs)
        batch_proposals = F.concat(batch_proposals_list, axis=0)
        batch_probs = F.concat(batch_probs_list, axis=0)
        # filter the zero boxes: zeroing the score pushes too-small boxes
        # to the bottom of the subsequent sort.
        batch_keep_mask = filter_boxes_opr(batch_proposals,
                                           box_min_size * im_info[bid, 2])
        batch_probs = batch_probs * batch_keep_mask
        # prev_nms_top_n
        num_proposals = F.minimum(prev_nms_top_n, batch_probs.shapeof()[0])
        batch_probs, idx = F.argsort(batch_probs, descending=True)
        batch_probs = batch_probs[:num_proposals].reshape(-1, 1)
        topk_idx = idx[:num_proposals].reshape(-1)
        batch_proposals = batch_proposals.ai[topk_idx]
        batch_rois = F.concat([batch_proposals, batch_probs], axis=1)
        # For each image, run a total-level NMS, and choose topk results.
        keep_inds = gpu_nms(batch_rois, nms_threshold, post_nms_top_n)
        batch_rois = batch_rois.ai[keep_inds]
        batch_probs = batch_rois[:, -1]
        # cons the rois: prepend the batch index, drop the score column
        batch_inds = mge.ones((batch_rois.shapeof()[0], 1)) * bid
        batch_rois = F.concat([batch_inds, batch_rois[:, :-1]], axis=1)
        return_rois.append(batch_rois)
        return_probs.append(batch_probs)

    if batch_per_gpu == 1:
        # Single-image batch: the loop ran once, return its results directly.
        return batch_rois, batch_probs
    else:
        concated_rois = F.concat(return_rois, axis=0)
        concated_probs = F.concat(return_probs, axis=0)
        return concated_rois, concated_probs