def _im_detect(self, image):
        """Run the network on one image and merge the outputs of both stages.

        Adapted from
        https://github.com/daijifeng001/MNC/blob/master/tools/demo.py.

        After a single forward pass, the ROIs, mask proposals and class
        probabilities of the first stage (blobs ``rois``, ``mask_proposal``,
        ``seg_cls_prob``) and of the second stage (the same names with an
        ``_ext`` suffix) are read from the network and stacked row-wise,
        first stage first.

        :param image: An image (numpy array) of shape (height, width, 3).
        :return: A tuple of three numpy arrays, the n_proposals x n_classes
            scores, the corresponding n_proposals x 4 bounding boxes, where
            each bounding box is defined as <xul, yul, xlr, ylr> and the
            n_proposals x 1 x 21 x 21 segmentation masks (mask size as
            stated by the original author; it is not checked here).
        """
        net_inputs, scales = self._prepare_mnc_args(image)
        self._net.forward(**net_inputs)
        net_blobs = self._net.blobs
        stage_blob_names = (
            ('rois', 'mask_proposal', 'seg_cls_prob'),
            ('rois_ext', 'mask_proposal_ext', 'seg_cls_prob_ext'),
        )
        stage_scores, stage_boxes, stage_masks = [], [], []
        for rois_name, masks_name, scores_name in stage_blob_names:
            # Only columns 1..4 of a ROI row are box coordinates. They live
            # in the resized image space, so divide by the scale to map them
            # back, then clip to the original image.
            unscaled = net_blobs[rois_name].data[:, 1:5] / scales[0]
            clipped, _ = clip_boxes(unscaled, image.shape)
            stage_boxes.append(clipped)
            stage_masks.append(net_blobs[masks_name].data)
            stage_scores.append(net_blobs[scores_name].data)
        # np.concatenate copies, so the result no longer aliases blob memory.
        scores = np.concatenate(stage_scores, axis=0)
        boxes = np.concatenate(stage_boxes, axis=0)
        masks = np.concatenate(stage_masks, axis=0)
        return scores, boxes, masks
Example #2
0
    def forward_train(self, bottom, top):
        """Build the training blobs for the next stage from refined ROIs.

        During forward, we need to do several things:
        1. Apply bounding box regression output which has highest
           classification score to proposed ROIs
        2. Sample ROIs based on their current overlaps, assign labels
           on them
        3. Make mask regression targets and positive/negative weights,
           just like the proposal_target_layer

        Inputs, as read from ``bottom``:
            bottom[0]: ROIs; columns 1:5 are used as box coordinates.
            bottom[1]: bbox regression deltas, 4 columns per class.
            bottom[2]: segmentation scores; column 0 is skipped when
                picking the best class (presumably background).
            bottom[3]: ground-truth boxes; the last column is dropped when
                they are appended as ROIs.
            bottom[4]: ground-truth masks.
            bottom[5]: image info; only row 0 is used.
            bottom[6]: mask info.
        ``top`` is not used here.

        Side effects: sets ``self._bbox_reg_labels``, ``self._clip_keep``
        and ``self._keep_inds``.

        Returns:
            dict mapping blob names to arrays: 'rois', 'labels',
            'mask_targets', 'mask_weight', 'gt_mask_info', 'bbox_targets',
            'bbox_inside_weights', 'bbox_outside_weights'.
        """
        rois = bottom[0].data
        bbox_deltas = bottom[1].data
        seg_scores = bottom[2].data
        gt_boxes = bottom[3].data
        gt_masks = bottom[4].data
        im_info = bottom[5].data[0, :]
        mask_info = bottom[6].data

        # Best class per ROI, ignoring column 0; the +1 maps the argmax over
        # columns 1.. back to the full column index.
        self._bbox_reg_labels = seg_scores[:, 1:].argmax(axis=1) + 1

        # Pick, for every ROI, the 4 deltas that belong to its best class.
        # `range` (not the Python-2-only `xrange`) keeps this runnable on
        # both Python 2 and Python 3.
        num_rois = rois.shape[0]
        artificial_deltas = np.zeros((num_rois, 4))
        for i in range(num_rois):
            first_col = 4 * self._bbox_reg_labels[i]
            artificial_deltas[i, :] = bbox_deltas[i, first_col:first_col + 4]
        # Labels are argmax + 1 and therefore never 0, so this reset matches
        # nothing; it is kept to mirror the original behaviour.
        artificial_deltas[self._bbox_reg_labels == 0, :] = 0

        # Refined ROIs followed by the ground-truth boxes; column 0 stays 0.
        all_rois = np.zeros((num_rois, 5))
        all_rois[:, 0] = 0
        all_rois[:, 1:5] = bbox_transform_inv(rois[:, 1:5], artificial_deltas)
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        gt_rois = np.hstack((zeros, gt_boxes[:, :-1]))
        all_rois = np.vstack((all_rois, gt_rois))
        all_rois[:, 1:5], self._clip_keep = clip_boxes(all_rois[:, 1:5],
                                                       im_info[:2])

        (labels, rois_out, fg_inds, keep_inds, mask_targets, top_mask_info,
         bbox_targets, bbox_inside_weights) = self._sample_output(
             all_rois, gt_boxes, im_info[2], gt_masks, mask_info)
        bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(
            np.float32)
        self._keep_inds = keep_inds

        # The first len(fg_inds) rows get weight 1; this assumes
        # _sample_output puts foreground ROIs first -- TODO confirm.
        mask_weight = np.zeros(
            (rois_out.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        mask_weight[0:len(fg_inds), :, :, :] = 1

        blobs = {
            'rois': rois_out,
            'labels': labels,
            'mask_targets': mask_targets,
            'mask_weight': mask_weight,
            'gt_mask_info': top_mask_info,
            'bbox_targets': bbox_targets,
            'bbox_inside_weights': bbox_inside_weights,
            'bbox_outside_weights': bbox_outside_weights
        }
        return blobs
Example #3
0
    def forward_train(self, bottom, top):
        """Refine ROIs with the best-class regression and sample targets.

        During forward, we need to do several things:
        1. Apply bounding box regression output which has highest
           classification score to proposed ROIs
        2. Sample ROIs based on their current overlaps, assign labels
           on them
        3. Make mask regression targets and positive/negative weights,
           just like the proposal_target_layer

        ``bottom`` is read positionally: [0] ROIs, [1] bbox deltas
        (4 columns per class), [2] segmentation scores, [3] ground-truth
        boxes, [4] ground-truth masks, [5] image info (row 0 only),
        [6] mask info. ``top`` is not used.

        Stores ``self._bbox_reg_labels``, ``self._clip_keep`` and
        ``self._keep_inds`` as a side effect and returns a dict of the
        output blobs keyed by name.
        """
        rois = bottom[0].data
        bbox_deltas = bottom[1].data
        # Apply bounding box regression according to maximum segmentation
        # score; column 0 is excluded from the argmax.
        seg_scores = bottom[2].data
        best_labels = seg_scores[:, 1:].argmax(axis=1) + 1
        self._bbox_reg_labels = best_labels

        gt_boxes = bottom[3].data
        gt_masks = bottom[4].data
        im_info = bottom[5].data[0, :]
        mask_info = bottom[6].data

        # Select the 4 deltas of the chosen class for every ROI. `range`
        # replaces the Python-2-only `xrange` and works on Python 2 and 3.
        artificial_deltas = np.zeros((rois.shape[0], 4))
        for row in range(rois.shape[0]):
            label = best_labels[row]
            artificial_deltas[row, :] = bbox_deltas[row,
                                                    4 * label:4 * (label + 1)]
        # best_labels is always >= 1 (argmax + 1), so no row is reset here;
        # kept for parity with the original code.
        artificial_deltas[best_labels == 0, :] = 0

        refined = np.zeros((rois.shape[0], 5))
        refined[:, 0] = 0
        refined[:, 1:5] = bbox_transform_inv(rois[:, 1:5], artificial_deltas)
        # Ground-truth boxes (minus their last column) are appended with a
        # leading zero column so they share the ROI layout.
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack(
            (refined, np.hstack((zeros, gt_boxes[:, :-1])))
        )
        all_rois[:, 1:5], self._clip_keep = clip_boxes(all_rois[:, 1:5], im_info[:2])

        sampled = self._sample_output(all_rois, gt_boxes, im_info[2], gt_masks, mask_info)
        labels, rois_out, fg_inds, keep_inds = sampled[:4]
        mask_targets, top_mask_info, bbox_targets, bbox_inside_weights = sampled[4:]
        bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
        self._keep_inds = keep_inds

        # Weight 1 for the leading len(fg_inds) rows, 0 elsewhere
        # (presumably the foreground samples -- verify in _sample_output).
        mask_weight = np.zeros((rois_out.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        mask_weight[0:len(fg_inds), :, :, :] = 1

        blobs = {
            'rois': rois_out,
            'labels': labels,
            'mask_targets': mask_targets,
            'mask_weight': mask_weight,
            'gt_mask_info': top_mask_info,
            'bbox_targets': bbox_targets,
            'bbox_inside_weights': bbox_inside_weights,
            'bbox_outside_weights': bbox_outside_weights
        }
        return blobs
 def _segmentation_forward(self, im):
     """Run one forward pass and gather both stages' segmentation outputs.

     Reads ROIs, mask proposals and class probabilities of the first stage
     and of the second (``*_ext``) stage from ``self.net``, maps the boxes
     back to the coordinates of ``im`` and stacks first-stage rows before
     second-stage rows.

     Returns:
         tuple ``(masks, boxes, scores)`` of numpy arrays.
     """
     net_inputs, scales = self._prepare_mnc_args(im)
     self.net.forward(**net_inputs)
     blobs = self.net.blobs
     scale = scales[0]

     # Boxes are in resized space, so un-scale them and clip to the image.
     # Only columns 1..4 of each ROI row hold coordinates.
     first_boxes = blobs['rois'].data[:, 1:5] / scale
     second_boxes = blobs['rois_ext'].data[:, 1:5] / scale
     first_boxes, _ = clip_boxes(first_boxes, im.shape)
     second_boxes, _ = clip_boxes(second_boxes, im.shape)

     # Stack the two stages to get the final network output.
     masks = np.concatenate(
         (blobs['mask_proposal'].data, blobs['mask_proposal_ext'].data),
         axis=0)
     boxes = np.concatenate((first_boxes, second_boxes), axis=0)
     scores = np.concatenate(
         (blobs['seg_cls_prob'].data, blobs['seg_cls_prob_ext'].data),
         axis=0)
     return masks, boxes, scores
Example #5
0
File: demo.py Project: kekedan/MNC
def im_detect(im, net):
    """Run `net` on `im` and return the merged outputs of both stages.

    Args:
        im: input image array; its ``shape`` is used to clip the boxes.
        net: network exposing ``forward`` and a ``blobs`` mapping.

    Returns:
        tuple ``(boxes, masks, scores)``; in each array the first-stage
        rows come before the second-stage (``*_ext``) rows.
    """
    forward_kwargs, im_scales = prepare_mnc_args(im, net)
    net.forward(**forward_kwargs)

    def to_image_space(roi_blob):
        # ROI coordinates (columns 1..4) are in resized space: un-scale
        # them, then clip to the original image.
        unscaled = roi_blob[:, 1:5] / im_scales[0]
        clipped, _ = clip_boxes(unscaled, im.shape)
        return clipped

    phase1_boxes = to_image_space(net.blobs['rois'].data)
    phase2_boxes = to_image_space(net.blobs['rois_ext'].data)
    phase1_masks = net.blobs['mask_proposal'].data
    phase2_masks = net.blobs['mask_proposal_ext'].data
    phase1_scores = net.blobs['seg_cls_prob'].data
    phase2_scores = net.blobs['seg_cls_prob_ext'].data

    # Concatenate two stages to get the final network output.
    boxes = np.concatenate((phase1_boxes, phase2_boxes), axis=0)
    masks = np.concatenate((phase1_masks, phase2_masks), axis=0)
    scores = np.concatenate((phase1_scores, phase2_scores), axis=0)
    return boxes, masks, scores
Example #6
0
def im_detect(im, net):
    """Run first-stage detection at several test scales and stack results.

    One pass is made at ``cfg.TEST.SCALES[0]`` and then one pass for every
    entry of ``cfg.TEST.SCALES``; the per-pass boxes, masks and scores are
    concatenated row-wise in that order. Only the first-stage blobs
    (``rois``, ``mask_proposal``, ``seg_cls_prob``) are collected.

    NOTE(review): the first scale is therefore evaluated twice (once
    up front, once in the loop). This matches the previous behaviour and
    is preserved on purpose -- confirm whether the duplicate is intended.

    Args:
        im: input image array; its ``shape`` is used to clip the boxes.
        net: network exposing ``forward`` and a ``blobs`` mapping.

    Returns:
        tuple ``(boxes, masks, scores)`` of numpy arrays.
    """
    def detect_at(test_size):
        # Run one forward pass at `test_size` and snapshot its outputs.
        forward_kwargs, im_scales = prepare_mnc_args(im, net, test_size)
        net.forward(**forward_kwargs)
        # Boxes are in resized space, we un-scale them back. The division
        # already yields a fresh array.
        boxes_t = net.blobs['rois'].data[:, 1:5] / im_scales[0]
        boxes_t, _ = clip_boxes(boxes_t, im.shape)
        # Copy masks and scores: the blob arrays are backed by network
        # memory that the next forward pass overwrites.
        masks_t = net.blobs['mask_proposal'].data.copy()
        scores_t = net.blobs['seg_cls_prob'].data.copy()
        return boxes_t, masks_t, scores_t

    test_size_list = cfg.TEST.SCALES
    per_pass = [detect_at(test_size_list[0])]

    # Parenthesised single-argument print is valid on Python 2 and 3 and
    # prints the same text on both.
    print(test_size_list)
    for test_size in test_size_list:
        print('>>>>> use test_size %d' % test_size)
        per_pass.append(detect_at(test_size))

    # Concatenate once at the end instead of re-copying on every pass.
    boxes = np.concatenate([result[0] for result in per_pass], axis=0)
    masks = np.concatenate([result[1] for result in per_pass], axis=0)
    scores = np.concatenate([result[2] for result in per_pass], axis=0)

    return boxes, masks, scores
Example #7
0
 def forward_test(self, bottom, top):
     """Pick, for every ROI, the regressed box of its top-scoring class.

     ``bottom`` is read positionally: [0] ROIs (columns 1:5 are the box),
     [1] bbox deltas, [2] class scores, [3] image info (row 0, first two
     entries are passed to ``clip_boxes``). ``top`` is not used.

     Returns:
         dict with a single key 'rois': an (n, 5) array whose column 0 is
         0 and whose columns 1:5 hold the selected, clipped boxes.
     """
     rois = bottom[0].data
     bbox_deltas = bottom[1].data
     # get ~ n * 4(1+c) new rois
     all_rois = bbox_transform_inv(rois[:, 1:5], bbox_deltas)
     scores = bottom[2].data
     im_info = bottom[3].data
     # get highest scored category's bounding box regressor
     score_max = scores.argmax(axis=1)
     rois_out = np.zeros((rois.shape[0], 5))
     # Single batch: every ROI belongs to image 0
     rois_out[:, 0] = 0
     # enumerate replaces the Python-2-only xrange index loop; it visits
     # the same (row, class) pairs in the same order.
     for row, best_cls in enumerate(score_max):
         rois_out[row, 1:5] = all_rois[row,
                                       4 * best_cls:4 * (best_cls + 1)]
     rois_out[:, 1:5], _ = clip_boxes(rois_out[:, 1:5], im_info[0, :2])
     blobs = {'rois': rois_out}
     return blobs
Example #8
0
 def forward_test(self, bottom, top):
     """Build test-time ROIs from the best class's box regression.

     Reads ROIs (bottom[0]), bbox deltas (bottom[1]), class scores
     (bottom[2]) and image info (bottom[3]). For each ROI the 4 columns
     belonging to the arg-max class are taken from the transformed boxes
     and clipped to the image. ``top`` is not used.

     Returns:
         dict ``{'rois': rois_out}`` where ``rois_out`` is (n, 5) with a
         zero first column followed by the box coordinates.
     """
     rois = bottom[0].data
     bbox_deltas = bottom[1].data
     scores = bottom[2].data
     im_info = bottom[3].data
     # get ~ n * 4(1+c) new rois
     all_rois = bbox_transform_inv(rois[:, 1:5], bbox_deltas)
     # get highest scored category's bounding box regressor
     score_max = scores.argmax(axis=1)
     # Column 0 is left at 0 by np.zeros and set again explicitly below.
     rois_out = np.zeros((rois.shape[0], 5))
     rois_out[:, 0] = 0
     # `range` instead of the Python-2-only `xrange`: same iteration,
     # valid on Python 2 and 3.
     for i in range(len(score_max)):
         first_col = 4 * score_max[i]
         rois_out[i, 1:5] = all_rois[i, first_col:first_col + 4]
     rois_out[:, 1:5], _ = clip_boxes(rois_out[:, 1:5], im_info[0, :2])
     blobs = {
         'rois': rois_out
     }
     return blobs
def im_detect(im, net):
    """Run `net` on `im` and return the first-stage outputs only.

    Only the first-stage blobs (``rois``, ``mask_proposal``,
    ``seg_cls_prob``) are collected; the second-stage (``*_ext``) blobs
    are not read by this variant.

    Args:
        im: input image array; its ``shape`` is used to clip the boxes.
        net: network exposing ``forward`` and a ``blobs`` mapping.

    Returns:
        tuple ``(boxes, masks, scores)``. All three are independent
        arrays, so they stay valid after the next forward pass.
    """
    forward_kwargs, im_scales = prepare_mnc_args(im, net)
    net.forward(**forward_kwargs)
    # Boxes are in resized space, we un-scale them back. The division
    # produces a new array, so no explicit copy is needed for the ROIs.
    boxes = net.blobs['rois'].data[:, 1:5] / im_scales[0]
    boxes, _ = clip_boxes(boxes, im.shape)
    # Copy masks and scores: returning the blob arrays directly would hand
    # out views of network memory that the next forward pass overwrites.
    masks = net.blobs['mask_proposal'].data.copy()
    scores = net.blobs['seg_cls_prob'].data.copy()
    return boxes, masks, scores
 def _detection_forward(self, im):
     """Detect object classes in an image given object proposals.

     Arguments:
         im (ndarray): color image to test (in BGR order)
     Returns:
         box_scores (ndarray): R x K array of object class scores (K includes
             background as object category 0)
         all_boxes (ndarray): R x (4*K) array of predicted bounding boxes

     NOTE(review): the scores are returned exactly as found in the forward
     output; if that array is backed by network memory it may change on
     the next forward pass -- confirm before holding on to it.
     """
     net_inputs, scales = self._prepare_mnc_args(im)
     outputs = self.net.forward(**net_inputs)
     # ROI coordinates (columns 1..4) mapped back to raw image space.
     proposals = self.net.blobs['rois'].data[:, 1:5] / scales[0]
     # Apply the predicted regression and keep the result inside the image.
     regressed = bbox_transform_inv(proposals, outputs['bbox_pred'])
     regressed, _ = clip_boxes(regressed, im.shape)
     return outputs['cls_prob'], regressed
Example #11
0
 def _detection_forward(self, im):
     """Run the detection network on one image.

     Arguments:
         im (ndarray): color image to test (in BGR order)
     Returns:
         box_scores (ndarray): R x K array of object class scores (K includes
             background as object category 0)
         all_boxes (ndarray): R x (4*K) array of predicted bounding boxes
     """
     forward_kwargs, im_scales = self._prepare_mnc_args(im)
     net_out = self.net.forward(**forward_kwargs)

     # Detection scores and regression deltas come from the forward output.
     class_scores = net_out['cls_prob']
     deltas = net_out['bbox_pred']

     # ROIs come from the 'rois' blob; columns 1..4 are divided by the
     # image scale to get back to raw image space.
     roi_blob = self.net.blobs['rois'].data
     raw_boxes = roi_blob[:, 1:5] / im_scales[0]

     refined_boxes = bbox_transform_inv(raw_boxes, deltas)
     refined_boxes, _ = clip_boxes(refined_boxes, im.shape)
     return class_scores, refined_boxes
Example #12
0
    def forward(self, bottom, top):
        """Turn RPN scores and box deltas into a ranked set of proposals.

        Inputs, as read from ``bottom``:
            bottom[0]: class scores; only the channels from
                ``self._num_anchors`` onward (the fg probs) are used.
            bottom[1]: predicted box deltas.
            bottom[2]: image info; row 0 is used, its first two entries
                are passed to ``clip_boxes`` and the third scales
                ``RPN_MIN_SIZE``.

        Outputs, written into ``top`` through ``self._top_name_map``:
            'rois': one row per kept proposal, a 0 batch index followed by
                the 4 box coordinates.
            'proposal_index': only in the TRAIN phase with
                ``cfg.TRAIN.MIX_INDEX`` set; indices of the kept proposals
                mapped back through the sort and the small-box filter.

        Side effects: stores ``_height``, ``_width``,
        ``_anchor_index_before_clip``, ``_proposal_index_before_clip``,
        ``_ind_after_filter``, ``_ind_after_sort`` and ``_proposal_index``
        on ``self``.

        Raises:
            AssertionError: if the batch holds more than one item.
        """
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted transform deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        assert bottom[0].data.shape[
            0] == 1, 'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        # 1. Generate proposals from transform deltas and shifted anchors
        height, width = scores.shape[-2:]
        self._height = height
        self._width = width
        # Enumerate all shifts
        shift_x = np.arange(0, self._width) * self._feat_stride
        shift_y = np.arange(0, self._height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        # Only the keep indices are used from this call.
        # NOTE(review): if clip_boxes modifies its argument in place, the
        # anchors are already clipped when they are transformed below --
        # confirm this is intended.
        _, keep = clip_boxes(anchors, im_info[:2])
        self._anchor_index_before_clip = keep

        # Transpose and reshape predicted transform transformations to get them
        # into the same order as the anchors:
        #
        # transform deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via transform transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals, keep = clip_boxes(proposals, im_info[:2])
        # Record the corresponding index before and after clip
        # This step doesn't need unmap
        # We need it to decide whether do back propagation
        self._proposal_index_before_clip = keep

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_small_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        self._ind_after_filter = keep

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]

        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        self._ind_after_sort = order
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]

        scores = scores[keep]
        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        proposals = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
        self._proposal_index = keep

        blobs = {'rois': proposals}

        if cfg_key == 'TRAIN' and cfg.TRAIN.MIX_INDEX:
            # Map the NMS survivors back through the sort order and the
            # small-box filter.
            all_rois_index = self._ind_after_filter[self._ind_after_sort[
                self._proposal_index]].reshape(1, len(keep))
            blobs['proposal_index'] = all_rois_index

        # Copy data to forward to top layer. `.items()` replaces the
        # Python-2-only `.iteritems()` and works on Python 2 and 3.
        for blob_name, blob in blobs.items():
            top_ind = self._top_name_map[blob_name]
            top[top_ind].reshape(*blob.shape)
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)
Example #13
0
    def forward(self, bottom, top):
        """Generate proposals from RPN outputs and write them to ``top``.

        Reads the class scores (bottom[0], fg channels only, i.e. from
        ``self._num_anchors`` onward), the box deltas (bottom[1]) and row 0
        of the image info (bottom[2]). Shifted anchors are transformed by
        the deltas, clipped, filtered by minimum size, sorted by score,
        truncated, passed through NMS and truncated again.

        The 'rois' blob (0 batch index + 4 coordinates per row) is always
        written; 'proposal_index' is written as well when the phase is
        'TRAIN' and ``cfg.TRAIN.MIX_INDEX`` is set. Target positions in
        ``top`` come from ``self._top_name_map``.

        Intermediate index arrays are stored on ``self``
        (``_anchor_index_before_clip``, ``_proposal_index_before_clip``,
        ``_ind_after_filter``, ``_ind_after_sort``, ``_proposal_index``)
        together with ``_height`` and ``_width``.

        Raises:
            AssertionError: if bottom[0] holds more than one batch item.
        """
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted transform deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        assert bottom[0].data.shape[0] == 1, 'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        phase_cfg = cfg[cfg_key]
        pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
        post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
        nms_thresh = phase_cfg.RPN_NMS_THRESH
        min_size = phase_cfg.RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        # 1. Generate proposals from transform deltas and shifted anchors
        height, width = scores.shape[-2:]
        self._height = height
        self._width = width
        # Enumerate all shifts
        shift_x = np.arange(0, self._width) * self._feat_stride
        shift_y = np.arange(0, self._height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        # The clipped boxes returned here are discarded; only the keep
        # indices are recorded. Whether `anchors` itself is altered depends
        # on clip_boxes -- TODO confirm.
        _, keep = clip_boxes(anchors, im_info[:2])
        self._anchor_index_before_clip = keep

        # Transpose and reshape predicted transform transformations to get them
        # into the same order as the anchors:
        #
        # transform deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via transform transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals, keep = clip_boxes(proposals, im_info[:2])
        # Record the corresponding index before and after clip
        # This step doesn't need unmap
        # We need it to decide whether do back propagation
        self._proposal_index_before_clip = keep

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_small_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        self._ind_after_filter = keep

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]

        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        self._ind_after_sort = order
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]

        scores = scores[keep]
        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        proposals = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        self._proposal_index = keep

        blobs = {
            'rois': proposals
        }

        if str(self.phase) == 'TRAIN':
            if cfg.TRAIN.MIX_INDEX:
                # Indices of the final proposals, traced back through the
                # score sort and the small-box filter.
                sorted_inds = self._ind_after_sort[self._proposal_index]
                all_rois_index = self._ind_after_filter[sorted_inds].reshape(1, len(keep))
                blobs['proposal_index'] = all_rois_index

        # Copy data to forward to top layer. dict.items() is used because
        # dict.iteritems() only exists on Python 2.
        for blob_name, blob in blobs.items():
            top_ind = self._top_name_map[blob_name]
            top[top_ind].reshape(*blob.shape)
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)