def rois_refine(rois, bbox_pred, cls_score, im_info):
    device_id = bbox_pred.get_device()
    box_num1 = int(np.sum(rois[:, 0] == 0))
    box_num2 = int(rois.shape[0]) - box_num1
    temp_bbox_pred = [bbox_pred[:box_num1], bbox_pred[box_num1:]]
    temp_cls_score = [cls_score[:box_num1], cls_score[box_num1:]]
    temp_boxes = [rois[:box_num1], rois[box_num1:]]
    new_rois = np.zeros((box_num1 + box_num2, 5))
    # new_rois = torch.zeros((box_num1 + box_num2, 5)).cuda(device_id)
    box_num = [0, box_num1, box_num2 + box_num1]

    batch = len(im_info)
    im_info = im_info.data.numpy()
    for i in range(batch):
        im_scale = float(im_info[i][-1])
        im_shape = im_info[i][:2] / im_scale
        boxes = temp_boxes[i][:, 1:5] / im_scale
        # Apply bounding-box regression deltas
        box_deltas = temp_bbox_pred[i].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        # Remove predictions for bg class (compat with MSRA code)
        # box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        # pred_boxes is (512, 14); the 14 sets of class coordinates differ only slightly
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_shape)
        # pred_boxes = torch.from_numpy(pred_boxes).cuda(device_id)

        scores = F.softmax(temp_cls_score[i], dim=1)
        scores = scores.data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        scores = scores.reshape([-1, scores.shape[-1]])
        # scores = scores.view([-1, scores.size(-1)])
        # index -1 is the background class
        max_index = np.argmax(scores, axis=1)
        # max_index = max_index[:, np.newaxis]
        # max_index = torch.argmax(scores, 1)
        new_boxes = np.zeros((pred_boxes.shape[0], 4))
        # new_boxes = torch.zeros((pred_boxes.size(0), 4)).cuda(device_id)
        # for j in range(pred_boxes.size(0)):
        for j in range(pred_boxes.shape[0]):
            new_boxes[j] = pred_boxes[j][max_index[j] * 4:(max_index[j] + 1) * 4]

        new_boxes = new_boxes * im_scale
        rois_index = np.zeros((pred_boxes.shape[0], 1))
        # rois_index = torch.zeros((pred_boxes.size(0), 1)).cuda(device_id)
        rois_index[:] = i
        new_rois[box_num[i]:box_num[i + 1]] = np.append(rois_index,
                                                        new_boxes,
                                                        axis=1)
        # new_rois[box_num[i]: box_num[i + 1]] = torch.cat((rois_index, new_boxes), dim=1)
    new_rois = np.array(new_rois, dtype=np.float32)

    return new_rois
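
# --- Aside (not part of the original example): the per-row j-loop in
# rois_refine above can be replaced by a single fancy-indexing gather. A
# minimal self-contained sketch; the array names mirror the example but the
# data here is made up.
import numpy as np

_pred_boxes = np.arange(24, dtype=np.float32).reshape(2, 12)  # (N=2, 4K=12)
_max_index = np.array([2, 0])                        # argmax class per row
_row_ids = np.arange(_pred_boxes.shape[0])[:, None]  # (N, 1)
_col_ids = _max_index[:, None] * 4 + np.arange(4)    # (N, 4) column indices
_new_boxes = _pred_boxes[_row_ids, _col_ids]         # row 0 takes columns 8..11
assert (_new_boxes[0] == _pred_boxes[0, 8:12]).all()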
Example #2
def process_return_dict(return_dict, im, im_scale):
    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally denormalize targets by a precomputed mean and stdev.
            # Note: box_deltas is a numpy array here, so reshape is used; the original
            # torch-style .view(-1, 4) call would raise a TypeError on an ndarray.
            box_deltas = (box_deltas.reshape(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS
                          + cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape(box_deltas.shape)
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape[:2])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
Example #3
    def get_det_boxes(self, boxes, scores, box_deltas, h_and_w):

        if cfg.TEST.BBOX_REG:
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Remove predictions for bg class (compat with MSRA code)
                box_deltas = box_deltas[:, -4:]
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # (legacy) Optionally denormalize targets by a precomputed mean and stdev.
                # Note: box_deltas is a numpy array here, so reshape is used instead of
                # the torch-style .view(-1, 4), which would raise on an ndarray.
                box_deltas = (box_deltas.reshape(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS
                              + cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape(box_deltas.shape)
            pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                                  cfg.MODEL.BBOX_REG_WEIGHTS)
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, h_and_w)
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
            # Map scores and predictions back to the original set of boxes.
            # NOTE: inv_index is not defined in this method; it must be produced by
            # the caller's deduplication step (cf. im_detect_bbox elsewhere in this
            # listing) for this branch to work.
            scores = scores[inv_index, :]
            pred_boxes = pred_boxes[inv_index, :]

        return scores, pred_boxes
Example #4
def postprocess_output(rois,
                       scaling_factor,
                       im_size,
                       class_scores,
                       bbox_deltas,
                       bbox_reg_weights=(10.0, 10.0, 5.0, 5.0)):
    boxes = to_np(rois.div(scaling_factor).squeeze(0))
    bbox_deltas = to_np(bbox_deltas)
    orig_im_size = to_np(im_size).squeeze()
    # apply deltas
    pred_boxes = box_utils.bbox_transform(boxes, bbox_deltas, bbox_reg_weights)
    # clip on boundaries
    pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, orig_im_size)
    scores = to_np(class_scores)
    # Map scores and predictions back to the original set of boxes
    # This re-duplicates the previously removed boxes
    # Is there any use for this?
    #    inv_index = to_np(batch['proposal_inv_index']).squeeze().astype(np.int64)
    #    scores = scores[inv_index, :]
    #    pred_boxes = pred_boxes[inv_index, :]
    # threshold on score and run nms to remove duplicates
    scores_final, boxes_final, boxes_per_class = box_results_with_nms_and_limit(
        scores, pred_boxes)

    return (scores_final, boxes_final, boxes_per_class)
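
# --- Aside (not part of the original example): a minimal numpy sketch of what
# box_utils.bbox_transform and box_utils.clip_tiled_boxes compute under the
# usual Detectron conventions (center/size deltas with per-coordinate weights,
# legacy "+1" box widths, boxes tiled as 4 columns per class). For reference
# only; the real box_utils implementations should be preferred.
import numpy as np

def _bbox_transform_sketch(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
    """Decode (dx, dy, dw, dh) deltas; boxes is (N, 4), deltas is (N, 4K)."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    wx, wy, ww, wh = weights
    dx, dy = deltas[:, 0::4] / wx, deltas[:, 1::4] / wy
    dw, dh = deltas[:, 2::4] / ww, deltas[:, 3::4] / wh
    pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
    pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
    pred_w = np.exp(dw) * widths[:, None]
    pred_h = np.exp(dh) * heights[:, None]
    pred = np.zeros_like(deltas, dtype=np.float64)
    pred[:, 0::4] = pred_ctr_x - 0.5 * pred_w           # x1
    pred[:, 1::4] = pred_ctr_y - 0.5 * pred_h           # y1
    pred[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1.0     # x2
    pred[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1.0     # y2
    return pred

def _clip_tiled_boxes_sketch(boxes, im_shape):
    """Clip (N, 4K) tiled boxes to [0, w-1] x [0, h-1]; im_shape is (h, w)."""
    boxes[:, 0::4] = boxes[:, 0::4].clip(0, im_shape[1] - 1)
    boxes[:, 1::4] = boxes[:, 1::4].clip(0, im_shape[0] - 1)
    boxes[:, 2::4] = boxes[:, 2::4].clip(0, im_shape[1] - 1)
    boxes[:, 3::4] = boxes[:, 3::4].clip(0, im_shape[0] - 1)
    return boxes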
Example #5
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Prepare the bbox for testing"""

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    inputs['data'] = [Variable(torch.from_numpy(inputs['data']), volatile=True)]
    inputs['im_info'] = [Variable(torch.from_numpy(inputs['im_info']), volatile=True)]

    return_dict = model(**inputs)

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally denormalize targets by a precomputed mean and stdev.
            # Note: box_deltas is a numpy array here, so reshape is used; the torch-style
            # .view(-1, 4) call would raise a TypeError on an ndarray.
            box_deltas = (box_deltas.reshape(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS
                          + cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape(box_deltas.shape)
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
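
# --- Aside (not part of the original example): a self-contained sketch of the
# cfg.DEDUP_BOXES hashing trick used above. Each quantized box row is hashed to
# a scalar via a dot product with powers of 1000, np.unique keeps one row per
# hash, and inv_index later re-expands results to the original ordering. The
# 1/16 quantization factor below is an assumption mirroring the usual config.
import numpy as np

_boxes = np.array([[10.2, 10.3, 50.1, 60.0],
                   [10.4, 10.1, 50.3, 59.8],     # quantizes to the same cell
                   [100., 80., 200., 160.]], dtype=np.float32)
_DEDUP_BOXES = 1. / 16.
_v = np.array([1, 1e3, 1e6, 1e9])                # one weight per column
_hashes = np.round(_boxes * _DEDUP_BOXES).dot(_v)
_, _index, _inv_index = np.unique(_hashes, return_index=True, return_inverse=True)
_unique_boxes = _boxes[_index, :]                # compute features on these only
_restored = _unique_boxes[_inv_index, :]         # re-duplicate to original order
assert _restored.shape == _boxes.shape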
Example #6
    def test_res(self):
        cls_preds, cls_probs, box_preds, anchors, im_info = self._test_std()
        im_info = im_info.astype(np.float32)

        for level in range(7, 8):
            cls_pred = cls_preds[level - 3]
            cls_prob = cls_probs[level - 3]
            box_pred = box_preds[level - 3]
            anchor = anchors[level].astype(np.float32)

            YI, YV = self._run_select_top_n_op_gpu(-cls_prob, 1000)
            boxes_act, feats_act, stats_act = self._run_boxes_and_feats_op_gpu(
                cls_pred, box_pred, anchor, YI, YV, im_info, level)

            cls_probs_ravel = cls_prob.ravel()
            A = 9
            num_cls = int(cls_prob.shape[1] / A)
            H = cls_prob.shape[2]
            W = cls_prob.shape[3]
            cls_pred = cls_pred.reshape((1, A, num_cls, H, W))
            cls_prob = cls_prob.reshape((1, A, num_cls, H, W))
            box_pred = box_pred.reshape((1, A, 4, H, W))
            inds_5d = np.array(np.unravel_index(YI,
                                                cls_prob.shape)).transpose()
            classes = inds_5d[:, 2]
            anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
            feats_exp = cls_pred[:, anchor_ids, :, y, x]
            feats_exp = feats_exp.reshape(-1, num_cls)

            scores = cls_prob[:, anchor_ids, classes, y, x]
            scores = scores.ravel()

            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= (2**level)
            boxes += anchor[anchor_ids, :]
            box_deltas = box_pred[0, anchor_ids, :, y, x]
            pred_boxes = box_utils.bbox_transform(boxes, box_deltas)
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_info[0, :2])

            boxes_exp = np.zeros((pred_boxes.shape[0], 5), dtype=np.float32)
            boxes_exp[:, 0:4] = pred_boxes
            boxes_exp[:, 4] = scores

            # for i in range(num_cls):
            #     if stats_act[0, i] > 0:
            #         print('cls %d: %d' % (i+1, stats_act[0, i]))

            np.testing.assert_allclose(boxes_act[:, :5],
                                       boxes_exp,
                                       rtol=1e-6,
                                       atol=1e-6)
            np.testing.assert_allclose(feats_act, feats_exp)
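
# --- Aside (not part of the original example): a self-contained sketch of the
# np.unravel_index mapping used in the test above to turn flat top-k indices
# back into (anchor, class, y, x) coordinates. The shapes here are made up.
import numpy as np

_cls_prob = np.random.rand(1, 9, 5, 10, 12)       # (N, A, num_cls, H, W)
_flat_top = np.argsort(-_cls_prob.ravel())[:50]   # top-50 flat indices
_inds_5d = np.array(np.unravel_index(_flat_top, _cls_prob.shape)).transpose()
_classes = _inds_5d[:, 2]
_anchor_ids, _y, _x = _inds_5d[:, 1], _inds_5d[:, 3], _inds_5d[:, 4]
_scores = _cls_prob[0, _anchor_ids, _classes, _y, _x]  # gathered top-50 scores
assert np.allclose(np.sort(_scores)[::-1], _scores)    # already descending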
Example #7
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Prepare the bbox for testing"""

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [Variable(torch.from_numpy(inputs['data']), volatile=True)]
        inputs['rois'] = [Variable(torch.from_numpy(inputs['rois']), volatile=True)]
        inputs['labels'] = [Variable(torch.from_numpy(inputs['labels']), volatile=True)]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['rois'] = [torch.from_numpy(inputs['rois'])]
        inputs['labels'] = [torch.from_numpy(inputs['labels'])]

    return_dict = model(**inputs)

    # cls prob (activations after softmax)
    scores = return_dict['refine_score'][0].data.cpu().numpy().squeeze()
    for i in range(1, cfg.REFINE_TIMES):
        scores += return_dict['refine_score'][i].data.cpu().numpy().squeeze()
    scores /= cfg.REFINE_TIMES
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.MODEL.WITH_FRCNN:
        scores += return_dict['cls_score'].data.cpu().numpy().squeeze().reshape([-1, scores.shape[-1]])
        # scores = scores.reshape([-1, scores.shape[-1]])
        scores /= 2.

        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()

        pred_boxes = box_utils.bbox_transform(boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
    else:
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
Example #9
def im_detect_bbox_batch(model, ims, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        ims (ndarray): cfg.TEST.IMS_PER_BATCH color images to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores list((ndarray)): [R x K] array of object class scores for K classes
            (K includes background as object category 0)
        boxes list((ndarray)): [R x 4*K] array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(ims, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()

    # In case there is 1 proposal
    scores_batch = []
    pred_boxes_batch = []
    for i in range(cfg.TEST.IMS_PER_BATCH):
        # select batch
        select_inds = np.where(rois[:, 0] == i)

        # unscale back to raw image space
        boxes = rois[select_inds, 1:5] / im_scales[i]
        boxes = boxes.reshape([-1, boxes.shape[-1]])
        scores_i = scores[select_inds, :]
        scores_i = scores_i.reshape([-1, scores_i.shape[-1]])
        scores_batch.append(scores_i)

        if cfg.TEST.BBOX_REG:
            # In case there is 1 proposal
            box_deltas_i = box_deltas[select_inds, :]
            box_deltas_i = box_deltas_i.reshape([-1, box_deltas_i.shape[-1]])
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Remove predictions for bg class (compat with MSRA code)
                box_deltas_i = box_deltas_i[:, -4:]
            pred_boxes = box_utils.bbox_transform(boxes, box_deltas_i,
                                                  cfg.MODEL.BBOX_REG_WEIGHTS)
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, ims[i].shape)
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                pred_boxes = (np.tile(pred_boxes, (1, scores_i.shape[1])))
            pred_boxes_batch.append(pred_boxes)
        else:
            logger.error('Not implemented.')
            return None, None, None

        if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
            logger.error('Not implemented.')
            return None, None, None

    return scores_batch, pred_boxes_batch, im_scales
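
# --- Aside (not part of the original example): column 0 of a rois blob holds
# the image index within the minibatch, which is what the batch loop above keys
# on. A tiny self-contained sketch of the split, including the reshape that
# undoes the extra dimension introduced by indexing with the np.where tuple.
import numpy as np

_rois = np.array([[0, 10, 10, 50, 60],
                  [1, 20, 20, 80, 90],
                  [0, 30, 30, 70, 70]], dtype=np.float32)
_im_scales = [1.0, 2.0]
for _i in range(2):
    _sel = np.where(_rois[:, 0] == _i)
    _boxes = _rois[_sel, 1:5] / _im_scales[_i]        # shape (1, R_i, 4)
    _boxes = _boxes.reshape([-1, _boxes.shape[-1]])   # back to (R_i, 4)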
Example #10
    def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas,
                                scores, frames_per_vid):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A * frames_per_vid, H, W) format from
        #     conv output
        #   - transpose to (H, W, 4 * A * frames_per_vid)
        #   - reshape to (H * W * A, 4 * frames_per_vid) where rows are ordered
        #     by (H, W, A) in slowest to fastest order to match the enumerated
        #     anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape(
            (-1, 4 * frames_per_vid))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #     This computes the score for the tube
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN > len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(),
                                   pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # 1. Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                             (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        # again, needs to be done one each frame and need to "AND" over frames
        keep = np.arange(proposals.shape[0])
        for frame_id in range(frames_per_vid):
            keep = np.intersect1d(
                keep,
                _filter_boxes(proposals[:, frame_id * 4:(frame_id + 1) * 4],
                              min_size, im_info))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            # When we're training on multiple GPUs, running NMS on the GPU
            # causes serious perf issues. We need to debug, but for now force
            # running on the CPU when training
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
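
# --- Aside (not part of the original example): a self-contained sketch of the
# argpartition trick used above for pre-NMS top-k selection. Partitioning is
# O(n), and only the k survivors get fully sorted, which is where the quoted
# ~20x speedup on ~200k scores comes from.
import numpy as np

_scores = np.random.rand(200000)
_pre_nms_topN = 6000
_inds = np.argpartition(-_scores, _pre_nms_topN)[:_pre_nms_topN]  # unsorted top-k
_order = _inds[np.argsort(-_scores[_inds])]                       # sort only top-k
assert _scores[_order[0]] == _scores.max()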
Example #11
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale
Example #12
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        rpn_ret = self.RPN(blob_conv, im_info, roidb)

        # if self.training:
        #     # can be used to infer fg/bg ratio
        #     return_dict['rois_label'] = rpn_ret['labels_int32']

        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if not cfg.MODEL.RPN_ONLY:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret)
            cls_score, bbox_pred = self.Box_Outs(box_feat)
        else:
            # TODO: complete the returns for RPN only situation
            pass

        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            rpn_kwargs.update(
                dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                     if (k.startswith('rpn_cls_logits')
                         or k.startswith('rpn_bbox_pred'))))
            loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                **rpn_kwargs)
            if cfg.FPN.FPN_ON:
                for k, lvl in enumerate(
                        range(cfg.FPN.RPN_MIN_LEVEL,
                              cfg.FPN.RPN_MAX_LEVEL + 1)):
                    return_dict['losses']['loss_rpn_cls_fpn%d' %
                                          lvl] = loss_rpn_cls[k]
                    return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                          lvl] = loss_rpn_bbox[k]
            else:
                return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

            # bbox loss
            loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                cls_score,
                bbox_pred,
                rpn_ret['labels_int32'],
                rpn_ret['bbox_targets'],
                rpn_ret['bbox_inside_weights'],
                rpn_ret['bbox_outside_weights'],
                stage=0)
            return_dict['losses']['loss_cls'] = loss_cls
            return_dict['losses']['loss_bbox'] = loss_bbox
            return_dict['metrics']['accuracy_cls'] = accuracy_cls

            if cfg.MODEL.MASK_ON:
                if getattr(self.Mask_Head, 'SHARE_RES5', False):
                    mask_feat = self.Mask_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                else:
                    mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                mask_pred = self.Mask_Outs(mask_feat)
                # return_dict['mask_pred'] = mask_pred
                # mask loss
                loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                    mask_pred, rpn_ret['masks_int32'])
                return_dict['losses']['loss_mask'] = loss_mask

            if cfg.MODEL.KEYPOINTS_ON:
                if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                    # No corresponding keypoint head implemented yet (Neither in Detectron)
                    # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                    kps_feat = self.Keypoint_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_keypoints_int32=rpn_ret[
                            'roi_has_keypoint_int32'])
                else:
                    kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                kps_pred = self.Keypoint_Outs(kps_feat)
                # return_dict['keypoints_pred'] = kps_pred
                # keypoints loss
                if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'])
                else:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'],
                        rpn_ret['keypoint_loss_normalizer'])
                return_dict['losses']['loss_kps'] = loss_keypoints

            # pytorch0.4 bug on gathering scalar(0-dim) tensors
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        if not cfg.FAST_RCNN.USE_CASCADE:
            return_dict['rois'] = rpn_ret['rois']
            return_dict['cls_score'] = cls_score
            return_dict['bbox_pred'] = bbox_pred
        else:
            return_dict['rois' + '_{}'.format(0)] = rpn_ret['rois']
            return_dict['cls_score' + '_{}'.format(0)] = cls_score.detach()
            return_dict['bbox_pred' + '_{}'.format(0)] = bbox_pred.detach()

        if cfg.FAST_RCNN.USE_CASCADE:
            for i in range(1, 3):
                pre_stage_name = '_{}'.format(i - 1)
                cls_score_cpu = cls_score.data.cpu().numpy()
                import utils.boxes as box_utils
                bbox_pred_cpu = bbox_pred.reshape(
                    [-1, bbox_pred.shape[-1]]).data.cpu().numpy().squeeze()
                rois = deepcopy(return_dict['rois' + pre_stage_name])

                assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG is True
                if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                    cls_loc = np.argmax(cls_score_cpu[:, 1:], axis=1) + 1
                    cls_loc = np.reshape(cls_loc, (cls_loc.shape[0], 1))
                    # Based on scores, we can select transformed rois
                    scores = np.zeros((cls_score_cpu.shape[0], 1))
                    for k in range(len(cls_loc)):
                        scores[k] = cls_score_cpu[k, cls_loc[k]]

                batch_inds = rois[:, 0]
                uni_inds = np.unique(batch_inds)

                # We assume the image WIDTH is equal to its HEIGHT.

                # We also provide an example to show how to perform the operation batch-wise
                # Scale forward
                batch_se = []
                for e in range(len(uni_inds)):
                    id_min = min(np.where(batch_inds == uni_inds[e])[0])
                    id_max = max(np.where(batch_inds == uni_inds[e])[0])
                    rois[id_min:id_max + 1, 1:5] /= im_info[e][2]
                    batch_se.append([id_min, id_max])

                pred_boxes = box_utils.bbox_transform(
                    rois[:, 1:5], bbox_pred_cpu,
                    cfg.CASCADE_RCNN.BBOX_REG_WEIGHTS[i])
                # Scale back
                for e in range(len(uni_inds)):
                    id_min = batch_se[e][0]
                    id_max = batch_se[e][1]
                    pred_boxes[id_min:id_max + 1] *= im_info[e][2]
                    pred_boxes[id_min:id_max + 1] = box_utils.clip_tiled_boxes(
                        pred_boxes[id_min:id_max + 1], im_info[e][0:2])

                cfg_key = 'TRAIN' if self.training else 'TEST'
                min_size = cfg[cfg_key].RPN_MIN_SIZE

                if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                    # Cannot use a for loop here, as it may cause an "illegal
                    # memory access". Thanks to Chen-Wei Xie!
                    rows = pred_boxes.shape[0]
                    b3 = cls_loc * 4 + np.array([0, 1, 2, 3])
                    b4 = np.array(range(rows))
                    c = pred_boxes[np.repeat(b4, 4), b3.flatten()]
                    proposals = np.reshape(c, (-1, 4))
                else:
                    proposals = pred_boxes[:, 4:8]

                keep = _filter_boxes(proposals, min_size, im_info[0])

                proposals = proposals[keep, :]
                batch_inds = batch_inds[keep]

                batch_inds = np.reshape(batch_inds, [len(batch_inds), 1])

                proposals = np.concatenate((batch_inds, proposals), axis=1)

                from modeling.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp
                self.CollectAndDistributeFpnRpnProposals = CollectAndDistributeFpnRpnProposalsOp()
                self.CollectAndDistributeFpnRpnProposals.training = self.training
                # proposals.astype('float32')
                blobs_out = self.CollectAndDistributeFpnRpnProposals(proposals,
                                                                     roidb,
                                                                     im_info,
                                                                     stage=i)
                # Update rpn_ret
                new_rpn_ret = {}
                for key, value in rpn_ret.items():
                    if 'rpn' in key:
                        new_rpn_ret[key] = value

                new_rpn_ret.update(blobs_out)

                if not self.training:
                    return_dict['blob_conv'] = blob_conv

                if not cfg.MODEL.RPN_ONLY:
                    if i == 1:
                        if cfg.MODEL.SHARE_RES5 and self.training:
                            box_feat, res5_feat = self.Box_Head_2(
                                blob_conv, new_rpn_ret)
                        else:
                            box_feat = self.Box_Head_2(blob_conv, new_rpn_ret)

                        cls_score, bbox_pred = self.Box_Outs_2(box_feat)
                    elif i == 2:
                        if cfg.MODEL.SHARE_RES5 and self.training:
                            box_feat, res5_feat = self.Box_Head_3(
                                blob_conv, new_rpn_ret)
                        else:
                            box_feat = self.Box_Head_3(blob_conv, new_rpn_ret)

                        cls_score, bbox_pred = self.Box_Outs_3(box_feat)

                if self.training:
                    # rpn loss
                    rpn_kwargs.update(
                        dict((k, new_rpn_ret[k]) for k in new_rpn_ret.keys()
                             if (k.startswith('rpn_cls_logits')
                                 or k.startswith('rpn_bbox_pred'))))
                    loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                        **rpn_kwargs)
                    if cfg.FPN.FPN_ON:
                        for k, lvl in enumerate(
                                range(cfg.FPN.RPN_MIN_LEVEL,
                                      cfg.FPN.RPN_MAX_LEVEL + 1)):
                            return_dict['losses']['loss_rpn_cls_fpn%d' %
                                                  lvl] += loss_rpn_cls[k]
                            return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                                  lvl] += loss_rpn_bbox[k]
                    else:
                        return_dict['losses']['loss_rpn_cls'] += loss_rpn_cls
                        return_dict['losses']['loss_rpn_bbox'] += loss_rpn_bbox

                    # bbox loss
                    loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                        cls_score,
                        bbox_pred,
                        new_rpn_ret['labels_int32'],
                        new_rpn_ret['bbox_targets'],
                        new_rpn_ret['bbox_inside_weights'],
                        new_rpn_ret['bbox_outside_weights'],
                        stage=i)

                    return_dict['losses']['loss_cls'] += loss_cls
                    return_dict['losses']['loss_bbox'] += loss_bbox
                    return_dict['metrics']['accuracy_cls'] += accuracy_cls

                return_dict['rois' + '_{}'.format(i)] = deepcopy(
                    new_rpn_ret['rois'])
                return_dict['cls_score' + '_{}'.format(i)] = cls_score.detach()
                return_dict['bbox_pred' + '_{}'.format(i)] = bbox_pred.detach()

                rpn_ret = new_rpn_ret.copy()

        return return_dict
Example #13
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Prepare the bbox for testing"""

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    # Names for output blobs
    rois_name = 'rois'
    cls_prob_name = 'cls_score'
    bbox_pred_name = 'bbox_pred'
    # bbox regression weights
    bbox_reg_weights = cfg.MODEL.BBOX_REG_WEIGHTS
    score_rescalar = 1.0
    if cfg.MODEL.CASCADE_ON:
        stage = cfg.CASCADE_RCNN.TEST_STAGE
        if stage <= 0:
            stage = cfg.CASCADE_RCNN.NUM_STAGE - 1
        assert stage <= cfg.CASCADE_RCNN.NUM_STAGE
        if stage >= 1:
            rois_name += '_{}'.format(stage)
            cls_prob_name += '_{}'.format(stage)
            bbox_pred_name += '_{}'.format(stage)
            bbox_reg_weights = cfg.CASCADE_RCNN.BBOX_REG_WEIGHTS[stage]
        if cfg.CASCADE_RCNN.TEST_ENSEMBLE:
            assert stage >= 2
            cls_prob_name += '_sum'
            score_rescalar /= stage

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict[rois_name].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict[cls_prob_name].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])
    # Cascade R-CNN
    scores *= score_rescalar

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict[bbox_pred_name].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally denormalize targets by a precomputed mean and stdev.
            # Note: box_deltas is a numpy array here, so reshape is used; the torch-style
            # .view(-1, 4) call would raise a TypeError on an ndarray.
            box_deltas = (box_deltas.reshape(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS
                          + cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape(box_deltas.shape)
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              bbox_reg_weights)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
Example #14
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        rpn_ret = self.RPN(blob_conv, im_info, roidb)

        # if self.training:
        #     # can be used to infer fg/bg ratio
        #     return_dict['rois_label'] = rpn_ret['labels_int32']

        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if not cfg.MODEL.RPN_ONLY:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret)
            cls_score, bbox_pred = self.Box_Outs(box_feat)
        else:
            # TODO: complete the returns for RPN only situation
            pass

        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            rpn_kwargs.update(
                dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                     if (k.startswith('rpn_cls_logits')
                         or k.startswith('rpn_bbox_pred'))))
            loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                **rpn_kwargs)

            if cfg.RPN.VIS_QUANT_TARGET:
                import numpy as np
                import json
                import os
                import time

                gt_boxes = []
                gt_label = roidb[0]['gt_classes']
                for inds, item in enumerate(gt_label):
                    if item != 0:
                        gt_boxes.append(roidb[0]['boxes'][inds])

                gt_boxes = np.array(gt_boxes, dtype=np.float32)
                gt_boxes *= im_info.detach().numpy()[:, 2]

                path = "/nfs/project/libo_i/Boosting/Targets_Info"
                if not os.path.exists(path):
                    os.makedirs(path)
                b, c, h, w = rpn_kwargs['rpn_cls_logits_fpn3'].shape
                sample_targets = rpn_kwargs[
                    'rpn_bbox_targets_wide_fpn3'][:, :, :h, :w]

                line_targets = sample_targets.detach().data.cpu().numpy()
                with open(os.path.join(path, "quant_anchors.json"), "r") as fp:
                    quant_anchors = np.array(json.load(fp), dtype=np.float32)
                    quant_anchors = quant_anchors[:h, :w]

                line_targets = line_targets[:, 4:8, :, :].transpose(
                    (0, 2, 3, 1)).reshape(quant_anchors.shape)
                line_targets = line_targets.reshape(-1, 4)

                width = im_data.shape[3]
                height = im_data.shape[2]
                # Apply the target offsets here

                line_quant_anchors = quant_anchors.reshape(-1, 4)
                pred_boxes = box_utils.onedim_bbox_transform(
                    line_quant_anchors, line_targets)
                pred_boxes = box_utils.clip_tiled_boxes(
                    pred_boxes, (height, width, 3))

                im = im_data.detach().cpu().numpy().reshape(3, height,
                                                            width).transpose(
                                                                (1, 2, 0))

                means = np.squeeze(cfg.PIXEL_MEANS)
                for i in range(3):
                    im[:, :, i] += means[i]

                im = im.astype(int)
                dpi = 200
                fig = plt.figure(frameon=False)
                fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
                ax = plt.Axes(fig, [0., 0., 1., 1.])
                ax.axis('off')
                fig.add_axes(ax)
                ax.imshow(im[:, :, ::-1])
                # Draw the ground-truth boxes on im
                for item in gt_boxes:
                    ax.add_patch(
                        plt.Rectangle((item[0], item[1]),
                                      item[2] - item[0],
                                      item[3] - item[1],
                                      fill=False,
                                      edgecolor='white',
                                      linewidth=1,
                                      alpha=1))

                cnt = 0
                for inds, before_item in enumerate(line_quant_anchors):
                    after_item = pred_boxes[inds]
                    targets_i = line_targets[inds]
                    if np.sum(targets_i) == 0:
                        continue
                    ax.add_patch(
                        plt.Rectangle((before_item[0], before_item[1]),
                                      before_item[2] - before_item[0],
                                      before_item[3] - before_item[1],
                                      fill=False,
                                      edgecolor='r',
                                      linewidth=1,
                                      alpha=1))

                    ax.add_patch(
                        plt.Rectangle((after_item[0], after_item[1]),
                                      after_item[2] - after_item[0],
                                      after_item[3] - after_item[1],
                                      fill=False,
                                      edgecolor='g',
                                      linewidth=1,
                                      alpha=1))

                    logger.info("valid boxes: {}".format(cnt))
                    cnt += 1

                if cnt != 0:
                    ticks = time.time()
                    fig.savefig(
                        "/nfs/project/libo_i/Boosting/Targets_Info/{}.png".
                        format(ticks),
                        dpi=dpi)

                plt.close('all')

            if cfg.FPN.FPN_ON:
                for i, lvl in enumerate(
                        range(cfg.FPN.RPN_MIN_LEVEL,
                              cfg.FPN.RPN_MAX_LEVEL + 1)):
                    return_dict['losses']['loss_rpn_cls_fpn%d' %
                                          lvl] = loss_rpn_cls[i]
                    return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                          lvl] = loss_rpn_bbox[i]
            else:
                return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

            # bbox loss
            loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                cls_score, bbox_pred, rpn_ret['labels_int32'],
                rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
                rpn_ret['bbox_outside_weights'])
            if cfg.RPN.ZEROLOSS:
                zero_loss_bbox = torch.Tensor([0.]).squeeze().cuda()
                zero_loss_bbox.requires_grad = True
                zero_loss_cls = torch.Tensor([0.]).squeeze().cuda()
                zero_loss_cls.requires_grad = True
                return_dict['losses']['loss_bbox'] = zero_loss_bbox
                return_dict['losses']['loss_cls'] = zero_loss_cls

            else:
                return_dict['losses']['loss_bbox'] = loss_bbox
                return_dict['losses']['loss_cls'] = loss_cls

            return_dict['metrics']['accuracy_cls'] = accuracy_cls

            if cfg.MODEL.MASK_ON:
                if getattr(self.Mask_Head, 'SHARE_RES5', False):
                    mask_feat = self.Mask_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                else:
                    mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                mask_pred = self.Mask_Outs(mask_feat)
                # return_dict['mask_pred'] = mask_pred
                # mask loss
                loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                    mask_pred, rpn_ret['masks_int32'])
                return_dict['losses']['loss_mask'] = loss_mask

            if cfg.MODEL.KEYPOINTS_ON:
                if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                    # No corresponding keypoint head implemented yet (Neither in Detectron)
                    # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                    kps_feat = self.Keypoint_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_keypoints_int32=rpn_ret[
                            'roi_has_keypoint_int32'])
                else:
                    kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                kps_pred = self.Keypoint_Outs(kps_feat)
                # return_dict['keypoints_pred'] = kps_pred
                # keypoints loss
                if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'])
                else:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'],
                        rpn_ret['keypoint_loss_normalizer'])
                return_dict['losses']['loss_kps'] = loss_keypoints

            # pytorch0.4 bug on gathering scalar(0-dim) tensors
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        else:
            # Testing
            return_dict['rois'] = rpn_ret['rois']
            return_dict['cls_score'] = cls_score
            return_dict['bbox_pred'] = bbox_pred
            if cfg.TEST.PROPOSALS_OUT:
                import os
                import json
                import numpy as np

                # Apply the transform right here and output the 1000 transformed boxes
                bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
                box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                shift_boxes = box_utils.bbox_transform(
                    rpn_ret['rois'][:, 1:5], box_deltas,
                    cfg.MODEL.BBOX_REG_WEIGHTS)
                shift_boxes = box_utils.clip_tiled_boxes(
                    shift_boxes,
                    im_info.data.cpu().numpy().squeeze()[0:2])

                num_classes = cfg.MODEL.NUM_CLASSES
                onecls_pred_boxes = []
                inds_all = []
                for j in range(1, num_classes):
                    inds = np.where(cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                    boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                    onecls_pred_boxes += boxes_j.tolist()
                    inds_all.extend(inds.tolist())

                inds_all = np.array(inds_all, dtype=np.int64)
                aligned_proposals = rpn_ret['rois'][:, 1:5][inds_all]
                aligned_boxes = np.array(onecls_pred_boxes, dtype=np.float32)

                assert inds_all.shape[0] == aligned_boxes.shape[0]
                assert aligned_proposals.size == aligned_boxes.size

                path = "/nfs/project/libo_i/Boosting/Anchor_Info"
                with open(os.path.join(path, "proposals.json"), "w") as fp:
                    json.dump(aligned_proposals.tolist(), fp)

                with open(os.path.join(path, "boxes.json"), "w") as fp:
                    json.dump(aligned_boxes.tolist(), fp)

        return return_dict
Example #15
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        rpn_ret = self.RPN(blob_conv, im_info, roidb)
        # rpn proposals

        # if self.training:
        #     # can be used to infer fg/bg ratio
        #     return_dict['rois_label'] = rpn_ret['labels_int32']

        rois_certification = False
        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if rois_certification:
            lvl_min = cfg.FPN.ROI_MIN_LEVEL
            lvl_max = cfg.FPN.ROI_MAX_LEVEL
            test_rpn_ret = {'rois': rpn_ret['rois']}
            lvls = fpn_utils.map_rois_to_fpn_levels(test_rpn_ret['rois'],
                                                    lvl_min, lvl_max)
            rois_idx_order = np.empty((0, ))
            test_rois = test_rpn_ret['rois']

            for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)):
                idx_lvl = np.where(lvls == lvl)[0]
                rois_lvl = test_rois[idx_lvl, :]
                rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
                test_rpn_ret['rois_fpn{}'.format(lvl)] = rois_lvl

            rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32,
                                                                 copy=False)
            test_rpn_ret['rois_idx_restore_int32'] = rois_idx_restore

            test_feat = self.Box_Head(blob_conv, test_rpn_ret)
            test_cls_score, test_bbox_pred = self.Box_Outs(test_feat)

            test_cls_score = test_cls_score.data.cpu().numpy().squeeze()
            test_bbox_pred = test_bbox_pred.data.cpu().numpy().squeeze()

        if not cfg.MODEL.RPN_ONLY:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            # bbox proposals
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret)
            cls_score, bbox_pred = self.Box_Outs(box_feat)
        else:
            # TODO: complete the returns for RPN only situation
            pass

        # Loss computation starts here
        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            rpn_kwargs.update(
                dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                     if (k.startswith('rpn_cls_logits')
                         or k.startswith('rpn_bbox_pred'))))
            loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                **rpn_kwargs)
            if cfg.FPN.FPN_ON:
                for i, lvl in enumerate(
                        range(cfg.FPN.RPN_MIN_LEVEL,
                              cfg.FPN.RPN_MAX_LEVEL + 1)):
                    return_dict['losses']['loss_rpn_cls_fpn%d' %
                                          lvl] = loss_rpn_cls[i]
                    return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                          lvl] = loss_rpn_bbox[i]
            else:
                return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

            # bbox loss
            loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                cls_score, bbox_pred, rpn_ret['labels_int32'],
                rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
                rpn_ret['bbox_outside_weights'])
            return_dict['losses']['loss_cls'] = loss_cls
            return_dict['losses']['loss_bbox'] = loss_bbox
            return_dict['metrics']['accuracy_cls'] = accuracy_cls

            if cfg.MODEL.MASK_ON:
                if getattr(self.Mask_Head, 'SHARE_RES5', False):
                    mask_feat = self.Mask_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                else:
                    mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                mask_pred = self.Mask_Outs(mask_feat)
                # return_dict['mask_pred'] = mask_pred
                # mask loss
                loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                    mask_pred, rpn_ret['masks_int32'])
                return_dict['losses']['loss_mask'] = loss_mask

            if cfg.MODEL.KEYPOINTS_ON:
                if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                    # No corresponding keypoint head implemented yet (nor in Detectron)
                    # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                    kps_feat = self.Keypoint_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_keypoints_int32=rpn_ret[
                            'roi_has_keypoint_int32'])
                else:
                    kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                kps_pred = self.Keypoint_Outs(kps_feat)
                # return_dict['keypoints_pred'] = kps_pred
                # keypoints loss
                if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'])
                else:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'],
                        rpn_ret['keypoint_loss_normalizer'])
                return_dict['losses']['loss_kps'] = loss_keypoints

            # pytorch0.4 bug on gathering scalar(0-dim) tensors
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        else:
            # Testing
            return_dict['rois'] = rpn_ret['rois']
            import json
            if cfg.TEST.IOU_OUT:
                # rois can be fetched directly from rpn_ret
                with open(
                        "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/raw_roi.json",
                        'w') as f:
                    json.dump((return_dict['rois'][:, 1:] /
                               im_info.numpy()[0][2]).tolist(), f)

                # In FPN mode the redistributed scores have to be fetched inside
                # the collect_and_distribute... function, so the json is dumped
                # directly in collect_and_distribute_fpn_rpn_proposals.py;
                # only the RPN_ONLY mode values are handled here.
                if not cfg.FPN.FPN_ON:
                    with open(
                            "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/rois_score.json",
                            'w') as f:
                        score_2_json = []
                        for item in rpn_ret['rpn_roi_probs']:
                            score_2_json.append(item.item())
                        json.dump(score_2_json, f)

            # Start the second fast_head stage: first compute pred_box from rois and bbox_delta
            if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
                lvl_min = cfg.FPN.ROI_MIN_LEVEL
                lvl_max = cfg.FPN.ROI_MAX_LEVEL
                if cfg.FPN.FPN_ON:
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
                    box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                    shift_boxes = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    shift_boxes = box_utils.clip_tiled_boxes(
                        shift_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])
                    num_classes = cfg.MODEL.NUM_CLASSES

                    onecls_pred_boxes = []
                    onecls_score = []
                    dets_cls = {}
                    count = 0
                    for j in range(1, num_classes):
                        inds = np.where(
                            cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                        score_j = cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_score += score_j.tolist()
                        dets_cls.update({j: []})
                        for k in range(len(boxes_j.tolist())):
                            dets_cls[j].append(count)
                            count += 1

                    assert count == len(onecls_pred_boxes)
                    stage2_rois_score = np.array(onecls_score,
                                                 dtype=np.float32)
                    stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)

                    # Redistribute stage2_rois across FPN levels using the
                    # helper functions provided by the fpn_utils module
                    cls_tracker = {}
                    if not stage2_rois.tolist():
                        stage1_pred_iou = stage2_rois_score.tolist()
                        stage2_final_boxes = np.empty((0, ))
                        stage2_final_score = np.empty((0, ))

                        logger.info("Detections above threshold is null.")
                    else:
                        alter_rpn = {}
                        unresize_stage2_rois = stage2_rois * im_scale
                        # unresize_stage2_rois = np.concatenate((unresize_stage2_rois, unresized_rois[:, 1:5]))

                        lvls = fpn_utils.map_rois_to_fpn_levels(
                            unresize_stage2_rois, lvl_min, lvl_max)
                        # TAG: We might need to visualize "stage2_rois" to make sure.
                        rois_idx_order = np.empty((0, ))
                        dummy_batch = np.zeros(
                            (unresize_stage2_rois.shape[0], 1),
                            dtype=np.float32)
                        alter_rpn["rois"] = np.hstack(
                            (dummy_batch,
                             unresize_stage2_rois)).astype(np.float32,
                                                           copy=False)
                        # alter_rpn['rois'] = np.concatenate((alter_rpn['rois'], unresized_rois))

                        for output_idx, lvl in enumerate(
                                range(lvl_min, lvl_max + 1)):
                            idx_lvl = np.where(lvls == lvl)[0]
                            rois_lvl = unresize_stage2_rois[idx_lvl, :]
                            rois_idx_order = np.concatenate(
                                (rois_idx_order, idx_lvl))
                            _ = np.zeros((rois_lvl.shape[0], 1),
                                         dtype=np.float32)
                            alter_rpn['rois_fpn{}'.format(lvl)] = np.hstack(
                                (_, rois_lvl)).astype(np.float32, copy=False)

                        rois_idx_restore = np.argsort(rois_idx_order).astype(
                            np.int32, copy=False)
                        alter_rpn['rois_idx_restore_int32'] = rois_idx_restore
                        # Go through 2nd stage of FPN and fast_head
                        stage2_feat = self.Box_Head(blob_conv, alter_rpn)
                        stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                            stage2_feat)

                        # Transform the predicted shifts back to absolute coordinates to get the final pred box coordinates
                        stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                        ).squeeze()
                        stage2_cls_score = stage2_cls_score.data.cpu().numpy()

                        stage2_box_deltas = stage2_bbox_pred.reshape(
                            [-1, bbox_pred.shape[-1]])
                        # Add some variance to box delta
                        if cfg.FAST_RCNN.STAGE1_TURBULENCE:
                            import random
                            for i in range(len(stage2_box_deltas)):
                                for j in range(len(stage2_box_deltas[i])):
                                    stage2_box_deltas[i][j] *= random.uniform(
                                        0.9, 1.1)

                        stage2_cls_out = box_utils.bbox_transform(
                            stage2_rois, stage2_box_deltas,
                            cfg.MODEL.BBOX_REG_WEIGHTS)
                        stage2_cls_out = box_utils.clip_tiled_boxes(
                            stage2_cls_out,
                            im_info.data.cpu().numpy().squeeze()[0:2])
                        onecls_pred_boxes = []
                        onecls_score = []
                        count = 0
                        for j in range(1, num_classes):
                            inds = np.where(
                                stage2_cls_score[:,
                                                 j] > cfg.TEST.SCORE_THRESH)[0]
                            boxes_j = stage2_cls_out[inds, j * 4:(j + 1) * 4]
                            score_j = stage2_cls_score[inds, j]
                            dets_j = np.hstack(
                                (boxes_j,
                                 score_j[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
                            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
                            boxes_j = boxes_j[keep]
                            score_j = score_j[keep]
                            # Track which class each box belongs to
                            onecls_score += score_j.tolist()
                            onecls_pred_boxes += boxes_j.tolist()

                            for k in range(len(score_j)):
                                cls_tracker.update({count: j})
                                count += 1

                        assert count == len(onecls_score)
                        stage2_final_boxes = np.array(onecls_pred_boxes,
                                                      dtype=np.float32)
                        stage2_final_score = np.array(onecls_score,
                                                      dtype=np.float32)
                        inds = np.where(stage2_final_score > 0.3)[0]

                        # Filtered by keep index...
                        preserve_stage2_final_boxes = copy.deepcopy(
                            stage2_final_boxes)
                        preserve_stage2_final_score = copy.deepcopy(
                            stage2_final_score)
                        stage2_final_boxes = stage2_final_boxes[inds]
                        stage2_final_score = stage2_final_score[inds]

                        # If nothing is left after the 0.3 threshold filter, fall back to the preserved boxes with a lower 0.1 threshold.
                        if stage2_final_boxes.size == 0:
                            lower_inds = np.where(
                                preserve_stage2_final_score > 0.1)[0]
                            stage2_final_boxes = preserve_stage2_final_boxes[
                                lower_inds]
                            stage2_final_score = preserve_stage2_final_score[
                                lower_inds]

                        else:
                            del preserve_stage2_final_boxes
                            del preserve_stage2_final_score

                        # If all boxes are classified as the background class.
                        if stage2_final_boxes.size == 0:
                            stage1_pred_iou = stage2_rois_score.tolist()
                            stage2_final_boxes = np.empty((0, ))
                            stage2_final_score = np.empty((0, ))
                            logger.info("Detections above threshold is null.")

                        else:
                            # Restore stage2_pred_boxes to match the index with stage2_rois, Compute IOU between
                            # final_boxes and stage2_rois, one by one
                            flag = "cross_product"
                            if flag == "element_wise":
                                if stage2_final_boxes.shape[
                                        0] == stage2_rois.shape[0]:
                                    restored_stage2_final_boxes = stage2_final_boxes[
                                        rois_idx_restore]
                                    stage1_pred_iou = []
                                    for ind, item in enumerate(stage2_rois):
                                        stage1 = np.array(
                                            item, dtype=np.float32).reshape(
                                                (1, 4))
                                        stage2 = np.array(
                                            restored_stage2_final_boxes[ind],
                                            dtype=np.float32).reshape((1, 4))
                                        iou = box_utils.bbox_overlaps(
                                            stage1, stage2)
                                        stage1_pred_iou.append(
                                            iou.squeeze().item())
                                else:
                                    logger.info(
                                        "Mistake while processing {}".format(
                                            str(im_info)))
                            elif flag == "cross_product":
                                iou = box_utils.bbox_overlaps(
                                    stage2_rois, stage2_final_boxes)
                                stage1_pred_iou = iou.max(axis=1).tolist()

                    # stage1_pred is another name for stage2_rois
                    assert len(stage1_pred_iou) == len(stage2_rois)
                    if cfg.FAST_RCNN.IOU_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)

                    elif cfg.FAST_RCNN.SCORE_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)

                else:
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    # unscale back to raw image space
                    box_deltas = bbox_pred.data.cpu().numpy().squeeze()
                    fast_stage1_score = cls_score.data.cpu().numpy().squeeze()

                    box_deltas = box_deltas.reshape([-1, bbox_pred.shape[-1]])
                    stage2_rois = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_rois = box_utils.clip_tiled_boxes(
                        stage2_rois,
                        im_info.data.cpu().numpy().squeeze()[0:2])

                    num_classes = cfg.MODEL.NUM_CLASSES

                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        # use the numpy copy of the scores for thresholding
                        inds = np.where(
                            fast_stage1_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = stage2_rois[inds, j * 4:(j + 1) * 4]
                        score_j = fast_stage1_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)
                    stage2_rois_score = np.array(onecls_cls_score,
                                                 dtype=np.float32)

                    assert len(stage2_rois) == len(stage2_rois_score)

                    # Send stage2 rois to the next-stage fast head and do ROI ALIGN again
                    # by modifying rpn_ret['rois'], rpn_ret['rpn_rois'] and rpn_ret['rpn_roi_probs']

                    rpn_ret['rois'] = stage2_rois
                    rpn_ret['rpn_rois'] = stage2_rois
                    rpn_ret['rpn_roi_probs'] = stage2_rois_score
                    stage2_box_feat = self.Box_Head(blob_conv, rpn_ret)
                    stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                        stage2_box_feat)

                    stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                    ).squeeze()
                    stage2_bbox_pred = stage2_bbox_pred.reshape(
                        [-1, bbox_pred.shape[-1]])

                    stage2_cls_pred_boxes = box_utils.bbox_transform(
                        stage2_rois, stage2_bbox_pred,
                        cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_cls_pred_boxes = box_utils.clip_tiled_boxes(
                        stage2_cls_pred_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])

                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        inds = np.where(
                            stage2_cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        if len(inds) != 0:
                            logger.debug("class %d has %d detections above threshold", j, len(inds))
                        boxes_j = stage2_cls_pred_boxes[inds,
                                                        j * 4:(j + 1) * 4]
                        score_j = stage2_cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_bbox_pred = np.array(onecls_pred_boxes,
                                                dtype=np.float32)
                    stage2_bbox_pred_score = np.array(onecls_cls_score,
                                                      dtype=np.float32)

        # get stage2 pred_boxes here

        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred
        return return_dict
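# --- Hedged sketch (not part of the original model code) ---
# The "cross_product" branch above scores each stage-1 RoI by its best IoU
# against any stage-2 refined box via box_utils.bbox_overlaps followed by
# iou.max(axis=1). A minimal NumPy re-implementation of that pairwise-IoU
# step, assuming plain [x1, y1, x2, y2] boxes without Detectron's +1 pixel
# convention:
import numpy as np

def pairwise_iou(boxes_a, boxes_b):
    """Return an (N, M) IoU matrix for two sets of boxes."""
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    # Broadcast to (N, M) intersection rectangles
    x1 = np.maximum(boxes_a[:, None, 0], boxes_b[None, :, 0])
    y1 = np.maximum(boxes_a[:, None, 1], boxes_b[None, :, 1])
    x2 = np.minimum(boxes_a[:, None, 2], boxes_b[None, :, 2])
    y2 = np.minimum(boxes_a[:, None, 3], boxes_b[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    return inter / (area_a[:, None] + area_b[None, :] - inter)

# stage1_pred_iou = pairwise_iou(stage2_rois, stage2_final_boxes).max(axis=1)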
Example #16
    def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas,
                                scores):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; first partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(),
                                   pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                             (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
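# --- Hedged sketch (illustration only) ---
# Steps 4-5 in proposals_for_one_image avoid a full sort: np.argpartition
# extracts the top-K scores unsorted in O(n), and only those K are then
# sorted. The same pattern on toy data:
import numpy as np

scores = np.random.rand(200000)
K = 6000
top_unsorted = np.argpartition(-scores, K)[:K]                # O(n), any order
top_sorted = top_unsorted[np.argsort(-scores[top_unsorted])]  # sort only K
assert np.array_equal(np.sort(scores[top_sorted]),
                      np.sort(np.sort(scores)[::-1][:K]))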
Example #17
def im_detect_bbox(model, im, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        raise NotImplementedError('Can not handle tubes, need to extend dedup')
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # dump workspace blobs (debugging)
    # if 0:
    #    from utils.io import robust_pickle_dump
    #    import os, sys
    #    saved_blobs = {}
    #    ws_blobs = workspace.Blobs()
    #    for dst_name in ws_blobs:
    #        ws_blob = workspace.FetchBlob(dst_name)
    #        saved_blobs[dst_name] = ws_blob
    #    det_file = os.path.join('/tmp/output/data_dump_inflT1.pkl')
    #    robust_pickle_dump(saved_blobs, det_file)
    #    logger.info("DUMPED BLOBS")
    #    sys.exit(0)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they were
        # trained as linear SVMs
        scores = workspace.FetchBlob(core.ScopedName('cls_score')).squeeze()
    else:
        # use softmax estimated probabilities
        scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])
    time_dim = boxes.shape[-1] // 4

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4 * time_dim:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scales
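# --- Hedged sketch (illustration only) ---
# The DEDUP_BOXES block above hashes each RoI row into a scalar so np.unique
# can drop duplicate feature ROIs, then inv_index scatters the results back
# to the original ordering. The round trip in isolation (0.0625 stands in
# for cfg.DEDUP_BOXES):
import numpy as np

rois = np.array([[0, 10, 10, 20, 20],
                 [0, 30, 30, 40, 40],
                 [0, 10, 10, 20, 20]], dtype=np.float32)  # row 2 repeats row 0
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * 0.0625).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
unique_rois = rois[index]          # the network only sees these
restored = unique_rois[inv_index]  # maps per-unique outputs back to all rows
assert np.array_equal(restored, rois)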
Example #18
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Prepare the bbox for testing"""

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [Variable(torch.from_numpy(inputs['data']), volatile=True)]
        inputs['im_info'] = [Variable(torch.from_numpy(inputs['im_info']), volatile=True)]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.reshape(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS \
                         + cfg.TRAIN.BBOX_NORMALIZE_MEANS
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
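# --- Hedged sketch (illustration only) ---
# The legacy BBOX_NORMALIZE_TARGETS_PRECOMPUTED branch above undoes the
# training-time whitening of the regression targets. Since box_deltas is a
# NumPy array at that point, the reshape-scale-shift step looks like this in
# isolation (the stds/means values below are made up for illustration):
import numpy as np

box_deltas = np.random.randn(3, 8).astype(np.float32)  # 3 RoIs, 2 classes x 4
stds = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32)
means = np.zeros(4, dtype=np.float32)
n_cols = box_deltas.shape[1]
box_deltas = (box_deltas.reshape(-1, 4) * stds + means).reshape(-1, n_cols)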
Example #19
def im_detect_bbox(model,
                   im,
                   target_scale,
                   target_max_size,
                   roidb=None,
                   boxes=None):
    """Prepare the bbox for testing"""

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = [inputs['rois'][index, :]]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    # The model will fail when no proposal is given, so we add a pseudo roi and remove it after the forward
    zero_rois = ('rois' in inputs and inputs['rois'][0].shape[0] == 0
                 and not cfg.TEST.TAGGING)
    if zero_rois:
        inputs['rois'] = [np.zeros((1, 5), dtype=np.float32)]

    if roidb is not None and cfg.TEST.TAGGING:  # If TAGGING mode, we feed ground truth roidb and get recall score
        inputs['roidb'] = [[roidb]]

    return_dict = model(**inputs)

    if cfg.MODEL.FASTER_RCNN or cfg.TEST.TAGGING:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if zero_rois or return_dict['rois'].sum() == 0:
        scores = scores[:-1, :]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.reshape(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS \
                         + cfg.TRAIN.BBOX_NORMALIZE_MEANS
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN and not cfg.TEST.TAGGING:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
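# --- Hedged sketch (illustration only) ---
# The zero_rois guard above feeds a single dummy RoI through the network when
# the proposal set is empty (the forward pass cannot handle zero rows) and
# strips the corresponding score row afterwards. The shape bookkeeping:
import numpy as np

rois = np.zeros((0, 5), dtype=np.float32)      # empty proposal set
zero_rois = rois.shape[0] == 0
if zero_rois:
    rois = np.zeros((1, 5), dtype=np.float32)  # pseudo RoI keeps shapes valid
scores = np.random.rand(rois.shape[0], 81)     # stand-in for the model output
if zero_rois:
    scores = scores[:-1, :]                    # drop the pseudo RoI's row
assert scores.shape[0] == 0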
Example #20
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image."""
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]
    boxes = detections[:, 0:4]
    scores = detections[:, 4]
    classes = detections[:, 5]
    return boxes, scores, classes
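# --- Hedged sketch (illustration only) ---
# im_detections recovers (anchor, class, y, x) positions from flat indices
# into the 5-D class-probability tensor with np.unravel_index. The indexing
# round trip on a toy tensor:
import numpy as np

cls_prob = np.random.rand(1, 9, 80, 16, 16)  # (N, A, num_classes, H, W)
inds = np.where(cls_prob.ravel() > 0.999)[0]
inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
anchor_ids, classes = inds_5d[:, 1], inds_5d[:, 2]
y, x = inds_5d[:, 3], inds_5d[:, 4]
assert np.allclose(cls_prob.ravel()[inds],
                   cls_prob[0, anchor_ids, classes, y, x])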
Example #21
def im_detect_bbox(model, im, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        #for b in workspace.Blobs():
        #print(b)
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred_voc')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scales
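# --- Hedged sketch (illustration only) ---
# The snippets above repeatedly call box_utils.nms on rows of
# [x1, y1, x2, y2, score]. A minimal pure-NumPy greedy NMS with the same
# interface, for reference (without Detectron's +1 pixel area convention):
import numpy as np

def nms(dets, thresh):
    x1, y1, x2, y2, scores = dets.T
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # process highest-scoring boxes first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the kept box against all remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]  # suppress heavy overlaps
    return keep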
Example #22
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
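# --- Hedged sketch (illustration only) ---
# The tail of im_detect_bbox above scatters the flat (N, 6) detections array
# into the per-class cls_boxes list expected downstream. In isolation:
import numpy as np

detections = np.array([[0, 0, 5, 5, 0.9, 1],
                       [1, 1, 6, 6, 0.8, 2],
                       [2, 2, 7, 7, 0.7, 1]], dtype=np.float32)
num_classes = 3
cls_boxes = [[] for _ in range(num_classes)]
for c in range(1, num_classes):
    inds = np.where(detections[:, 5] == c)[0]
    cls_boxes[c] = detections[inds, :5]  # [x1, y1, x2, y2, score] per class
assert cls_boxes[1].shape == (2, 5) and cls_boxes[2].shape == (1, 5)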
    def proposals_for_one_image(
            self, im_info, all_anchors, bbox_deltas, scores, frames_per_vid):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A * frames_per_vid, H, W) format from
        #     conv output
        #   - transpose to (H, W, 4 * A * frames_per_vid)
        #   - reshape to (H * W * A, 4 * frames_per_vid) where rows are ordered
        #     by (H, W, A) in slowest to fastest order to match the enumerated
        #     anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((
            -1, 4 * frames_per_vid))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #     This computes the score for the tube
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN > len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; first partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(
                -scores.squeeze(), pre_nms_topN
            )[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # 1. Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(
            all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        # again, needs to be done one each frame and need to "AND" over frames
        keep = np.arange(proposals.shape[0])
        for frame_id in range(frames_per_vid):
            keep = np.intersect1d(
                keep, _filter_boxes(
                    proposals[:, frame_id * 4: (frame_id + 1) * 4],
                    min_size, im_info))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            # When we're training on multiple GPUs, running NMS on the GPU
            # causes serious perf issues. We need to debug, but for now force
            # running on the CPU when training
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
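# --- Hedged sketch (illustration only) ---
# Step 3 of the tube variant above keeps a proposal only if every frame's box
# passes the min-size test, implemented by intersecting per-frame keep
# indices. The "AND over frames" pattern in miniature (with a toy
# _filter_boxes that skips the im_info-based scaling of the real helper):
import numpy as np

def _filter_boxes(boxes, min_size):
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    return np.where((w >= min_size) & (h >= min_size))[0]

frames_per_vid = 2
proposals = np.array([[0, 0, 10, 10, 0, 0, 10, 10],
                      [0, 0, 2, 2, 0, 0, 10, 10]], dtype=np.float32)
keep = np.arange(proposals.shape[0])
for f in range(frames_per_vid):
    keep = np.intersect1d(
        keep, _filter_boxes(proposals[:, f * 4:(f + 1) * 4], min_size=5))
assert keep.tolist() == [0]  # tube 1 is too small in its first frame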