Ejemplo n.º 1
0
 def test_bbox_dataset_to_prediction_roundtrip(self):
     """Simulate the process of reading a ground-truth box from a dataset,
     make predictions from proposals, convert the predictions back to the
     dataset format, and then use the COCO API to compute IoU overlap between
     the gt box and the predictions. These should have IoU of 1.
     """
     weights = (5, 5, 10, 10)
     # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format
     gt_xywh_box = [10, 20, 100, 150]
     # 2/ convert it to our internal (x1, y1, x2, y2) format
     gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)
     # 3/ consider nearby proposal boxes
     prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)
     # 4/ compute proposal-to-gt transformation deltas
     deltas = box_utils.bbox_transform_inv(
         prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights
     )
     # 5/ use deltas to transform proposals to xyxy predicted box
     pred_xyxy_boxes = box_utils.bbox_transform(
         prop_xyxy_boxes, deltas, weights=weights
     )
     # 6/ convert xyxy predicted box to xywh predicted box
     pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)
     # 7/ use COCO API to compute IoU
     not_crowd = [int(False)] * pred_xywh_boxes.shape[0]
     ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd)
     np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
Ejemplo n.º 2
0
 def test_bbox_transform_and_inverse(self):
     weights = (5, 5, 10, 10)
     src_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     dst_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     deltas = box_utils.bbox_transform_inv(
         src_boxes, dst_boxes, weights=weights
     )
     dst_boxes_reconstructed = box_utils.bbox_transform(
         src_boxes, deltas, weights=weights
     )
     np.testing.assert_array_almost_equal(
         dst_boxes, dst_boxes_reconstructed, decimal=5
     )
Ejemplo n.º 3
0
def postprocess_output(rois,scaling_factor,im_size,class_scores,bbox_deltas,bbox_reg_weights = (10.0,10.0,5.0,5.0)):
    boxes = to_np(rois.div(scaling_factor).squeeze(0))
    bbox_deltas = to_np(bbox_deltas)    
    orig_im_size = to_np(im_size).squeeze()    
    # apply deltas
    pred_boxes = box_utils.bbox_transform(boxes, bbox_deltas, bbox_reg_weights)
    # clip on boundaries
    pred_boxes = box_utils.clip_tiled_boxes(pred_boxes,orig_im_size)    
    scores = to_np(class_scores)
    # Map scores and predictions back to the original set of boxes
    # This re-duplicates the previously removed boxes
    # Is there any use for this?
#    inv_index = to_np(batch['proposal_inv_index']).squeeze().astype(np.int64)
#    scores = scores[inv_index, :]
#    pred_boxes = pred_boxes[inv_index, :]
    # threshold on score and run nms to remove duplicates
    scores_final, boxes_final, boxes_per_class = box_results_with_nms_and_limit(scores, pred_boxes)
    
    return (scores_final, boxes_final, boxes_per_class)
Ejemplo n.º 4
0
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Prepare the bbox for testing"""

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS \
                         + cfg.TRAIN.BBOX_NORMALIZE_MEANS
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
Ejemplo n.º 5
0
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image."""
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]

    return cls_boxes
    def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self.training else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        min_area = cfg[cfg_key].RPN_MIN_AREA
        # print('generate_proposals:', pre_nms_topN, post_nms_topN, nms_thresh, min_size)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))
        # print('pre_nms:', bbox_deltas.shape, scores.shape)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(),
                                   pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                             (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]
        # print('pre_nms:', proposals.shape, scores.shape)

        if min_area > 0:
            keep = _filter_areas(proposals, min_area, im_info)
            proposals = proposals[keep, :]
            scores = scores[keep]
        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:

            # NMS_timer = Timer()
            # NMS_timer.tic()
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            # NMS_timer.toc()
            # logger.info('Only NMS time: {:.3f}s'.format(NMS_timer.average_time))

            # print('nms keep:', keep.shape)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        # print('final proposals:', proposals.shape, scores.shape)
        return proposals, scores
Ejemplo n.º 7
0
def im_detect_bbox_batch(model, ims, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        ims (ndarray): cfg.TEST.IMS_PER_BATCH color images to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores list((ndarray)): [R x K] array of object class scores for K classes
            (K includes background as object category 0)
        boxes list((ndarray)): [R x 4*K] array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(ims, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()

    # In case there is 1 proposal
    scores_batch = []
    pred_boxes_batch = []
    for i in range(cfg.TEST.IMS_PER_BATCH):
        # select batch
        select_inds = np.where(rois[:, 0] == i)

        # unscale back to raw image space
        boxes = rois[select_inds, 1:5] / im_scales[i]
        boxes = boxes.reshape([-1, boxes.shape[-1]])
        scores_i = scores[select_inds, :]
        scores_i = scores_i.reshape([-1, scores_i.shape[-1]])
        scores_batch.append(scores_i)

        if cfg.TEST.BBOX_REG:
            # In case there is 1 proposal
            box_deltas_i = box_deltas[select_inds, :]
            box_deltas_i = box_deltas_i.reshape([-1, box_deltas_i.shape[-1]])
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Remove predictions for bg class (compat with MSRA code)
                box_deltas_i = box_deltas_i[:, -4:]
            pred_boxes = box_utils.bbox_transform(boxes, box_deltas_i,
                                                  cfg.MODEL.BBOX_REG_WEIGHTS)
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, ims[i].shape)
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                pred_boxes = (np.tile(pred_boxes, (1, scores_i.shape[1])))
            pred_boxes_batch.append(pred_boxes)
        else:
            logger.error('Not implemented.')
            return None, None, None

        if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
            logger.error('Not implemented.')
            return None, None, None

    return scores_batch, pred_boxes_batch, im_scales
Ejemplo n.º 8
0
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        rpn_ret = self.RPN(blob_conv, im_info, roidb)

        # if self.training:
        #     # can be used to infer fg/bg ratio
        #     return_dict['rois_label'] = rpn_ret['labels_int32']

        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if not cfg.MODEL.RPN_ONLY:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret)
            cls_score, bbox_pred = self.Box_Outs(box_feat)
        else:
            # TODO: complete the returns for RPN only situation
            pass

        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            rpn_kwargs.update(
                dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                     if (k.startswith('rpn_cls_logits')
                         or k.startswith('rpn_bbox_pred'))))
            loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                **rpn_kwargs)

            if cfg.RPN.VIS_QUANT_TARGET:
                import numpy as np
                import json
                import os
                import time

                gt_boxes = []
                gt_label = roidb[0]['gt_classes']
                for inds, item in enumerate(gt_label):
                    if item != 0:
                        gt_boxes.append(roidb[0]['boxes'][inds])

                gt_boxes = np.array(gt_boxes, dtype=np.float32)
                gt_boxes *= im_info.detach().numpy()[:, 2]

                path = "/nfs/project/libo_i/Boosting/Targets_Info"
                if not os.path.exists(path):
                    os.makedirs(path)
                b, c, h, w = rpn_kwargs['rpn_cls_logits_fpn3'].shape
                sample_targets = rpn_kwargs[
                    'rpn_bbox_targets_wide_fpn3'][:, :, :h, :w]

                line_targets = sample_targets.detach().data.cpu().numpy()
                with open(os.path.join(path, "quant_anchors.json"), "r") as fp:
                    quant_anchors = np.array(json.load(fp), dtype=np.float32)
                    quant_anchors = quant_anchors[:h, :w]

                line_targets = line_targets[:, 4:8, :, :].transpose(
                    (0, 2, 3, 1)).reshape(quant_anchors.shape)
                line_targets = line_targets.reshape(-1, 4)

                width = im_data.shape[3]
                height = im_data.shape[2]
                # 在这里加上targets的偏移

                line_quant_anchors = quant_anchors.reshape(-1, 4)
                pred_boxes = box_utils.onedim_bbox_transform(
                    line_quant_anchors, line_targets)
                pred_boxes = box_utils.clip_tiled_boxes(
                    pred_boxes, (height, width, 3))

                im = im_data.detach().cpu().numpy().reshape(3, height,
                                                            width).transpose(
                                                                (1, 2, 0))

                means = np.squeeze(cfg.PIXEL_MEANS)
                for i in range(3):
                    im[:, :, i] += means[i]

                im = im.astype(int)
                dpi = 200
                fig = plt.figure(frameon=False)
                fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
                ax = plt.Axes(fig, [0., 0., 1., 1.])
                ax.axis('off')
                fig.add_axes(ax)
                ax.imshow(im[:, :, ::-1])
                # 在im上添加gt
                for item in gt_boxes:
                    ax.add_patch(
                        plt.Rectangle((item[0], item[1]),
                                      item[2] - item[0],
                                      item[3] - item[1],
                                      fill=False,
                                      edgecolor='white',
                                      linewidth=1,
                                      alpha=1))

                cnt = 0
                for inds, before_item in enumerate(line_quant_anchors):
                    after_item = pred_boxes[inds]
                    targets_i = line_targets[inds]
                    if np.sum(targets_i) == 0:
                        continue
                    ax.add_patch(
                        plt.Rectangle((before_item[0], before_item[1]),
                                      before_item[2] - before_item[0],
                                      before_item[3] - before_item[1],
                                      fill=False,
                                      edgecolor='r',
                                      linewidth=1,
                                      alpha=1))

                    ax.add_patch(
                        plt.Rectangle((after_item[0], after_item[1]),
                                      after_item[2] - after_item[0],
                                      after_item[3] - after_item[1],
                                      fill=False,
                                      edgecolor='g',
                                      linewidth=1,
                                      alpha=1))

                    logger.info("valid boxes: {}".format(cnt))
                    cnt += 1

                if cnt != 0:
                    ticks = time.time()
                    fig.savefig(
                        "/nfs/project/libo_i/Boosting/Targets_Info/{}.png".
                        format(ticks),
                        dpi=dpi)

                plt.close('all')

            if cfg.FPN.FPN_ON:
                for i, lvl in enumerate(
                        range(cfg.FPN.RPN_MIN_LEVEL,
                              cfg.FPN.RPN_MAX_LEVEL + 1)):
                    return_dict['losses']['loss_rpn_cls_fpn%d' %
                                          lvl] = loss_rpn_cls[i]
                    return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                          lvl] = loss_rpn_bbox[i]
            else:
                return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

            # bbox loss
            loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                cls_score, bbox_pred, rpn_ret['labels_int32'],
                rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
                rpn_ret['bbox_outside_weights'])
            if cfg.RPN.ZEROLOSS:
                zero_loss_bbox = torch.Tensor([0.]).squeeze().cuda()
                zero_loss_bbox.requires_grad = True
                zero_loss_cls = torch.Tensor([0.]).squeeze().cuda()
                zero_loss_cls.requires_grad = True
                return_dict['losses']['loss_bbox'] = zero_loss_bbox
                return_dict['losses']['loss_cls'] = zero_loss_cls

            else:
                return_dict['losses']['loss_bbox'] = loss_bbox
                return_dict['losses']['loss_cls'] = loss_cls

            return_dict['metrics']['accuracy_cls'] = accuracy_cls

            if cfg.MODEL.MASK_ON:
                if getattr(self.Mask_Head, 'SHARE_RES5', False):
                    mask_feat = self.Mask_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                else:
                    mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                mask_pred = self.Mask_Outs(mask_feat)
                # return_dict['mask_pred'] = mask_pred
                # mask loss
                loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                    mask_pred, rpn_ret['masks_int32'])
                return_dict['losses']['loss_mask'] = loss_mask

            if cfg.MODEL.KEYPOINTS_ON:
                if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                    # No corresponding keypoint head implemented yet (Neither in Detectron)
                    # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                    kps_feat = self.Keypoint_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_keypoints_int32=rpn_ret[
                            'roi_has_keypoint_int32'])
                else:
                    kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                kps_pred = self.Keypoint_Outs(kps_feat)
                # return_dict['keypoints_pred'] = kps_pred
                # keypoints loss
                if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'])
                else:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'],
                        rpn_ret['keypoint_loss_normalizer'])
                return_dict['losses']['loss_kps'] = loss_keypoints

            # pytorch0.4 bug on gathering scalar(0-dim) tensors
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        else:
            # Testing
            return_dict['rois'] = rpn_ret['rois']
            return_dict['cls_score'] = cls_score
            return_dict['bbox_pred'] = bbox_pred
            if cfg.TEST.PROPOSALS_OUT:
                import os
                import json
                import numpy as np

                # 直接在这里做变换,输出经过变换之后的1000个框
                bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
                box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                shift_boxes = box_utils.bbox_transform(
                    rpn_ret['rois'][:, 1:5], box_deltas,
                    cfg.MODEL.BBOX_REG_WEIGHTS)
                shift_boxes = box_utils.clip_tiled_boxes(
                    shift_boxes,
                    im_info.data.cpu().numpy().squeeze()[0:2])

                num_classes = cfg.MODEL.NUM_CLASSES
                onecls_pred_boxes = []
                inds_all = []
                for j in range(1, num_classes):
                    inds = np.where(cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                    boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                    onecls_pred_boxes += boxes_j.tolist()
                    inds_all.extend(inds.tolist())

                inds_all = np.array(inds_all, dtype=np.int)
                aligned_proposals = rpn_ret['rois'][:, 1:5][inds_all]
                aligned_boxes = np.array(onecls_pred_boxes, dtype=np.float32)

                assert inds_all.shape[0] == aligned_boxes.shape[0]
                assert aligned_proposals.size == aligned_boxes.size

                path = "/nfs/project/libo_i/Boosting/Anchor_Info"
                with open(os.path.join(path, "proposals.json"), "w") as fp:
                    json.dump(aligned_proposals.tolist(), fp)

                with open(os.path.join(path, "boxes.json"), "w") as fp:
                    json.dump(aligned_boxes.tolist(), fp)

        return return_dict
Ejemplo n.º 9
0
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale
Ejemplo n.º 10
0
    def proposals_for_one_image(
            self, im_info, all_anchors, bbox_deltas, scores, frames_per_vid):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A * frames_per_vid, H, W) format from
        #     conv output
        #   - transpose to (H, W, 4 * A * frames_per_vid)
        #   - reshape to (H * W * A, 4 * frames_per_vid) where rows are ordered
        #     by (H, W, A) in slowest to fastest order to match the enumerated
        #     anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((
            -1, 4 * frames_per_vid))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #     This computes the score for the tube
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN > len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(
                -scores.squeeze(), pre_nms_topN
            )[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # 1. Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(
            all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        # again, needs to be done one each frame and need to "AND" over frames
        keep = np.arange(proposals.shape[0])
        for frame_id in range(frames_per_vid):
            keep = np.intersect1d(
                keep, _filter_boxes(
                    proposals[:, frame_id * 4: (frame_id + 1) * 4],
                    min_size, im_info))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            # When we're training on multiple GPUs, running NMS on the GPU
            # causes serious perf issues. We need to debug, but for now force
            # running on the CPU when training
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
Ejemplo n.º 11
0
def im_detect_bbox(model, im, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        raise NotImplementedError('Can not handle tubes, need to extend dedup')
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # dump workspace blobs (debugging)
    # if 0:
    #    from utils.io import robust_pickle_dump
    #    import os, sys
    #    saved_blobs = {}
    #    ws_blobs = workspace.Blobs()
    #    for dst_name in ws_blobs:
    #        ws_blob = workspace.FetchBlob(dst_name)
    #        saved_blobs[dst_name] = ws_blob
    #    det_file = os.path.join('/tmp/output/data_dump_inflT1.pkl')
    #    robust_pickle_dump(saved_blobs, det_file)
    #    logger.info("DUMPED BLOBS")
    #    sys.exit(0)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they were
        # trained as linear SVMs
        scores = workspace.FetchBlob(core.ScopedName('cls_score')).squeeze()
    else:
        # use softmax estimated probabilities
        scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])
    time_dim = boxes.shape[-1] // 4

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4 * time_dim:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scales
Ejemplo n.º 12
0
    def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas,
                                scores, frames_per_vid):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A * frames_per_vid, H, W) format from
        #     conv output
        #   - transpose to (H, W, 4 * A * frames_per_vid)
        #   - reshape to (H * W * A, 4 * frames_per_vid) where rows are ordered
        #     by (H, W, A) in slowest to fastest order to match the enumerated
        #     anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape(
            (-1, 4 * frames_per_vid))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #     This computes the score for the tube
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN > len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(),
                                   pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # 1. Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                             (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        # again, needs to be done one each frame and need to "AND" over frames
        keep = np.arange(proposals.shape[0])
        for frame_id in range(frames_per_vid):
            keep = np.intersect1d(
                keep,
                _filter_boxes(proposals[:, frame_id * 4:(frame_id + 1) * 4],
                              min_size, im_info))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            # When we're training on multiple GPUs, running NMS on the GPU
            # causes serious perf issues. We need to debug, but for now force
            # running on the CPU when training
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
Ejemplo n.º 13
0
def im_detect_bbox(model, im, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        #for b in workspace.Blobs():
        #print(b)
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred_voc')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scales
Ejemplo n.º 14
0
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Prepare the bbox for testing"""

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [Variable(torch.from_numpy(inputs['data']), volatile=True)]
        inputs['im_info'] = [Variable(torch.from_numpy(inputs['im_info']), volatile=True)]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS \
                         + cfg.TRAIN.BBOX_NORMALIZE_MEANS
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
Ejemplo n.º 15
0
    def forward(self, scores, gt_boxes, img_info):
        """
        Args:
            scores (Tensor[1, num_anchors * num_classes, H, W]): Classification scores.
            gt_boxes (Tensor[N, 6]): Ground-truth boxes in (x1, y1, x2, y2, class, person_id) format
            img_info (Tensor[3]): (height, width, scale)

        Returns:
            labels (Tensor): Ground-truth labels of the anchors.
            deltas (Tensor): Ground-truth regression deltas of the anchors.
            inside_weights, outside_weights (Tensor): Used to calculate smooth_l1_loss
        """
        # Algorithm:
        #
        # For each (H, W) location i
        #     Generate A anchors centered on cell i
        # Filter out-of-image anchors
        # Measure the overlaps between anchors and gt_boxes
        # Assign labels, deltas, inside_weights, outside_weights for each anchor

        assert scores.size(0) == 1, "Single batch only."
        height, width = scores.shape[-2:]

        # Enumerate all shifts (NOTE: torch.meshgrid is different from np.meshgrid)
        shift_x = torch.arange(0, width) * self.feat_stride
        shift_y = torch.arange(0, height) * self.feat_stride
        shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
        shift_x, shift_y = shift_x.contiguous(), shift_y.contiguous()
        shifts = torch.stack((shift_x.view(-1), shift_y.view(-1),
                              shift_x.view(-1), shift_y.view(-1)),
                             dim=1)
        shifts = shifts.type_as(gt_boxes)

        # Enumerate all shifted anchors:
        # Add A anchors (1, A, 4) to K shifts (K, 1, 4) to get shifted anchors (K, A, 4)
        # Reshape to (K * A, 4) shifted anchors
        A = self.num_anchors
        K = shifts.size(0)
        self.anchors = self.anchors.type_as(gt_boxes)
        anchors = self.anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(
            1, 0, 2)
        anchors = anchors.view(K * A, 4)

        # Only keep anchors inside the image
        keep = torch.nonzero((anchors[:, 0] >= 0)
                             & (anchors[:, 1] >= 0)
                             & (anchors[:, 2] < img_info[1])
                             & (anchors[:, 3] < img_info[0]))[:, 0]
        anchors = anchors[keep]

        overlaps = bbox_overlaps(anchors, gt_boxes[:, :4])

        if "DEBUG" in os.environ:
            import numpy as np

            argmax_overlaps = np.argmax(overlaps.cpu(), axis=1)
            max_overlaps = torch.from_numpy(
                np.max(overlaps.cpu().numpy(), axis=1))
            gt_max_overlaps = torch.from_numpy(
                np.max(overlaps.cpu().numpy(), axis=0)).type_as(overlaps)
            gt_argmax_overlaps = np.nonzero(overlaps == gt_max_overlaps)[:, 0]
        else:
            max_overlaps, argmax_overlaps = overlaps.max(dim=1)
            gt_max_overlaps = overlaps.max(dim=0)[0]
            gt_argmax_overlaps = torch.nonzero(overlaps == gt_max_overlaps)[:,
                                                                            0]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # The anchors which satisfied both positive and negative conditions will be as positive
        labels = gt_boxes.new(len(keep)).fill_(-1)
        # bg labels: below threshold IOU
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        # Subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = torch.nonzero(labels == 1)[:, 0]
        if len(fg_inds) > num_fg:
            if "DEBUG" in os.environ:
                disable_inds = fg_inds[:len(fg_inds) - num_fg]
            else:
                disable_inds = torch_rand_choice(fg_inds,
                                                 len(fg_inds) - num_fg)
            labels[disable_inds] = -1

        # Subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - torch.sum(labels == 1)
        bg_inds = torch.nonzero(labels == 0)[:, 0]
        if len(bg_inds) > num_bg:
            if "DEBUG" in os.environ:
                disable_inds = bg_inds[:len(bg_inds) - num_bg]
            else:
                disable_inds = torch_rand_choice(bg_inds,
                                                 len(bg_inds) - num_bg)
            labels[disable_inds] = -1

        deltas = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

        inside_weights = gt_boxes.new(deltas.shape).zero_()
        inside_weights[labels == 1] = gt_boxes.new(
            cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        outside_weights = gt_boxes.new(deltas.shape).zero_()
        num_examples = torch.sum(labels >= 0)
        outside_weights[labels == 1] = gt_boxes.new(1,
                                                    4).fill_(1) / num_examples
        outside_weights[labels == 0] = gt_boxes.new(1,
                                                    4).fill_(1) / num_examples

        def map2origin(data, count=K * A, inds=keep, fill=0):
            """Map to original set."""
            shape = (count, ) + data.shape[1:]
            origin = torch.empty(shape).fill_(fill).type_as(gt_boxes)
            origin[inds] = data
            return origin

        labels = map2origin(labels, fill=-1)
        deltas = map2origin(deltas)
        inside_weights = map2origin(inside_weights)
        outside_weights = map2origin(outside_weights)

        labels = labels.view(1, height, width, A).permute(0, 3, 1, 2)
        labels = labels.contiguous().view(1, 1, A * height, width)
        deltas = deltas.view(1, height, width, A * 4).permute(0, 3, 1, 2)
        inside_weights = inside_weights.view(1, height, width,
                                             A * 4).permute(0, 3, 1, 2)
        outside_weights = outside_weights.view(1, height, width,
                                               A * 4).permute(0, 3, 1, 2)

        return labels, deltas, inside_weights, outside_weights
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    #     cls_probs, box_preds = [], []
    #     for lvl in range(k_min, k_max + 1):
    #         suffix = 'fpn{}'.format(lvl)
    #         cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
    #         box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    #     for k, v in inputs.items():
    #         workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    #     workspace.RunNet(model.net.Proto().name)
    #     cls_probs = workspace.FetchBlobs(cls_probs)
    #     box_preds = workspace.FetchBlobs(box_preds)
    return_dict = model(**inputs)

    cls_probs = return_dict['cls_score']
    box_preds = return_dict['bbox_pred']
    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((  # [1, 9, 80, 112, 160]
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((  # 1 9 4 112 160
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.data.cpu().numpy().squeeze().ravel()
        box_pred = box_pred.data.cpu().numpy()  #.squeeze()
        # print(box_pred.shape, box_pred.squeeze().shape) # 1 9 4 112 60 -> 9 4 112 160
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0  # 0.05 or 0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))

        inds = np.argpartition(cls_prob_ravel[candidate_inds], -pre_nms_topn)[
            -pre_nms_topn:]  # select thos better than threshold and top 10000
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose(
        )  #to transform index according to data shape
        # before ravel: inds 5d shape (160, 112, 80, 10, 5)  from  (10, 80, 112, 160) torch.Size([1, 9, 80, 112, 160])
        # after ravel: inds 5d shape (10, 5)  from  (10,) torch.Size([1, 9, 80, 112, 160])
        # inds_5d: ?, anchor_id, classes, y, x
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes  #'''
    '''return None '''
Ejemplo n.º 17
0
def im_detect_bbox(model,
                   im,
                   target_scale,
                   target_max_size,
                   boxes=None,
                   region_box=None):
    """Prepare the bbox for testing"""
    h = im.shape[0]
    w = im.shape[1]

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)
    #print("inputs['data']:",inputs['data'])
    #print("im_scale:", im_scale)
    #print("im_scale len:", len(im_scale))
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)
    #boxes_cp = boxes.copy()
    #rois = return_dict['rois'].data.cpu().numpy()
    #boxes_cp = rois[:, 1:5] / im_scale
    #edge_index = (boxes_cp[:,0]<2)|(boxes_cp[:,1]<2) | (boxes_cp[:,2] > w-3) | (boxes_cp[:,3] > h-3)
    #print("edge_index:",edge_index)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        if region_box == None:
            boxes = rois[:, 1:5] / im_scale

        else:
            #print(rois)
            boxes = rois[:, 1:5] / im_scale
            #print("boxes111:",boxes)
            #print("111:",boxes.shape) #[1000 * 4]
            edge_index = (boxes[:, 0] < 2) | (boxes[:, 1] < 2) | (
                boxes[:, 2] > w - 2) | (boxes[:, 3] > h - 2)
            #boxes = boxes[~edge_index]
            scores[edge_index] = [0 for i in range(13)]
            #print("222:",boxes.shape) #[1000 * 4]
            x_1 = region_box[0]
            y_1 = region_box[1]
            #print("x_1,y_1:",x_1,y_1)
            #boxes = np.reshape(boxes, (-1,4))
            boxes[:, (0, 2)] = boxes[:, (0, 2)] + x_1 - 1
            #print("333:",boxes)
            boxes[:, (1, 3)] = boxes[:, (1, 3)] + y_1 - 1
            #print("boxes222:",boxes)
    # cls prob (activations after softmax)
    # scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    #print("scores shape...:", scores.shape) # [1000 * 13]
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])
    #print("scores shape:", scores.shape)  # [1000 * 13]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS \
                         + cfg.TRAIN.BBOX_NORMALIZE_MEANS
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        #print("pred_boxes shape:",pred_boxes.shape) # [1000 * 52]
        #pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    #print("pred_boxes shape:",pred_boxes.shape)
    #pred_boxes = pred_boxes[~edge_index]
    #print("pred_boxes shape...:",pred_boxes.shape)
    return scores, pred_boxes, im_scale, return_dict['blob_conv']
Ejemplo n.º 18
0
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        rpn_ret = self.RPN(blob_conv, im_info, roidb)
        # rpn proposals

        # if self.training:
        #     # can be used to infer fg/bg ratio
        #     return_dict['rois_label'] = rpn_ret['labels_int32']

        rois_certification = False
        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if rois_certification:
            lvl_min = cfg.FPN.ROI_MIN_LEVEL
            lvl_max = cfg.FPN.ROI_MAX_LEVEL
            test_rpn_ret = {'rois': rpn_ret['rois']}
            lvls = fpn_utils.map_rois_to_fpn_levels(test_rpn_ret['rois'],
                                                    lvl_min, lvl_max)
            rois_idx_order = np.empty((0, ))
            test_rois = test_rpn_ret['rois']

            for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)):
                idx_lvl = np.where(lvls == lvl)[0]
                rois_lvl = test_rois[idx_lvl, :]
                rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
                test_rpn_ret['rois_fpn{}'.format(lvl)] = rois_lvl

            rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32,
                                                                 copy=False)
            test_rpn_ret['rois_idx_restore_int32'] = rois_idx_restore

            test_feat = self.Box_Head(blob_conv, test_rpn_ret)
            test_cls_score, test_bbox_pred = self.Box_Outs(test_feat)

            test_cls_score = test_cls_score.data.cpu().numpy().squeeze()
            test_bbox_pred = test_bbox_pred.data.cpu().numpy().squeeze()

        if not cfg.MODEL.RPN_ONLY:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            # bbox proposals
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret)
            cls_score, bbox_pred = self.Box_Outs(box_feat)
        else:
            # TODO: complete the returns for RPN only situation
            pass

        # 在这里开始计算loss
        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            rpn_kwargs.update(
                dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                     if (k.startswith('rpn_cls_logits')
                         or k.startswith('rpn_bbox_pred'))))
            loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                **rpn_kwargs)
            if cfg.FPN.FPN_ON:
                for i, lvl in enumerate(
                        range(cfg.FPN.RPN_MIN_LEVEL,
                              cfg.FPN.RPN_MAX_LEVEL + 1)):
                    return_dict['losses']['loss_rpn_cls_fpn%d' %
                                          lvl] = loss_rpn_cls[i]
                    return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                          lvl] = loss_rpn_bbox[i]
            else:
                return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

            # bbox loss
            loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                cls_score, bbox_pred, rpn_ret['labels_int32'],
                rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
                rpn_ret['bbox_outside_weights'])
            return_dict['losses']['loss_cls'] = loss_cls
            return_dict['losses']['loss_bbox'] = loss_bbox
            return_dict['metrics']['accuracy_cls'] = accuracy_cls

            if cfg.MODEL.MASK_ON:
                if getattr(self.Mask_Head, 'SHARE_RES5', False):
                    mask_feat = self.Mask_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                else:
                    mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                mask_pred = self.Mask_Outs(mask_feat)
                # return_dict['mask_pred'] = mask_pred
                # mask loss
                loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                    mask_pred, rpn_ret['masks_int32'])
                return_dict['losses']['loss_mask'] = loss_mask

            if cfg.MODEL.KEYPOINTS_ON:
                if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                    # No corresponding keypoint head implemented yet (Neither in Detectron)
                    # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                    kps_feat = self.Keypoint_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_keypoints_int32=rpn_ret[
                            'roi_has_keypoint_int32'])
                else:
                    kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                kps_pred = self.Keypoint_Outs(kps_feat)
                # return_dict['keypoints_pred'] = kps_pred
                # keypoints loss
                if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'])
                else:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'],
                        rpn_ret['keypoint_loss_normalizer'])
                return_dict['losses']['loss_kps'] = loss_keypoints

            # pytorch0.4 bug on gathering scalar(0-dim) tensors
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        else:
            # Testing
            return_dict['rois'] = rpn_ret['rois']
            import json
            if cfg.TEST.IOU_OUT:
                # 直接通过rpn_ret可以取出rois
                with open(
                        "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/raw_roi.json",
                        'w') as f:
                    json.dump((return_dict['rois'][:, 1:] /
                               im_info.numpy()[0][2]).tolist(), f)

                # 如果在FPN模式下,需要进到一个collect_and_distribute...的函数去取出分配后的scores
                # ,我直接在collect_and_distribute_fpn_rpn_proposals.py里把json输出
                # 因此这里直接考虑RPN_ONLY模式的取值。
                if not cfg.FPN.FPN_ON:
                    with open(
                            "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/rois_score.json",
                            'w') as f:
                        score_2_json = []
                        for item in rpn_ret['rpn_roi_probs']:
                            score_2_json.append(item.item())
                        json.dump(score_2_json, f)

            # 开始第二个fast_head阶段,首先通过rois和bbox_delta计算pred_box
            if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
                lvl_min = cfg.FPN.ROI_MIN_LEVEL
                lvl_max = cfg.FPN.ROI_MAX_LEVEL
                if cfg.FPN.FPN_ON:
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
                    box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                    shift_boxes = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    shift_boxes = box_utils.clip_tiled_boxes(
                        shift_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])
                    num_classes = cfg.MODEL.NUM_CLASSES

                    onecls_pred_boxes = []
                    onecls_score = []
                    dets_cls = {}
                    count = 0
                    for j in range(1, num_classes):
                        inds = np.where(
                            cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                        score_j = cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_score += score_j.tolist()
                        dets_cls.update({j: []})
                        for k in range(len(boxes_j.tolist())):
                            dets_cls[j].append(count)
                            count += 1

                    assert count == len(onecls_pred_boxes)
                    stage2_rois_score = np.array(onecls_score,
                                                 dtype=np.float32)
                    stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)

                    # Redistribute stage2_rois using fpn_utils module provided functions
                    # calculate by formula
                    cls_tracker = {}
                    if not stage2_rois.tolist():
                        stage1_pred_iou = stage2_rois_score.tolist()
                        stage2_final_boxes = np.empty((0, ))
                        stage2_final_score = np.empty((0, ))

                        logger.info("Detections above threshold is null.")
                    else:
                        alter_rpn = {}
                        unresize_stage2_rois = stage2_rois * im_scale
                        # unresize_stage2_rois = np.concatenate((unresize_stage2_rois, unresized_rois[:, 1:5]))

                        lvls = fpn_utils.map_rois_to_fpn_levels(
                            unresize_stage2_rois, lvl_min, lvl_max)
                        # TAG: We might need to visualize "stage2_rois" to make sure.
                        rois_idx_order = np.empty((0, ))
                        dummy_batch = np.zeros(
                            (unresize_stage2_rois.shape[0], 1),
                            dtype=np.float32)
                        alter_rpn["rois"] = np.hstack(
                            (dummy_batch,
                             unresize_stage2_rois)).astype(np.float32,
                                                           copy=False)
                        # alter_rpn['rois'] = np.concatenate((alter_rpn['rois'], unresized_rois))

                        for output_idx, lvl in enumerate(
                                range(lvl_min, lvl_max + 1)):
                            idx_lvl = np.where(lvls == lvl)[0]
                            rois_lvl = unresize_stage2_rois[idx_lvl, :]
                            rois_idx_order = np.concatenate(
                                (rois_idx_order, idx_lvl))
                            _ = np.zeros((rois_lvl.shape[0], 1),
                                         dtype=np.float32)
                            alter_rpn['rois_fpn{}'.format(lvl)] = np.hstack(
                                (_, rois_lvl)).astype(np.float32, copy=False)

                        rois_idx_restore = np.argsort(rois_idx_order).astype(
                            np.int32, copy=False)
                        alter_rpn['rois_idx_restore_int32'] = rois_idx_restore
                        # Go through 2nd stage of FPN and fast_head
                        stage2_feat = self.Box_Head(blob_conv, alter_rpn)
                        stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                            stage2_feat)

                        # Transform shift value to original one to get final pred boxes coordinates
                        stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                        ).squeeze()
                        stage2_cls_score = stage2_cls_score.data.cpu().numpy()

                        stage2_box_deltas = stage2_bbox_pred.reshape(
                            [-1, bbox_pred.shape[-1]])
                        # Add some variance to box delta
                        if cfg.FAST_RCNN.STAGE1_TURBULENCE:
                            import random
                            for i in range(len(stage2_box_deltas)):
                                for j in range(len(stage2_box_deltas[i])):
                                    stage2_box_deltas[i][j] *= random.uniform(
                                        0.9, 1.1)

                        stage2_cls_out = box_utils.bbox_transform(
                            stage2_rois, stage2_box_deltas,
                            cfg.MODEL.BBOX_REG_WEIGHTS)
                        stage2_cls_out = box_utils.clip_tiled_boxes(
                            stage2_cls_out,
                            im_info.data.cpu().numpy().squeeze()[0:2])
                        onecls_pred_boxes = []
                        onecls_score = []
                        count = 0
                        for j in range(1, num_classes):
                            inds = np.where(
                                stage2_cls_score[:,
                                                 j] > cfg.TEST.SCORE_THRESH)[0]
                            boxes_j = stage2_cls_out[inds, j * 4:(j + 1) * 4]
                            score_j = stage2_cls_score[inds, j]
                            dets_j = np.hstack(
                                (boxes_j,
                                 score_j[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
                            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
                            boxes_j = boxes_j[keep]
                            score_j = score_j[keep]
                            # 用于记录每个框属于第几类
                            onecls_score += score_j.tolist()
                            onecls_pred_boxes += boxes_j.tolist()

                            for k in range(len(score_j)):
                                cls_tracker.update({count: j})
                                count += 1

                        assert count == len(onecls_score)
                        stage2_final_boxes = np.array(onecls_pred_boxes,
                                                      dtype=np.float32)
                        stage2_final_score = np.array(onecls_score,
                                                      dtype=np.float32)
                        inds = np.where(stage2_final_score > 0.3)[0]

                        # Filtered by keep index...
                        preserve_stage2_final_boxes = copy.deepcopy(
                            stage2_final_boxes)
                        preserve_stage2_final_score = copy.deepcopy(
                            stage2_final_score)
                        stage2_final_boxes = stage2_final_boxes[inds]
                        stage2_final_score = stage2_final_score[inds]

                        # if nothing left after 0.3 threshold filter, reserve whole boxes to original.
                        if stage2_final_boxes.size == 0:
                            lower_inds = np.where(
                                preserve_stage2_final_score > 0.1)[0]
                            stage2_final_boxes = preserve_stage2_final_boxes[
                                lower_inds]
                            stage2_final_score = preserve_stage2_final_score[
                                lower_inds]

                        else:
                            del preserve_stage2_final_boxes
                            del preserve_stage2_final_score

                        # if all boxes are clsfied into bg class.
                        if stage2_final_boxes.size == 0:
                            stage1_pred_iou = stage2_rois_score.tolist()
                            stage2_final_boxes = np.empty((0, ))
                            stage2_final_score = np.empty((0, ))
                            logger.info("Detections above threshold is null.")

                        else:
                            # Restore stage2_pred_boxes to match the index with stage2_rois, Compute IOU between
                            # final_boxes and stage2_rois, one by one
                            flag = "cross_product"
                            if flag == "element_wise":
                                if stage2_final_boxes.shape[
                                        0] == stage2_rois.shape[0]:
                                    restored_stage2_final_boxes = stage2_final_boxes[
                                        rois_idx_restore]
                                    stage1_pred_iou = []
                                    for ind, item in enumerate(stage2_rois):
                                        stage1 = np.array(
                                            item, dtype=np.float32).reshape(
                                                (1, 4))
                                        stage2 = np.array(
                                            restored_stage2_final_boxes[ind],
                                            dtype=np.float32).reshape((1, 4))
                                        iou = box_utils.bbox_overlaps(
                                            stage1, stage2)
                                        stage1_pred_iou.append(
                                            iou.squeeze().item())
                                else:
                                    logger.info(
                                        "Mistake while processing {}".format(
                                            str(im_info)))
                            elif flag == "cross_product":
                                iou = box_utils.bbox_overlaps(
                                    stage2_rois, stage2_final_boxes)
                                stage1_pred_iou = iou.max(axis=1).tolist()

                    # stage1_pred is another name of stage2_rois
                    assert len(stage1_pred_iou) == len(stage2_rois)
                    if cfg.FAST_RCNN.IOU_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)

                    elif cfg.FAST_RCNN.SCORE_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)

                else:
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    # unscale back to raw image space
                    box_deltas = bbox_pred.data.cpu().numpy().squeeze()
                    fast_stage1_score = cls_score.data.cpu().numpy().squeeze()

                    box_deltas = box_deltas.reshape([-1, bbox_pred.shape[-1]])
                    stage2_rois = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_rois = box_utils.clip_tiled_boxes(
                        stage2_rois,
                        im_info.data.cpu().numpy().squeeze()[0:2])

                    num_classes = cfg.MODEL.NUM_CLASSES

                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        inds = np.where(
                            cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = stage2_rois[inds, j * 4:(j + 1) * 4]
                        score_j = fast_stage1_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)
                    stage2_rois_score = np.array(onecls_cls_score,
                                                 dtype=np.float32)

                    assert len(stage2_rois) == len(stage2_rois_score)

                    # Send stage2 rois to next stage fast head, do ROI ALIGN again
                    # to modify rpn_ret['rois] , rpn_ret['rpn_rois'] and rpn['rois_rpn_score']

                    rpn_ret['rois'] = stage2_rois
                    rpn_ret['rpn_rois'] = stage2_rois
                    rpn_ret['rpn_roi_probs'] = stage2_rois_score
                    stage2_box_feat = self.Box_Head(blob_conv, rpn_ret)
                    stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                        stage2_box_feat)

                    stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                    ).squeeze()
                    stage2_bbox_pred = stage2_bbox_pred.reshape(
                        [-1, bbox_pred.shape[-1]])

                    stage2_cls_pred_boxes = box_utils.bbox_transform(
                        stage2_rois, stage2_bbox_pred,
                        cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_cls_pred_boxes = box_utils.clip_tiled_boxes(
                        stage2_cls_pred_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])

                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        inds = np.where(
                            stage2_cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        if len(inds) != 0:
                            print("KKKKK")
                        boxes_j = stage2_cls_pred_boxes[inds,
                                                        j * 4:(j + 1) * 4]
                        score_j = stage2_cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_bbox_pred = np.array(onecls_pred_boxes,
                                                dtype=np.float32)
                    stage2_bbox_pred_score = np.array(onecls_cls_score,
                                                      dtype=np.float32)

        # get stage2 pred_boxes here

        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred
        return return_dict
Ejemplo n.º 19
0
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image."""
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]
    boxes = detections[:, 0:4]
    scores = detections[:, 4]
    classes = detections[:, 5]
    return boxes, scores, classes
Ejemplo n.º 20
0
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        rpn_ret = self.RPN(blob_conv, im_info, roidb)

        # if self.training:
        #     # can be used to infer fg/bg ratio
        #     return_dict['rois_label'] = rpn_ret['labels_int32']

        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if not cfg.MODEL.RPN_ONLY:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret)
            cls_score, bbox_pred = self.Box_Outs(box_feat)
        else:
            # TODO: complete the returns for RPN only situation
            pass

        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            rpn_kwargs.update(
                dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                     if (k.startswith('rpn_cls_logits')
                         or k.startswith('rpn_bbox_pred'))))
            loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                **rpn_kwargs)
            if cfg.FPN.FPN_ON:
                for k, lvl in enumerate(
                        range(cfg.FPN.RPN_MIN_LEVEL,
                              cfg.FPN.RPN_MAX_LEVEL + 1)):
                    return_dict['losses']['loss_rpn_cls_fpn%d' %
                                          lvl] = loss_rpn_cls[k]
                    return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                          lvl] = loss_rpn_bbox[k]
            else:
                return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

            # bbox loss
            loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                cls_score,
                bbox_pred,
                rpn_ret['labels_int32'],
                rpn_ret['bbox_targets'],
                rpn_ret['bbox_inside_weights'],
                rpn_ret['bbox_outside_weights'],
                stage=0)
            return_dict['losses']['loss_cls'] = loss_cls
            return_dict['losses']['loss_bbox'] = loss_bbox
            return_dict['metrics']['accuracy_cls'] = accuracy_cls

            if cfg.MODEL.MASK_ON:
                if getattr(self.Mask_Head, 'SHARE_RES5', False):
                    mask_feat = self.Mask_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                else:
                    mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                mask_pred = self.Mask_Outs(mask_feat)
                # return_dict['mask_pred'] = mask_pred
                # mask loss
                loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                    mask_pred, rpn_ret['masks_int32'])
                return_dict['losses']['loss_mask'] = loss_mask

            if cfg.MODEL.KEYPOINTS_ON:
                if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                    # No corresponding keypoint head implemented yet (Neither in Detectron)
                    # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                    kps_feat = self.Keypoint_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_keypoints_int32=rpn_ret[
                            'roi_has_keypoint_int32'])
                else:
                    kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                kps_pred = self.Keypoint_Outs(kps_feat)
                # return_dict['keypoints_pred'] = kps_pred
                # keypoints loss
                if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'])
                else:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'],
                        rpn_ret['keypoint_loss_normalizer'])
                return_dict['losses']['loss_kps'] = loss_keypoints

            # pytorch0.4 bug on gathering scalar(0-dim) tensors
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        if not cfg.FAST_RCNN.USE_CASCADE:
            return_dict['rois'] = rpn_ret['rois']
            return_dict['cls_score'] = cls_score
            return_dict['bbox_pred'] = bbox_pred
        else:
            return_dict['rois' + '_{}'.format(0)] = rpn_ret['rois']
            return_dict['cls_score' + '_{}'.format(0)] = cls_score.detach()
            return_dict['bbox_pred' + '_{}'.format(0)] = bbox_pred.detach()

        if cfg.FAST_RCNN.USE_CASCADE:
            for i in range(2):
                i += 1
                pre_stage_name = '_{}'.format(i - 1)
                cls_score_cpu = cls_score.data.cpu().numpy()
                import utils.boxes as box_utils
                bbox_pred_cpu = bbox_pred.reshape(
                    [-1, bbox_pred.shape[-1]]).data.cpu().numpy().squeeze()
                rois = deepcopy(return_dict['rois' + pre_stage_name])

                assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG is True
                if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                    cls_loc = np.argmax(cls_score_cpu[:, 1:], axis=1) + 1
                    cls_loc = np.reshape(cls_loc, (cls_loc.shape[0], 1))
                    # Based on scores, we can select transformed rois
                    scores = np.zeros((cls_score_cpu.shape[0], 1))
                    for k in range(len(cls_loc)):
                        scores[k] = cls_score_cpu[k, cls_loc[k]]

                batch_inds = rois[:, 0]
                uni_inds = np.unique(batch_inds)

                # WE suppose the WIDTH of image is EQUAL to its HEIGHT

                # We also provide an example to show how to perform the operation batch-wise
                # Scale forward
                batch_se = []
                for e in range(len(uni_inds)):
                    id_min = min(np.where(batch_inds == uni_inds[e])[0])
                    id_max = max(np.where(batch_inds == uni_inds[e])[0])
                    rois[id_min:id_max + 1, 1:5] /= im_info[e][2]
                    batch_se.append([id_min, id_max])

                pred_boxes = box_utils.bbox_transform(
                    rois[:, 1:5], bbox_pred_cpu,
                    cfg.CASCADE_RCNN.BBOX_REG_WEIGHTS[i])
                # Scale back
                for e in range(len(uni_inds)):
                    id_min = batch_se[e][0]
                    id_max = batch_se[e][1]
                    pred_boxes[id_min:id_max + 1] *= im_info[e][2]
                    pred_boxes[id_min:id_max + 1] = box_utils.clip_tiled_boxes(
                        pred_boxes[id_min:id_max + 1], im_info[e][0:2])

                cfg_key = 'TRAIN' if self.training else 'TEST'
                min_size = cfg[cfg_key].RPN_MIN_SIZE

                if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                    # Cannot use for loop here which may cause "illegal memory access"
                    # Thanks to Chen-Wei Xie !
                    rows = pred_boxes.shape[0]
                    b3 = cls_loc * 4 + np.array([0, 1, 2, 3])
                    b4 = np.array(range(rows))
                    c = pred_boxes[np.repeat(b4, 4), b3.flatten()]
                    proposals = np.reshape(c, (-1, 4))
                else:
                    proposals = pred_boxes[:, 4:8]

                keep = _filter_boxes(proposals, min_size, im_info[0])

                proposals = proposals[keep, :]
                batch_inds = batch_inds[keep]

                batch_inds = np.reshape(batch_inds, [len(batch_inds), 1])

                proposals = np.concatenate((batch_inds, proposals), axis=1)

                from modeling.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp
                self.CollectAndDistributeFpnRpnProposals = CollectAndDistributeFpnRpnProposalsOp(
                )
                self.CollectAndDistributeFpnRpnProposals.training = self.training
                # proposals.astype('float32')
                blobs_out = self.CollectAndDistributeFpnRpnProposals(proposals,
                                                                     roidb,
                                                                     im_info,
                                                                     stage=i)
                # Update rpn_ret
                new_rpn_ret = {}
                for key, value in rpn_ret.items():
                    if 'rpn' in key:
                        new_rpn_ret[key] = value

                new_rpn_ret.update(blobs_out)

                if not self.training:
                    return_dict['blob_conv'] = blob_conv

                if not cfg.MODEL.RPN_ONLY:
                    if i == 1:
                        if cfg.MODEL.SHARE_RES5 and self.training:
                            box_feat, res5_feat = self.Box_Head_2(
                                blob_conv, new_rpn_ret)
                        else:
                            box_feat = self.Box_Head_2(blob_conv, new_rpn_ret)

                        cls_score, bbox_pred = self.Box_Outs_2(box_feat)
                    elif i == 2:
                        if cfg.MODEL.SHARE_RES5 and self.training:
                            box_feat, res5_feat = self.Box_Head_3(
                                blob_conv, new_rpn_ret)
                        else:
                            box_feat = self.Box_Head_3(blob_conv, new_rpn_ret)

                        cls_score, bbox_pred = self.Box_Outs_3(box_feat)

                if self.training:
                    # rpn loss
                    rpn_kwargs.update(
                        dict((k, new_rpn_ret[k]) for k in new_rpn_ret.keys()
                             if (k.startswith('rpn_cls_logits')
                                 or k.startswith('rpn_bbox_pred'))))
                    loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                        **rpn_kwargs)
                    if cfg.FPN.FPN_ON:
                        for k, lvl in enumerate(
                                range(cfg.FPN.RPN_MIN_LEVEL,
                                      cfg.FPN.RPN_MAX_LEVEL + 1)):
                            return_dict['losses']['loss_rpn_cls_fpn%d' %
                                                  lvl] += loss_rpn_cls[k]
                            return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                                  lvl] += loss_rpn_bbox[k]
                    else:
                        return_dict['losses']['loss_rpn_cls'] += loss_rpn_cls
                        return_dict['losses']['loss_rpn_bbox'] += loss_rpn_bbox

                    # bbox loss
                    loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                        cls_score,
                        bbox_pred,
                        new_rpn_ret['labels_int32'],
                        new_rpn_ret['bbox_targets'],
                        new_rpn_ret['bbox_inside_weights'],
                        new_rpn_ret['bbox_outside_weights'],
                        stage=i)

                    return_dict['losses']['loss_cls'] += loss_cls
                    return_dict['losses']['loss_bbox'] += loss_bbox
                    return_dict['metrics']['accuracy_cls'] += accuracy_cls

                return_dict['rois' + '_{}'.format(i)] = deepcopy(
                    new_rpn_ret['rois'])
                return_dict['cls_score' + '_{}'.format(i)] = cls_score.detach()
                return_dict['bbox_pred' + '_{}'.format(i)] = bbox_pred.detach()

                rpn_ret = new_rpn_ret.copy()

        return return_dict