Python bbox_transform Beispiele, detectron.utils.boxes.bbox_transform Python Beispiele

Beispiel #1

0

Datei anzeigen

    def forward(self, inputs, outputs):
        """See modeling.detector.DecodeBBoxes for inputs/outputs
        documentation.
        """

        bbox_deltas = inputs[0].data
        assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
        assert bbox_deltas.shape[1] == 8
        bbox_deltas = bbox_deltas[:, -4:]
        bbox_data = inputs[1].data
        assert bbox_data.shape[1] == 5
        batch_inds = bbox_data[:, :1]
        bbox_prior = bbox_data[:, 1:]

        # Transform bbox priors into proposals via bbox transformations
        bbox_decode = box_utils.bbox_transform(bbox_prior, bbox_deltas,
                                               self._bbox_reg_weights)

        # remove mal-boxes with non-positive width or height and ground
        # truth boxes during training
        if len(inputs) > 2:
            mapped_gt_boxes = inputs[2].data
            max_overlap = mapped_gt_boxes[:, 4]
            keep = _filter_boxes(bbox_decode, max_overlap)
            bbox_decode = bbox_decode[keep, :]
            batch_inds = batch_inds[keep, :]

        bbox_decode = np.hstack((batch_inds, bbox_decode))
        outputs[0].reshape(bbox_decode.shape)
        outputs[0].data[...] = bbox_decode

Beispiel #2

0

Datei anzeigen

 def test_bbox_dataset_to_prediction_roundtrip(self):
     """Simulate the process of reading a ground-truth box from a dataset,
     make predictions from proposals, convert the predictions back to the
     dataset format, and then use the COCO API to compute IoU overlap between
     the gt box and the predictions. These should have IoU of 1.
     """
     weights = (5, 5, 10, 10)
     # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format
     gt_xywh_box = [10, 20, 100, 150]
     # 2/ convert it to our internal (x1, y1, x2, y2) format
     gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)
     # 3/ consider nearby proposal boxes
     prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)
     # 4/ compute proposal-to-gt transformation deltas
     deltas = box_utils.bbox_transform_inv(
         prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights
     )
     # 5/ use deltas to transform proposals to xyxy predicted box
     pred_xyxy_boxes = box_utils.bbox_transform(
         prop_xyxy_boxes, deltas, weights=weights
     )
     # 6/ convert xyxy predicted box to xywh predicted box
     pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)
     # 7/ use COCO API to compute IoU
     not_crowd = [int(False)] * pred_xywh_boxes.shape[0]
     ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd)
     np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))

Beispiel #3

0

Datei anzeigen

    def forward(self, inputs, outputs):
        """See modeling.detector.AddBBoxAccuracy for inputs/outputs
        documentation.
        """

        # predicted bbox deltas
        bbox_deltas = inputs[0].data
        # proposals
        bbox_data = inputs[1].data
        assert bbox_data.shape[1] == 5
        bbox_prior = bbox_data[:, 1:]
        # labels
        labels = inputs[2].data
        # mapped gt boxes
        mapped_gt_boxes = inputs[3].data
        gt_boxes = mapped_gt_boxes[:, :4]
        max_overlap = mapped_gt_boxes[:, 4]

        # bbox iou only for fg and non-gt boxes
        keep_inds = np.where((labels > 0) & (max_overlap < 1.0))[0]
        num_boxes = keep_inds.size
        bbox_deltas = bbox_deltas[keep_inds, :]
        bbox_prior = bbox_prior[keep_inds, :]
        labels = labels[keep_inds]
        gt_boxes = gt_boxes[keep_inds, :]
        max_overlap = max_overlap[keep_inds]

        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or num_boxes == 0:
            bbox_deltas = bbox_deltas[:, -4:]
        else:
            bbox_deltas = np.vstack([
                bbox_deltas[i, labels[i] * 4:labels[i] * 4 + 4]
                for i in range(num_boxes)
            ])
        pred_boxes = box_utils.bbox_transform(bbox_prior, bbox_deltas,
                                              self._bbox_reg_weights)

        avg_iou = 0.
        pre_avg_iou = sum(max_overlap)
        for i in range(num_boxes):
            gt_box = gt_boxes[i, :]
            pred_box = pred_boxes[i, :]
            tmp_iou = box_utils.bbox_overlaps(
                gt_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
                pred_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
            )
            avg_iou += tmp_iou[0]
        if num_boxes > 0:
            avg_iou /= num_boxes
            pre_avg_iou /= num_boxes
        outputs[0].reshape([1])
        outputs[0].data[...] = avg_iou
        outputs[1].reshape([1])
        outputs[1].data[...] = pre_avg_iou

Beispiel #4

0

Datei anzeigen

 def test_bbox_transform_and_inverse(self):
     weights = (5, 5, 10, 10)
     src_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     dst_boxes = random_boxes([10, 10, 20, 20], 1, 10)
     deltas = box_utils.bbox_transform_inv(
         src_boxes, dst_boxes, weights=weights
     )
     dst_boxes_reconstructed = box_utils.bbox_transform(
         src_boxes, deltas, weights=weights
     )
     np.testing.assert_array_almost_equal(
         dst_boxes, dst_boxes_reconstructed, decimal=5
     )

Beispiel #5

0

Datei anzeigen

def im_detect_bbox(workspace, predict_net, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        workspace: the caffe2 workspace to use
        predict_net: the prediction network
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v, get_device_option_cpu())
    workspace.RunNet(predict_net.name)

    # Read out blobs
    #rois = workspace.FetchBlob(core.ScopedName('rois'))
    rois = workspace.FetchBlob(core.ScopedName('rpn_rois'))

    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    # Apply bounding-box regression deltas
    box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
    # In case there is 1 proposal
    box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])

    pred_boxes = box_utils.bbox_transform(
        boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
    )
    pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))

    return scores, pred_boxes, im_scale

Beispiel #6

0

Datei anzeigen

    def forward(self, inputs, outputs):
        # The inputs contains [bbox_pred, cls_prob, rois]
        # rois --> np.array((num,5)), (batch_idx, x1, y2, x2, y2)
        # print('++++++++++++++++++++++ Decode BBox of rcnn stage {} +++++++++++++++++++++++'.format(self._stage_num))
        cls_prob = inputs[0].data[...]
        bbox_pred = inputs[1].data[...]
        rois = inputs[2].data[...]
        if self._train:
            overlaps = inputs[3].data[...]
            im_info = inputs[4].data
        else:
            im_info = inputs[3].data

        proposals_next = rois[:, 1:5]

        # Use delta with max cls_score as deltas adding to rois
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            delta = bbox_pred[:, 4:bbox_pred.shape[1]]
        else:
            cls_idx = cls_prob.argmax(axis=1)
            delta = np.zeros((bbox_pred.shape[0], 4), dtype=bbox_pred.dtype)
            for i in range(cls_idx.shape[0]):
                delta[i, :] = bbox_pred[i, cls_idx[i] * 4:cls_idx[i] * 4 + 4]

        # Add bbox deltas onto rois to generate new rois
        if self._stage_num == 1:
            bbox_reg_weights = cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE1
        elif self._stage_num == 2:
            bbox_reg_weights = cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE2

        new_rois = box_utils.bbox_transform(proposals_next, delta,
                                            bbox_reg_weights)
        batch_idxs = rois[:, 0].reshape(rois.shape[0], 1)
        new_rois = np.hstack((batch_idxs, new_rois))

        # remove invalid boxes
        output_rois = remove_invalid_boxes(new_rois)

        if self._train:
            # screen out high IOU boxes, to remove redundant gt boxes
            output_rois = remove_high_iou_boxes(output_rois, overlaps)
        else:
            output_rois = output_rois

        # clip tiled boxes into image
        output_rois = clip_tiled_bboxes(output_rois, im_info[0, :2])
        blob_utils.py_op_copy_blob(output_rois, outputs[0])

Beispiel #7

0

Datei anzeigen

    def forward(self, inputs, outputs):
        """See modeling.detector.DecodeBBoxes for inputs/outputs
        documentation.
        """
        ###输入是上一阶段的回归参数，以及proposals
        ###bbox_deltas的shape为(num of proposals, 8)
        bbox_deltas = inputs[0].data
        ####必须要是这种类型： CLS_AGNOSTIC_BBOX_REG
        assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
        assert bbox_deltas.shape[1] == 8
        bbox_deltas = bbox_deltas[:, -4:]
        bbox_data = inputs[1].data
        ###bbox_data的shape为（num of proposals, 5）
        assert bbox_data.shape[1] == 5
        batch_inds = bbox_data[:, :1]
        bbox_prior = bbox_data[:, 1:]

        # Transform bbox priors into proposals via bbox transformations
        ### 将bbox priors--->bbox predictions或proposals(bbox proposals在cascade 中
        # 即为输入给下一级的proposals)

        bbox_decode = box_utils.bbox_transform(bbox_prior, bbox_deltas,
                                               self._bbox_reg_weights)

        # remove mal-boxes with non-positive width or height and ground
        # truth boxes during training
        ###滤除具有负的宽/高的boxes
        if len(inputs) > 2:
            ###在训练阶段，inputs[2]为gt boxes, 推断时自然没有这一维，
            ### mapped_gt_boxes为某张图片（或某个batch??）的所有与proposals
            # 对应的gt boxes
            mapped_gt_boxes = inputs[2].data
            max_overlap = mapped_gt_boxes[:, 4]
            ###max_overlap什么作用？？？
            keep = _filter_boxes(bbox_decode, max_overlap)
            ####keep保留所有满足要求的bboxes
            bbox_decode = bbox_decode[keep, :]
            batch_inds = batch_inds[keep, :]

        bbox_decode = np.hstack((batch_inds, bbox_decode))
        ###outputs的shape为（1,）
        outputs[0].reshape(bbox_decode.shape)
        outputs[0].data[...] = bbox_decode

Beispiel #8

0

Datei anzeigen

Datei: test_retinanet.py Projekt: Yagami360/densepose_wrapper

def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes

Beispiel #9

0

Datei anzeigen

Datei: generate_proposals.py Projekt: Mrggggg/DensePose

    def proposals_for_one_image(
        self, im_info, all_anchors, bbox_deltas, scores
    ):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(
                -scores.squeeze(), pre_nms_topN
            )[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(
            all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores

Beispiel #10

0

Datei anzeigen

Datei: test.py Projekt: zhonhel/Master_Thesis

def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    rois = workspace.FetchBlob(core.ScopedName('rois'))
    split = workspace.FetchBlob(core.ScopedName('roi_numbers'))

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.WHAT == 'coco':
        boxes = rois[:int(split[0]), 1:5] / im_scale
        scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        scores = scores.reshape([-1, scores.shape[-1]])
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
    elif cfg.TEST.WHAT == 'toothbrush':
        boxes = rois[int(split[0]):, 1:5] / im_scale
        scores = workspace.FetchBlob(
            core.ScopedName('cls_prob_toothbrush')).squeeze()
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred_toothbrush')).squeeze()
        scores = scores.reshape([-1, scores.shape[-1]])
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
    elif cfg.TEST.WHAT == 'toothbrush_rpn':
        boxes = rois[int(split[0]):, 1:5] / im_scale
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred_toothbrush')).squeeze()
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)

        scores = workspace.FetchBlob(core.ScopedName('roi_scores'))
        scores = scores[int(split[0]):]
        #for i in range(scores.shape[0]):
        #    scores[i]=min(scores[i]+0.05,1.0)
        #print(min(scores))
        scores = scores[:, np.newaxis]
        tmp = np.zeros(scores.shape, dtype=np.float32)
        scores = np.concatenate((tmp, scores), axis=1)

    return scores, pred_boxes, im_scale

Beispiel #11

0

Datei anzeigen

def im_detect_bbox(model,
                   im,
                   target_scale,
                   target_max_size,
                   size_fix=None,
                   timers=None,
                   model1=None,
                   boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    if timers is None:
        timers = defaultdict(Timer)

    if model1 is None and os.environ.get('COSIM'):
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"

    timers['data1'].tic()
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size,
                                  size_fix)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v)
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k), v)
    timers['data1'].toc()
    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            cosim_alg = os.environ.get('COSIM')
            with open("int8.txt", "wb") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "wb") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is  {1}".format(
                    i,
                    model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is  {1}".format(
                    i,
                    model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i,
                    model.net.Proto().op[i].type))
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    #assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i,
                    model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    #logging.warning("int8_outputis {} and fp32 output is {} ".format(int8_results[j], fp32_results[j]))
                    #if not compare_utils.assert_allclose(int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01, cosim_alg):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))
                    #assert_allclose(int8_results[j], fp32_results[j], **tol)
                logging.warning("pass checking op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    timers['result'].tic()
    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale
        batch_indices = rois[:, 0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    timers['result'].toc()
    return scores, pred_boxes, im_scale, batch_indices

Beispiel #12

0

Datei anzeigen

Datei: test.py Projekt: JanYperman/Detectron

def im_detect_bbox_batch(model,
                         ims,
                         target_scale,
                         target_max_size,
                         boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        ims (list): cfg.TEST.IMS_PER_BATCH color images to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs_batch(ims, boxes, target_scale,
                                         target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        ## # unscale back to raw image space
        ## boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # # In case there is 1 proposal
    # scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()

    scores_batch = []
    pred_boxes_batch = []
    for i in range(len(ims)):
        # select batch
        select_inds = np.where(rois[:, 0] == i)

        # unscale back to raw image space
        boxes = rois[select_inds, 1:5] / im_scales[i]
        boxes = boxes.reshape([-1, boxes.shape[-1]])
        scores_i = scores[select_inds, :]
        scores_i = scores_i.reshape([-1, scores_i.shape[-1]])
        scores_batch.append(scores_i)

        if cfg.TEST.BBOX_REG:
            # In case there is 1 proposal
            box_deltas_i = box_deltas[select_inds, :]
            box_deltas_i = box_deltas_i.reshape([-1, box_deltas_i.shape[-1]])
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Remove predictions for bg class (compat with MSRA code)
                box_deltas_i = box_deltas_i[:, -4:]
            pred_boxes = box_utils.bbox_transform(boxes, box_deltas_i,
                                                  cfg.MODEL.BBOX_REG_WEIGHTS)
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, ims[i].shape)
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                pred_boxes = (np.tile(pred_boxes, (1, scores_i.shape[1])))
            pred_boxes_batch.append(pred_boxes)
        else:
            logger.error('Not implemented.')
            return None, None, None

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        raise NotImplementedError(
            'Deduplication not implemented with batch inference, set TEST.IMS_PER_BATCH to 1'
        )

    return scores_batch, pred_boxes_batch, im_scales

Beispiel #13

0

Datei anzeigen

Datei: MaskRcnnHeadEndToEnd.py Projekt: y742035557/instpred

def im_detect_bbox_given_features(model, features, im_info, im_scales,
                                  im_shape):
    """Bounding box object detection for provided features with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        features (dictionary of ndarray): high level features from which to run detection

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    # Function simply adapted to use the input features and forward through the
    # head rather than input images through the entire network.

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    blobs = copy.copy(features)
    blobs['im_info'] = im_info
    for k, v in blobs.items():
        workspace.FeedBlob(caffe2_core.ScopedName(k), v)
    workspace.RunNet(model.faster_rnn_head.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(caffe2_core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    # use softmax estimated probabilities
    scores = workspace.FetchBlob(caffe2_core.ScopedName('cls_prob')).squeeze()

    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            caffe2_core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scales

Beispiel #14

0

Datei anzeigen

Datei: bbox_accuracy.py Projekt: zhaott618/cascade_rcnn_code_notes

    def forward(self, inputs, outputs):
        """See modeling.detector.AddBBoxAccuracy for inputs/outputs
        documentation.
        """

        # predicted bbox deltas, shape为（R, C*4）
        bbox_deltas = inputs[0].data
        # proposals的坐标集合, shape为（R, 5）
        bbox_data = inputs[1].data
        assert bbox_data.shape[1] == 5
        ### bbox_prior为所有的proposals坐标, shape为(R, 4)
        bbox_prior = bbox_data[:, 1:]

        # labels
        labels = inputs[2].data

        # mapped gt boxes
        mapped_gt_boxes = inputs[3].data
        gt_boxes = mapped_gt_boxes[:, :4]
        max_overlap = mapped_gt_boxes[:, 4]

        # bbox iou only for fg and non-gt boxes
        ###这里的labels指的是mapped_gt_bbox对应的labels吧？？？
        ###同时一移除所有的gt boxes
        ###相当于对这些gt bbox或proposals进行筛选
        keep_inds = np.where((labels > 0) & (max_overlap < 1.0))[0]
        ###所有符合要求的proposals个数
        num_boxes = keep_inds.size
        bbox_deltas = bbox_deltas[keep_inds, :]
        bbox_prior = bbox_prior[keep_inds, :]
        labels = labels[keep_inds]
        gt_boxes = gt_boxes[keep_inds, :]
        max_overlap = max_overlap[keep_inds]

        ### 关于AGNOSTIC_BBOX_REG 这个什么意思我始终云里雾里
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or num_boxes == 0:

            bbox_deltas = bbox_deltas[:, -4:]
        else:
            ### 将bbox_deltas的数据结构重新组织，即只保留bbox_deltas中
            ### 每一组回归参数对应的类别和labels（对应的gt真值）类别相同的回归参数
            ### 处理后的bbox_deltas的shape为(num_boxes, 4)
            bbox_deltas = np.vstack([
                bbox_deltas[i, labels[i] * 4:labels[i] * 4 + 4]
                for i in range(num_boxes)
            ])

        ### 通过bbox_transform函数将得到的proposals经过回归参数回归后
        ### 得到预测框predicted_bboxes，注意_bbox_reg_weights
        pred_boxes = box_utils.bbox_transform(bbox_prior, bbox_deltas,
                                              self._bbox_reg_weights)

        #####平均iou初值为0
        avg_iou = 0.
        pre_avg_iou = sum(max_overlap)
        for i in range(num_boxes):
            ###第i个gt_box（对应于第i个pred_bbox）的坐标值
            gt_box = gt_boxes[i, :]
            ###第i个pred_box的坐标值
            pred_box = pred_boxes[i, :]
            ###计算gt_box与pred_box之间的IOU
            tmp_iou = box_utils.bbox_overlaps(
                gt_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
                pred_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
            )
            avg_iou += tmp_iou[0]
        if num_boxes > 0:
            avg_iou /= num_boxes
            pre_avg_iou /= num_boxes
        ### 即outputs【0】--->本stage的avg_iou
        ###  outputs[1]----->上一个stage的avg_iou
        outputs[0].reshape([1])
        outputs[0].data[...] = avg_iou
        outputs[1].reshape([1])
        outputs[1].data[...] = pre_avg_iou

Beispiel #15

0

Datei anzeigen

    def get_target_class_weights(inputs,outputs):
        
        # 'rois', 'label_mask', 'im_info', 'is_source', 'cls_pred', 'bbox_pred'
        
        import numpy as np
        import detectron.utils.boxes as box_utils
        
        rois =  inputs[0].data
        dc_mask = inputs[1].data.astype(bool)
        im_info = inputs[2].data
        is_source = inputs[3].data.astype(bool)
        cls_pred = inputs[4].data
        bbox_pred = inputs[5].data
        
        this_im_info = im_info[~is_source,:][0,:]
        im_shape = this_im_info[:2]
        im_scale = this_im_info[2]
        im_idx = int(this_im_info[3]) # im_info is extended with its index in the roidb.
        
        rois = rois[~dc_mask,:]
        boxes = rois[:, 1:5] / im_scale
        
        box_deltas = bbox_pred
        scores = cls_pred
        
        # if cfg.TEST.BBOX_REG:
        #     # Apply bounding-box regression deltas
        #     if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        #         # Remove predictions for bg class (compat with MSRA code)
        #         box_deltas = box_deltas[:, -4:]
        
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_shape)
        
            # if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            #     pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
        # else:
        #     # Simply repeat the boxes, once for each class
        #     pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    
        # if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        #     # Map scores and predictions back to the original set of boxes
        #     scores = scores[inv_index, :]
        #     pred_boxes = pred_boxes[inv_index, :]

        # > scores, boxes <
        
        num_classes = cfg.MODEL.NUM_CLASSES
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
	
        sum_softmax = np.zeros((num_classes,))
    
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
            scores_j = scores[inds, j]
            boxes_j = pred_boxes[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
                np.float32, copy=False
            )
            
            # if cfg.TEST.SOFT_NMS.ENABLED:
            #     nms_dets, _ = box_utils.soft_nms(
            #         dets_j,
            #         sigma=cfg.TEST.SOFT_NMS.SIGMA,
            #         overlap_thresh=cfg.TEST.NMS,
            #         score_thresh=0.0001,
            #         method=cfg.TEST.SOFT_NMS.METHOD
            #     )
            # else:
            
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
            
            # # Refine the post-NMS boxes using bounding-box voting
            # if cfg.TEST.BBOX_VOTE.ENABLED:
            #     nms_dets = box_utils.box_voting(
            #         nms_dets,
            #         dets_j,
            #         cfg.TEST.BBOX_VOTE.VOTE_TH,
            #         scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            #     )
            sum_softmax[j] = nms_dets[:,-1].sum()
        
        
        model.class_weight_db.update_class_weights(im_idx,sum_softmax)

Beispiel #16

0

Datei anzeigen

Datei: test.py Projekt: Jakaria08/Detectron

def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale

Beispiel #17

0

Datei anzeigen

Datei: generate_proposals.py Projekt: zzm422/DensePose

    def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas,
                                scores):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(),
                                   pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                             (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores

Beispiel #18

0

Datei anzeigen

Datei: test_retinanet.py Projekt: Jhird/Detectron

def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes

Beispiel #19

0

Datei anzeigen

def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
        #predict depths
        pred_depths = workspace.FetchBlob(core.ScopedName('depth_pred')).squeeze()
        # In case there is 1 proposal
        pred_depths = pred_depths.reshape([-1, pred_depths.shape[-1]])
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, pred_depths, im_scale

Beispiel #20

0

Datei anzeigen

def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    if model1 is None and os.environ.get('COSIM'):
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, cfg.TEST.SIZEFIX)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k),
                               v.astype(np.float32, copy=False))
    timers['data1'].toc()
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":

            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            with open("int8.txt", "wb") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "wb") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is  {1}".format(
                    i,
                    model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is  {1}".format(
                    i,
                    model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i,
                    model.net.Proto().op[i].type))
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    #assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i,
                    model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    #logging.warning("int8_outputis {} and fp32 output is {} ".format(int8_results[j], fp32_results[j]))
                    #if not compare_utils.assert_allclose(int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01,
                            os.environ.get('COSIM')):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))

                logging.warning("pass checking op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    batch_size = cls_probs[0].shape[0]
    boxes_all_list = [boxes_all] * batch_size
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()

            # In some cases [especially for very small img sizes], it's possible that
            # candidate_ind is empty if we impose threshold 0.05 at all levels. This
            # will lead to errors since no detections are found for this image. Hence,
            # for lvl 7 which has small spatial resolution, we take the threshold 0.0
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if (len(candidate_inds) == 0):
                continue

            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N,
                               len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            inds_4d = np.array(np.unravel_index(
                inds, (cls_prob[i, :]).shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]
            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])

    timers['im_detect_bbox'].toc()

    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        # Combine predictions across all levels and retain the top scoring by class
        timers['misc_bbox'].tic()
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4] - scores
        #   detections[:, 5] - classes
        detections = np.vstack(detections)
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see core/test_engine.py)
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)

    timers['misc_bbox'].toc()

    return cls_boxes_list