def proposals_for_one_image(self, im_info, bboxes, scores):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bboxes = bboxes.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; first partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(),
                                   pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        proposals = bboxes[order, :]
        scores = scores[order]

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
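
A quick note on the argpartition trick used above: np.argpartition runs in O(n)
and leaves the top K unordered, so sorting only those K afterwards is far
cheaper than a full argsort. A minimal self-contained sketch of the pattern
(the names here are illustrative, not from this repo):

import numpy as np

def top_k_indices(scores, k):
    """Indices of the k largest scores, ordered from highest to lowest."""
    scores = np.asarray(scores).ravel()
    if k <= 0 or k >= len(scores):
        return np.argsort(-scores)
    # argpartition is O(n): the k largest land in the first k slots, unordered
    inds = np.argpartition(-scores, k)[:k]
    # sort only those k entries
    return inds[np.argsort(-scores[inds])]

# top_k_indices([0.1, 0.9, 0.4, 0.7], 2) -> array([1, 3])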
Example #2
def im_detect_bbox(workspace, predict_net, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        workspace: the caffe2 workspace to use
        predict_net: the prediction network
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale (float): image scale used in the input blob (as returned
            by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v, get_device_option_cpu())
    workspace.RunNet(predict_net.name)

    # Read out blobs
    #rois = workspace.FetchBlob(core.ScopedName('rois'))
    rois = workspace.FetchBlob(core.ScopedName('rpn_rois'))

    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    # Apply bounding-box regression deltas
    box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
    # In case there is 1 proposal
    box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])

    pred_boxes = box_utils.bbox_transform(
        boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
    )
    pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))

    return scores, pred_boxes, im_scale
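
For reference, box_utils.bbox_transform above decodes regression deltas into
absolute boxes. A rough numpy sketch of the standard Faster R-CNN decoding it
implements, simplified and hedged: it assumes deltas are tiled 4 columns per
class, uses the usual (wx, wy, ww, wh) weights (Detectron's default is
(10., 10., 5., 5.)), and omits the dw/dh clamping the real code applies to
avoid exp overflow.

import numpy as np

def decode_boxes(boxes, deltas, weights=(10., 10., 5., 5.)):
    """Sketch of Faster R-CNN box decoding (integer-coordinate convention)."""
    wx, wy, ww, wh = weights
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy = deltas[:, 0::4] / wx, deltas[:, 1::4] / wy
    dw, dh = deltas[:, 2::4] / ww, deltas[:, 3::4] / wh
    pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
    pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
    pred_w = np.exp(dw) * widths[:, None]
    pred_h = np.exp(dh) * heights[:, None]
    pred = np.zeros_like(deltas)
    pred[:, 0::4] = pred_ctr_x - 0.5 * pred_w        # x1
    pred[:, 1::4] = pred_ctr_y - 0.5 * pred_h        # y1
    pred[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1.0  # x2
    pred[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1.0  # y2
    return pred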
Example #3
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        pred_depths (ndarray): R x D array of predicted depths
        im_scale (float): image scale used in the input blob (as returned
            by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
        # Predict depths
        pred_depths = workspace.FetchBlob(core.ScopedName('depth_pred')).squeeze()
        # In case there is 1 proposal
        pred_depths = pred_depths.reshape([-1, pred_depths.shape[-1]])
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # The depth head is only fetched alongside bbox regression
        pred_depths = None

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, pred_depths, im_scale
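
box_utils.clip_tiled_boxes, called above, clamps every 4-column tile of the
R x 4*K box array to the image bounds. A hedged sketch consistent with how it
is called here, where im_shape begins with (height, width):

import numpy as np

def clip_boxes_to_image(boxes, im_shape):
    """Sketch: clip an R x (4*K) box array to [0, W-1] x [0, H-1] in place."""
    assert boxes.shape[1] % 4 == 0, 'boxes must tile 4 coordinates per class'
    h, w = im_shape[0], im_shape[1]
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, w - 1)  # x1
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, h - 1)  # y1
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, w - 1)  # x2
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, h - 1)  # y2
    return boxes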
Example #4
def im_detect_bbox_batch(model,
                         ims,
                         target_scale,
                         target_max_size,
                         boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        ims (list): cfg.TEST.IMS_PER_BATCH color images to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores_batch (list of ndarray): per-image R x K arrays of object class
            scores for K classes (K includes background as object category 0)
        pred_boxes_batch (list of ndarray): per-image R x 4*K arrays of
            predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs_batch and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs_batch(ims, boxes, target_scale,
                                         target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscaling back to raw image space happens per image below

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()

    scores_batch = []
    pred_boxes_batch = []
    for i in range(len(ims)):
        # select the rows belonging to image i in the batch
        select_inds = np.where(rois[:, 0] == i)[0]

        # unscale back to raw image space
        boxes = rois[select_inds, 1:5] / im_scales[i]
        boxes = boxes.reshape([-1, boxes.shape[-1]])
        scores_i = scores[select_inds, :]
        scores_i = scores_i.reshape([-1, scores_i.shape[-1]])
        scores_batch.append(scores_i)

        if cfg.TEST.BBOX_REG:
            # In case there is 1 proposal
            box_deltas_i = box_deltas[select_inds, :]
            box_deltas_i = box_deltas_i.reshape([-1, box_deltas_i.shape[-1]])
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Remove predictions for bg class (compat with MSRA code)
                box_deltas_i = box_deltas_i[:, -4:]
            pred_boxes = box_utils.bbox_transform(boxes, box_deltas_i,
                                                  cfg.MODEL.BBOX_REG_WEIGHTS)
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, ims[i].shape)
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                pred_boxes = (np.tile(pred_boxes, (1, scores_i.shape[1])))
            pred_boxes_batch.append(pred_boxes)
        else:
            logger.error('Not implemented.')
            return None, None, None

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        raise NotImplementedError(
            'Deduplication not implemented with batch inference, set TEST.IMS_PER_BATCH to 1'
        )

    return scores_batch, pred_boxes_batch, im_scales
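
The per-image split above relies on column 0 of the rois blob holding the
batch index. A tiny illustration of that layout with made-up values:

import numpy as np

# rois format: [batch_idx, x1, y1, x2, y2], one row per proposal
rois = np.array([
    [0, 10, 10, 50, 60],   # proposal for image 0
    [1, 20, 15, 80, 90],   # proposal for image 1
    [0, 30, 25, 70, 95],   # another proposal for image 0
], dtype=np.float32)

for i in range(2):
    keep = np.where(rois[:, 0] == i)[0]  # row indices for image i
    boxes_i = rois[keep, 1:5]            # drop the batch-index column
    print(i, boxes_i.shape)              # (2, 4) for image 0, (1, 4) for image 1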
Example #5
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases (especially for very small image sizes) candidate_inds
        # can be empty if we impose the 0.05 threshold at all levels, which
        # would leave no detections for this image. Hence, for the top level,
        # which has the smallest spatial resolution, we drop the threshold to 0.0.
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
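
box_utils.nms above performs the usual greedy IoU suppression on rows of
[x1, y1, x2, y2, score]. A plain-numpy sketch of that algorithm, assuming the
same integer-coordinate area convention used elsewhere in these snippets:

import numpy as np

def greedy_nms(dets, thresh):
    """Sketch of greedy NMS; dets is N x 5 as [x1, y1, x2, y2, score]."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = w * h / (areas[i] + areas[order[1:]] - w * h)
        order = order[1:][iou <= thresh]
    return keep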
Example #6
    def proposals_for_one_image(
        self, im_info, all_anchors, bbox_deltas, scores
    ):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; first partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(
                -scores.squeeze(), pre_nms_topN
            )[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(
            all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
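
_filter_boxes is not defined in this excerpt. A plausible implementation,
close to the upstream Detectron helper and consistent with the call sites
above (min_size is rescaled into the resized image space via im_info[2], and
boxes whose centers fall outside the image are dropped):

import numpy as np

def _filter_boxes(boxes, min_size, im_info):
    """Keep boxes with both sides >= min_size and center within the image."""
    # Scale min_size to the resized image; im_info is (height, width, scale)
    min_size *= im_info[2]
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    x_ctr = boxes[:, 0] + ws / 2.
    y_ctr = boxes[:, 1] + hs / 2.
    keep = np.where((ws >= min_size) & (hs >= min_size) &
                    (x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0]
    return keep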
Example #7
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale (float): image scale used in the input blob (as returned
            by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    rois = workspace.FetchBlob(core.ScopedName('rois'))
    split = workspace.FetchBlob(core.ScopedName('roi_numbers'))

    if cfg.TEST.WHAT == 'coco':
        boxes = rois[:int(split[0]), 1:5] / im_scale
        scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        scores = scores.reshape([-1, scores.shape[-1]])
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
    elif cfg.TEST.WHAT == 'toothbrush':
        boxes = rois[int(split[0]):, 1:5] / im_scale
        scores = workspace.FetchBlob(
            core.ScopedName('cls_prob_toothbrush')).squeeze()
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred_toothbrush')).squeeze()
        scores = scores.reshape([-1, scores.shape[-1]])
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
    elif cfg.TEST.WHAT == 'toothbrush_rpn':
        boxes = rois[int(split[0]):, 1:5] / im_scale
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred_toothbrush')).squeeze()
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)

        scores = workspace.FetchBlob(core.ScopedName('roi_scores'))
        scores = scores[int(split[0]):]
        scores = scores[:, np.newaxis]
        # Prepend a zero background column so scores are R x 2
        tmp = np.zeros(scores.shape, dtype=np.float32)
        scores = np.concatenate((tmp, scores), axis=1)
    else:
        raise ValueError('Unknown cfg.TEST.WHAT: {}'.format(cfg.TEST.WHAT))

    return scores, pred_boxes, im_scale
Example #8
def im_detect_bbox(model,
                   im,
                   target_scale,
                   target_max_size,
                   size_fix=None,
                   timers=None,
                   model1=None,
                   boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale (float): image scale used in the input blob (as returned
            by _get_blobs and for use with im_detect_mask, etc.)
        batch_indices (ndarray): batch index column of the output rois
            (None for non-RPN models)
    """
    if timers is None:
        timers = defaultdict(Timer)

    if model1 is None and os.environ.get('COSIM'):
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"

    timers['data1'].tic()
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size,
                                  size_fix)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v)
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k), v)
    timers['data1'].toc()
    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            cosim_alg = os.environ.get('COSIM')
            with open("int8.txt", "wb") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "wb") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is  {1}".format(
                    i,
                    model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is  {1}".format(
                    i,
                    model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i,
                    model.net.Proto().op[i].type))
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    #assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i,
                    model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    #logging.warning("int8_outputis {} and fp32 output is {} ".format(int8_results[j], fp32_results[j]))
                    #if not compare_utils.assert_allclose(int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01, cosim_alg):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))
                    #assert_allclose(int8_results[j], fp32_results[j], **tol)
                logging.warning("pass checking op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    timers['result'].tic()
    # Read out blobs
    batch_indices = None  # only set for RPN-based (Faster R-CNN) models
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale
        batch_indices = rois[:, 0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    timers['result'].toc()
    return scores, pred_boxes, im_scale, batch_indices
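
compare_utils.assert_compare is external to this excerpt; judging from its
call sites it returns a boolean verdict for a loose comparison between int8
and fp32 blobs. A purely hypothetical stand-in with that shape (the real
helper may well check differently, e.g. per-element relative error or cosine
similarity depending on the COSIM mode):

import numpy as np

def assert_compare(actual, expected, rtol, method=None):
    """Hypothetical: True when the int8 and fp32 blobs agree within rtol."""
    actual = np.asarray(actual, dtype=np.float32)
    expected = np.asarray(expected, dtype=np.float32)
    if actual.shape != expected.shape:
        return False
    return bool(np.allclose(actual, expected, rtol=rtol, atol=rtol))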
Example #9
def im_detect_bbox_given_features(model, features, im_info, im_scales,
                                  im_shape):
    """Bounding box object detection for provided features with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        features (dictionary of ndarray): high level features from which to run detection

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI), and the
    # original im_detect_bbox deduplicates those ROIs before the forward pass.
    # This function instead forwards precomputed features through the head of
    # an RPN-based model, where proposals come from the network itself, so the
    # deduplication and FPN multi-level ROI blocks do not apply here (as
    # written upstream they referenced an undefined `inputs` dict and would
    # have raised a NameError if ever reached).
    assert cfg.MODEL.FASTER_RCNN, \
        'im_detect_bbox_given_features assumes an RPN-based (Faster R-CNN) model'

    blobs = copy.copy(features)
    blobs['im_info'] = im_info
    for k, v in blobs.items():
        workspace.FeedBlob(caffe2_core.ScopedName(k), v)
    workspace.RunNet(model.faster_rnn_head.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(caffe2_core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    # use softmax estimated probabilities
    scores = workspace.FetchBlob(caffe2_core.ScopedName('cls_prob')).squeeze()

    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            caffe2_core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes, im_scales
Example #10
    def get_target_class_weights(inputs, outputs):
        
        # 'rois', 'label_mask', 'im_info', 'is_source', 'cls_pred', 'bbox_pred'
        
        import numpy as np
        import detectron.utils.boxes as box_utils
        
        rois = inputs[0].data
        dc_mask = inputs[1].data.astype(bool)
        im_info = inputs[2].data
        is_source = inputs[3].data.astype(bool)
        cls_pred = inputs[4].data
        bbox_pred = inputs[5].data

        this_im_info = im_info[~is_source, :][0, :]
        im_shape = this_im_info[:2]
        im_scale = this_im_info[2]
        im_idx = int(this_im_info[3])  # im_info is extended with its index in the roidb

        rois = rois[~dc_mask, :]
        boxes = rois[:, 1:5] / im_scale
        
        box_deltas = bbox_pred
        scores = cls_pred
        
        # if cfg.TEST.BBOX_REG:
        #     # Apply bounding-box regression deltas
        #     if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        #         # Remove predictions for bg class (compat with MSRA code)
        #         box_deltas = box_deltas[:, -4:]
        
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_shape)
        
        # if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        #     pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
        # else:
        #     # Simply repeat the boxes, once for each class
        #     pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    
        # if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        #     # Map scores and predictions back to the original set of boxes
        #     scores = scores[inv_index, :]
        #     pred_boxes = pred_boxes[inv_index, :]

        # > scores, boxes <
        
        num_classes = cfg.MODEL.NUM_CLASSES
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        sum_softmax = np.zeros((num_classes,))

        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
            scores_j = scores[inds, j]
            boxes_j = pred_boxes[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
                np.float32, copy=False
            )
            
            # if cfg.TEST.SOFT_NMS.ENABLED:
            #     nms_dets, _ = box_utils.soft_nms(
            #         dets_j,
            #         sigma=cfg.TEST.SOFT_NMS.SIGMA,
            #         overlap_thresh=cfg.TEST.NMS,
            #         score_thresh=0.0001,
            #         method=cfg.TEST.SOFT_NMS.METHOD
            #     )
            # else:
            
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
            
            # # Refine the post-NMS boxes using bounding-box voting
            # if cfg.TEST.BBOX_VOTE.ENABLED:
            #     nms_dets = box_utils.box_voting(
            #         nms_dets,
            #         dets_j,
            #         cfg.TEST.BBOX_VOTE.VOTE_TH,
            #         scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            #     )
            sum_softmax[j] = nms_dets[:, -1].sum()

        model.class_weight_db.update_class_weights(im_idx, sum_softmax)
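
The weight computed above for class j is simply the sum of post-NMS detection
scores for j. A toy walk-through of that reduction with made-up numbers:

import numpy as np

# Suppose after thresholding and NMS class 1 keeps detections with scores
# [0.9, 0.6] and class 2 keeps [0.8]; background (class 0) is skipped.
num_classes = 3
kept_scores = {1: np.array([0.9, 0.6]), 2: np.array([0.8])}
sum_softmax = np.zeros((num_classes,))
for j in range(1, num_classes):
    sum_softmax[j] = kept_scores[j].sum()
print(sum_softmax)  # [0.  1.5 0.8]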
Example #11
File: test.py  Project: Jakaria08/Detectron
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale (float): image scale used in the input blob (as returned
            by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale
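
The DEDUP_BOXES block in this function hashes each ROI row into one scalar so
that np.unique can identify ROIs that quantize to the same feature-map cell.
A small demonstration with made-up ROIs (1/16 is a typical DEDUP_BOXES value
for a stride-16 feature map):

import numpy as np

rois = np.array([
    [0, 12.34, 5.67, 40.0, 60.0],
    [0, 12.30, 5.70, 40.0, 60.0],   # quantizes to the same feature ROI
    [0, 99.00, 5.00, 120.0, 130.0],
], dtype=np.float32)

DEDUP_BOXES = 1. / 16.
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * DEDUP_BOXES).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
unique_rois = rois[index, :]       # 2 unique rows survive
restored = unique_rois[inv_index]  # maps results back onto all 3 original rows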
Example #12
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    if model1 is None and os.environ.get('COSIM'):
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, cfg.TEST.SIZEFIX)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k),
                               v.astype(np.float32, copy=False))
    timers['data1'].toc()
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":

            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            with open("int8.txt", "wb") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "wb") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is  {1}".format(
                    i,
                    model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is  {1}".format(
                    i,
                    model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i,
                    model.net.Proto().op[i].type))
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    #assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i,
                    model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    #logging.warning("int8_outputis {} and fp32 output is {} ".format(int8_results[j], fp32_results[j]))
                    #if not compare_utils.assert_allclose(int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01,
                            os.environ.get('COSIM')):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))

                logging.warning("pass checking op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    batch_size = cls_probs[0].shape[0]
    # One independent accumulator per image; note that [defaultdict(list)] *
    # batch_size would alias a single dict across the whole batch
    boxes_all_list = [defaultdict(list) for _ in range(batch_size)]
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()

            # In some cases (especially for very small image sizes) candidate_inds
            # can be empty if we impose the 0.05 threshold at all levels, which
            # would leave no detections for this image. Hence, for the top level,
            # which has the smallest spatial resolution, we drop the threshold to 0.0.
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if len(candidate_inds) == 0:
                continue

            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N,
                               len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            inds_4d = np.array(np.unravel_index(
                inds, (cls_prob[i, :]).shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]
            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])

    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4] - scores
        #   detections[:, 5] - classes
        detections = np.vstack(detections)
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see core/test_engine.py)
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)

    timers['misc_bbox'].toc()

    return cls_boxes_list
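
One pitfall worth noting in the per-image accumulators used above: the list
must hold one independent defaultdict per image. A quick demonstration of why
multiplying a single-element list would silently merge the batch:

from collections import defaultdict

aliased = [defaultdict(list)] * 2        # two names, one shared dict
aliased[0]['cls1'].append('det')
print(len(aliased[1]['cls1']))           # 1 -> image 1 sees image 0's boxes

separate = [defaultdict(list) for _ in range(2)]
separate[0]['cls1'].append('det')
print(len(separate[1]['cls1']))          # 0 -> independent accumulators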