Example #1
def box_results_with_nms_and_limit(scores,
                                   boxes):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a list
    of object detection confidence scores for each of the object classes in the
    dataset (including the background class). `scores[i, j]` corresponds to the
    box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:,
                                              np.newaxis])).astype(np.float32,
                                                                   copy=False)
        if cfg.TEST.USE_GT_PROPOSALS:
            nms_dets = dets_j
        elif cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(dets_j,
                                             sigma=cfg.TEST.SOFT_NMS.SIGMA,
                                             overlap_thresh=cfg.TEST.NMS,
                                             score_thresh=0.0001,
                                             method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
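box_utils.nms above is Detectron's greedy hard-NMS. As a reference for what that call computes, here is a minimal self-contained NumPy sketch of the same greedy IoU suppression (the classic py_cpu_nms pattern; greedy_nms is a hypothetical stand-in, not the Detectron API):

import numpy as np

def greedy_nms(dets, thresh):
    """Greedy NMS over dets = [[x1, y1, x2, y2, score], ...]; returns kept row indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # process boxes from highest to lowest score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Overlap of the current top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop every remaining box that overlaps the kept box too much
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep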
Example #2
def box_results_with_nms_and_limit(scores, boxes):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a list
    of object detection confidence scores for each of the object classes in the
    dataset (including the background class). `scores[i, j]` corresponds to the
    box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            )
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)]
        )
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
Example #3
def run_task(start_img_id, end_img_id, cls_id):
    # print("Task from {} to {}".format(start_img_id, end_img_id))
    ret_boxes = []
    ret_segms = []
    for i in range(11):
        ret_boxes.append([])
        ret_segms.append([])
        for j in range(end_img_id - start_img_id):
            ret_boxes[i].append([])
            ret_segms[i].append([])

    for img_id in range(start_img_id, end_img_id):
        if len(all_segms[cls_id]) != 0:
            if len(all_segms[cls_id][img_id]) != 0:
                segms = all_segms[cls_id][img_id]
                boxes = all_boxes[cls_id][img_id]
                # nms
                nms_start_time = time.time()
                if BBOX_NMS:
                    boxes = np.array(boxes).astype(np.float32, copy=False)
                    keep = box_utils.nms(boxes, 0.5)
                else:
                    keep = rle_mask_nms(segms, boxes, 0.5, mode='IOU')

                # nms_end_time = time.time()
                # print('nms spend {:.2f}s'.format(nms_end_time - nms_start_time))

                top_boxes = boxes[keep, :]
                top_segms = []
                for index in keep:
                    top_segms.append(segms[index])

                vote_start_time = time.time()
                # mask_vote
                # top_segms = rle_mask_voting(
                #     top_segms,
                #     segms,
                #     boxes,
                #     0.9,
                #     0.5
                # )

                # trans from byte to str for json format
                if top_segms is not None and len(top_segms) > 0:
                    for idx, s in enumerate(top_segms):
                        if isinstance(s['counts'], str):
                            top_segms[idx]['counts'] = s['counts']
                        else:
                            top_segms[idx]['counts'] = str(s['counts'], 'utf-8')
                vote_end_time = time.time()
                print('Img:{} cls:{} vote spend {:.2f}s'.format(
                    img_id, cls_id, vote_end_time - vote_start_time))
                ret_boxes[cls_id][img_id - start_img_id].append(top_boxes)
                ret_segms[cls_id][img_id - start_img_id].append(top_segms)

    return ret_boxes, ret_segms
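The byte-to-str loop above exists because pycocotools RLE segmentation dicts carry 'counts' as bytes under Python 3, which json.dump cannot serialize. A minimal sketch of the same conversion (rle_counts_to_str is a hypothetical helper name):

def rle_counts_to_str(segm):
    # pycocotools RLE: 'counts' may be bytes; decode it so json.dump accepts the dict
    if isinstance(segm['counts'], bytes):
        segm['counts'] = segm['counts'].decode('utf-8')
    return segm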
Example #4
def box_results_with_nms_and_limit_return_keep(
        scores, boxes):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a list
    of object detection confidence scores for each of the object classes in the
    dataset (including the background class). `scores[i, j]` corresponds to the
    box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    return_keep = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        # pdb.set_trace()
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:,
                                              np.newaxis])).astype(np.float32,
                                                                   copy=False)
        keep = box_utils.nms(dets_j, cfg.TEST.NMS)
        nms_dets = dets_j[keep, :]
        cls_boxes[j] = nms_dets
        return_keep[j] = inds[keep]

    # Limit to max_per_image detections **over all classes**
    # if cfg.TEST.DETECTIONS_PER_IM > 0:
    #     image_scores = np.hstack(
    #         [cls_boxes[j][:, -1] for j in range(1, num_classes)]
    #     )
    #     if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
    #         image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
    #         for j in range(1, num_classes):
    #             keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
    #             return_keep[j] = return_keep[j][keep]
    #             cls_boxes[j] = cls_boxes[j][keep, :]

    # pdb.set_trace()
    return_keep_np_re = []
    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    for j in range(1, num_classes):
        return_keep_np_re.extend(list(return_keep[j]))

    return_keep_np = np.array(return_keep_np_re)
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes, return_keep_np
Example #5
    def box_results_with_nms_and_limit(self, scores, boxes,
                                       score_thresh=cfg.TEST.SCORE_THRESH):
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(num_classes)]
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > score_thresh)[0]
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack(
                (boxes_j, scores_j[:, np.newaxis])).astype(np.float32,
                                                           copy=False)
            if cfg.TEST.SOFT_NMS.ENABLED:
                nms_dets, _ = box_utils.soft_nms(
                    dets_j,
                    sigma=cfg.TEST.SOFT_NMS.SIGMA,
                    overlap_thresh=cfg.TEST.NMS,
                    score_thresh=0.0001,
                    method=cfg.TEST.SOFT_NMS.METHOD)
            else:
                keep = box_utils.nms(dets_j, cfg.TEST.NMS)
                nms_dets = dets_j[keep, :]
            # add labels
            label_j = np.ones((nms_dets.shape[0], 1), dtype=np.float32) * j
            nms_dets = np.hstack((nms_dets, label_j))
            # Refine the post-NMS boxes using bounding-box voting
            if cfg.TEST.BBOX_VOTE.ENABLED:
                nms_dets = box_utils.box_voting(
                    nms_dets,
                    dets_j,
                    cfg.TEST.BBOX_VOTE.VOTE_TH,
                    scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
            cls_boxes[j] = nms_dets

        # Limit to max_per_image detections **over all classes**
        if cfg.TEST.DETECTIONS_PER_IM > 0:
            image_scores = np.hstack(
                [cls_boxes[j][:, -2] for j in range(1, num_classes)])
            if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
                image_thresh = np.sort(
                    image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
                for j in range(1, num_classes):
                    keep = np.where(cls_boxes[j][:, -2] >= image_thresh)[0]
                    cls_boxes[j] = cls_boxes[j][keep, :]

        im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
        boxes = im_results[:, :-2]
        scores = im_results[:, -2]
        labels = im_results[:, -1]

        return scores, boxes, labels
Example #6
    def get_nms_preds(self, cls_scores, det_boxes_all):
        # probs = F.softmax(cls_scores, -1).data.cpu().numpy()
        probs = cls_scores
        nms_mask = np.zeros_like(probs)
        for c in range(1, probs.shape[-1]):
            s_c = probs[:, c]
            boxes_c = det_boxes_all[:, c]
            dets_c = np.hstack((boxes_c, s_c[:, np.newaxis])).astype(np.float32, copy=False)
            keep = box_utils.nms(dets_c, cfg.TEST.NMS)
            nms_mask[:, c][keep] = 1
        obj_preds = (nms_mask * probs)[:, 1:].argmax(-1) + 1
        return obj_preds
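The nms_mask trick above zeroes every (box, class) score whose detection did not survive that class's NMS, so the final argmax over the foreground columns picks, for each box, its best surviving class. A toy check with made-up numbers:

import numpy as np

probs = np.array([[0.1, 0.7, 0.2],    # box 0: class 1 should win
                  [0.2, 0.3, 0.5]])   # box 1: class 2 should win
nms_mask = np.array([[0., 1., 0.],    # pretend NMS kept box 0 only for class 1
                     [0., 0., 1.]])   # and box 1 only for class 2
obj_preds = (nms_mask * probs)[:, 1:].argmax(-1) + 1  # +1 skips the background column
print(obj_preds)  # -> [1 2]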
Example #7
def iou_box_nms_and_limit(stage1_box, stage1_iou, dets_cls, scores):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    for j in range(1, num_classes):
        inds = np.array(dets_cls[str(j)], dtype=np.int64)  # np.int was removed from NumPy; int64 keeps a valid index array
        if not inds.tolist():
            boxes_j = np.empty((0, 4), dtype=np.float32)
        else:
            boxes_j = stage1_box[inds]
        iou_j = stage1_iou[inds]
        score_j = scores[inds]
        dets_j = np.hstack((boxes_j, iou_j[:, np.newaxis])).astype(np.float32,
                                                                   copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(dets_j,
                                             sigma=cfg.TEST.SOFT_NMS.SIGMA,
                                             overlap_thresh=cfg.TEST.NMS,
                                             score_thresh=0.0001,
                                             method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            boxes_j = boxes_j[keep]
            score_j = score_j[keep]
            nms_dets = np.hstack(
                (boxes_j, score_j[:, np.newaxis])).astype(np.float32,
                                                          copy=False)

        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
Example #8
def get_detections_from_im(cfg,
                           model,
                           im,
                           image_id,
                           featmap_blob_name,
                           feat_blob_name,
                           MIN_BOXES,
                           MAX_BOXES,
                           conf_thresh=0.2,
                           bboxes=None):

    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        region_feat = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))
        # unscale back to raw image space
        cls_boxes = rois[:, 1:5] / im_scale

        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
        objects = np.argmax(cls_prob[keep_boxes], axis=1)
        obj_prob = np.amax(cls_prob[keep_boxes],
                           axis=1)  # proposal not in order!

    assert (np.sum(objects >= 1601) == 0)

    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': cls_boxes[keep_boxes],
        'region_feat': region_feat[keep_boxes],
        'object': objects,
        'obj_prob': obj_prob
    }
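The np.where update inside the class loop keeps, per proposal, the running maximum of its post-NMS class scores; np.maximum expresses the same elementwise rule directly (equivalent, shown with made-up values):

import numpy as np

max_conf = np.zeros(4)
cls_scores = np.array([0.3, 0.1, 0.9, 0.5])
keep = np.array([0, 2])  # indices that survived NMS for this class
max_conf[keep] = np.maximum(max_conf[keep], cls_scores[keep])
print(max_conf)  # -> [0.3 0.  0.9 0. ]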
Example #9
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):

            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()
            # print('*****:', i)
            for cl in range(1, self.num_classes):
                # print('$$$$$$$:', cl)
                t0 = time.time()
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                # print('score: ', scores)
                # print('score dimension: ', scores.dim())
                # if scores.dim() == 0:
                #     continue
                if len(scores) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
                t1 = time.time()
                # print('timer_new: %.4f sec.' % (t1 - t0),flush=True)

        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # Zero out everything past the overall top_k; boolean-mask assignment
        # modifies `flt` (and thus `output`) in place, unlike fill_ on a copy.
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
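The nms call here is a custom variant returning (ids, count). The same per-class loop can be sketched with torchvision.ops.nms, which returns kept indices already sorted by score; this is a sketch under that assumption, not a drop-in replacement for the class above:

import torch
from torchvision.ops import nms

def nms_per_class(decoded_boxes, conf_scores, conf_thresh, nms_thresh, top_k):
    # decoded_boxes: [num_priors, 4] in xyxy; conf_scores: [num_classes, num_priors]
    results = []
    for cl in range(1, conf_scores.size(0)):  # skip background class 0
        mask = conf_scores[cl] > conf_thresh
        scores = conf_scores[cl][mask]
        if scores.numel() == 0:
            continue
        boxes = decoded_boxes[mask]
        keep = nms(boxes, scores, nms_thresh)[:top_k]
        results.append((cl, torch.cat((scores[keep].unsqueeze(1), boxes[keep]), 1)))
    return results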
Example #10
def get_detections_from_im(
    cfg,
    model,
    im,
    image_id,
    feat_blob_name,
    MIN_BOXES,
    MAX_BOXES,
    background=False,
    conf_thresh=0.2,
    bboxes=None,
):

    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes
        )
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))
        # unscale back to raw image space
        cls_boxes = rois[:, 1:5] / im_scale

        start_index = 1
        # Column 0 of the scores matrix is for the background class
        if background:
            start_index = 0
        for cls_ind in range(start_index, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(
                cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]
            )

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
        # Predict the class label using the scores
        objects = np.argmax(cls_prob[keep_boxes][:, start_index:], axis=1)

    return box_features[keep_boxes]
Example #11
    def get_nms_preds(self, cls_scores, det_rois, softmax=True):
        if softmax:
            probs = F.softmax(cls_scores, -1).data.cpu().numpy()
        else:
            probs = cls_scores.data.cpu().numpy()
        # probs = cls_scores
        # det_boxes_all = np.tile(np.expand_dims(det_rois[:, 1:], 1), (1, probs.shape[0], 1))
        nms_mask = np.zeros_like(probs)
        for c in range(1, probs.shape[-1]):
            s_c = probs[:, c]
            boxes_c = det_rois[:, 1:]
            dets_c = np.hstack((boxes_c, s_c[:, np.newaxis])).astype(np.float32, copy=False)
            keep = box_utils.nms(dets_c, cfg.TEST.NMS)
            nms_mask[:, c][keep] = 1
        obj_preds = (nms_mask * probs)[:, 1:].argmax(-1) + 1
        return obj_preds
Example #12
    def box_results_with_nms_and_limit(self, scores, boxes, score_thresh=cfg.TEST.SCORE_THRESH):
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(num_classes)]
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        nms_mask = np.zeros_like(scores)
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > score_thresh)[0]
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
            if cfg.TEST.SOFT_NMS.ENABLED:
                nms_dets, keep = box_utils.soft_nms(
                    dets_j,
                    sigma=cfg.TEST.SOFT_NMS.SIGMA,
                    overlap_thresh=cfg.TEST.NMS,
                    score_thresh=0.0001,
                    method=cfg.TEST.SOFT_NMS.METHOD
                )
            else:
                keep = box_utils.nms(dets_j, cfg.TEST.NMS)
                nms_dets = dets_j[keep, :]
            nms_mask[inds[keep], j] = 1.0  # map `keep` (rows of dets_j) back to original row indices

        dists_all = nms_mask * scores

        scores_pre, labels_pre = dists_all.max(-1), dists_all.argmax(-1)
        inds_all = np.where(scores_pre > 0)[0]
        labels_all = labels_pre[inds_all]
        scores_all = scores_pre[inds_all]

        idx = np.argsort(-scores_all)
        if cfg.TEST.DETECTIONS_PER_IM < idx.shape[0]:
            idx = idx[:cfg.TEST.DETECTIONS_PER_IM]
        
        scores = scores_all[idx]
        labels = labels_all[idx]

        return scores, idx, labels
Example #13
def collect(inputs, is_training):
    cfg_key = 'TRAIN' if is_training else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    k_max = cfg.FPN.RPN_MAX_LEVEL
    k_min = cfg.FPN.RPN_MIN_LEVEL
    num_lvls = k_max - k_min + 1
    roi_inputs = inputs[:num_lvls]
    score_inputs = inputs[num_lvls:]
    if is_training:
        score_inputs = score_inputs[:-2]

    # rois are in [[batch_idx, x0, y0, x1, y1], ...] format
    # Combine predictions across all levels and retain the top scoring
    rois = np.concatenate([blob.data for blob in roi_inputs])
    scores = np.concatenate([blob.data for blob in score_inputs]).squeeze()
    if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays; first partition to get the top K
        # unsorted, then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(-scores.squeeze(),
                               pre_nms_topN)[:pre_nms_topN]
        order = np.argsort(-scores[inds].squeeze())
        order = inds[order]
    proposals = rois[order, 1:]
    scores = scores[order].reshape((-1, 1))
    ids = rois[order, 0].reshape((-1, 1))
    if nms_thresh > 0:
        keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
        ids = ids[keep]
    rois = np.hstack((ids, proposals))
    return rois
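The partition-then-sort idiom used in collect is worth isolating: np.argpartition finds the top-K entries in O(n) without fully sorting, and only those K are then sorted. A self-contained check:

import numpy as np

scores = np.random.rand(200000)
K = 2000
inds = np.argpartition(-scores, K)[:K]   # unordered indices of the K largest scores
order = inds[np.argsort(-scores[inds])]  # sort only those K, highest first
assert scores[order[0]] == scores.max()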
Example #14
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image."""
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]
    boxes = detections[:, 0:4]
    scores = detections[:, 4]
    classes = detections[:, 5]
    return boxes, scores, classes
Example #15
    def forward(self, rpn_cls_probs, rpn_bbox_pred, im_height, im_width,
                scaling_factor, spatial_scale=None):
        """See modeling.detector.GenerateProposals for inputs/outputs
        documentation.
        """
        if spatial_scale is None:
            spatial_scale = self._spatial_scale

        # 1. for each location i in a (H, W) grid:
        #      generate A anchor boxes centered on cell i
        #      apply predicted bbox deltas to each of the A anchors at cell i
        # 2. clip predicted boxes to image
        # 3. remove predicted boxes with either height or width < threshold
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take the top pre_nms_topN proposals before NMS
        # 6. apply NMS with a loose threshold (0.7) to the remaining proposals
        # 7. take after_nms_topN proposals after NMS
        # 8. return the top proposals
        
        # 1. get anchors at all features positions
        all_anchors_np = self.get_all_anchors(num_images=rpn_cls_probs.shape[0],
                                              feature_height=rpn_cls_probs.shape[2],
                                              feature_width=rpn_cls_probs.shape[3],
                                              spatial_scale=spatial_scale)
        
        all_anchors = Variable(torch.FloatTensor(all_anchors_np))
        if rpn_cls_probs.is_cuda:
            all_anchors = all_anchors.cuda()
    
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = rpn_bbox_pred.squeeze(0).permute(1, 2, 0).contiguous().view(-1, 4)
        bbox_deltas_np = bbox_deltas.cpu().data.numpy()

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = rpn_cls_probs.squeeze(0).permute(1, 2, 0).contiguous().view(-1, 1)
        scores_np = scores.cpu().data.numpy()

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if self.rpn_pre_nms_top_n <= 0 or self.rpn_pre_nms_top_n >= len(scores_np):
            order = np.argsort(-scores_np.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(
                -scores_np.squeeze(), self.rpn_pre_nms_top_n
            )[:self.rpn_pre_nms_top_n]
            order = np.argsort(-scores_np[inds].squeeze())
            order = inds[order]
            
        bbox_deltas = bbox_deltas[order, :]
        bbox_deltas_np = bbox_deltas_np[order, :]
        scores = scores[order, :]
        scores_np = scores_np[order, :]
        all_anchors = all_anchors[order, :]
        all_anchors_np = all_anchors_np[order, :]

        # Transform anchors into proposals via bbox transformations
        proposals = self.bbox_transform(all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = self.clip_tiled_boxes(proposals, im_height, im_width)
        proposals_np = proposals.cpu().data.numpy()

        # 3. remove predicted boxes with either height or width < min_size
        keep = self.filter_boxes(proposals_np, self.rpn_min_size, scaling_factor, im_height, im_width)

        proposals = proposals[keep, :]
        proposals_np = proposals_np[keep, :]
        scores = scores[keep, :]
        scores_np = scores_np[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if self.rpn_nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals_np, scores_np)), self.rpn_nms_thresh)
            if self.rpn_post_nms_top_n > 0:
                keep = keep[:self.rpn_post_nms_top_n]
                
            proposals = proposals[keep, :]
            scores = scores[keep, :]
            
        return proposals, scores
Example #16
def box_results_with_nms_and_limit(
        scores,
        boxes,
        num_classes=81,
        score_thresh=0.05,
        overlap_thresh=0.5,
        do_soft_nms=False,
        soft_nms_sigma=0.5,
        soft_nms_method='linear',
        do_bbox_vote=False,
        bbox_vote_thresh=0.8,
        bbox_vote_method='ID',
        max_detections_per_img=100,  ### over all classes ###
):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    
    Some number of detections persist after this and are returned, sorted by class.

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a list
    of object detection confidence scores for each of the object classes in the
    dataset (including the background class). `scores[i, j]` corresponds to the
    box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > score_thresh)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:,
                                              np.newaxis])).astype(np.float32,
                                                                   copy=False)
        if do_soft_nms:
            nms_dets, _ = box_utils.soft_nms(dets_j,
                                             sigma=soft_nms_sigma,
                                             overlap_thresh=overlap_thresh,
                                             score_thresh=0.0001,
                                             method=soft_nms_method)
        else:
            keep = box_utils.nms(dets_j, overlap_thresh)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if do_bbox_vote:
            nms_dets = box_utils.box_voting(nms_dets,
                                            dets_j,
                                            bbox_vote_thresh,
                                            scoring_method=bbox_vote_method)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if max_detections_per_img > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > max_detections_per_img:
            image_thresh = np.sort(image_scores)[-max_detections_per_img]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
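The per-image cap in all of these variants relies on one identity: thresholding at the K-th largest score across classes keeps exactly the top K detections (modulo ties). A toy illustration with made-up scores:

import numpy as np

image_scores = np.array([0.9, 0.1, 0.8, 0.4, 0.6])
K = 3
image_thresh = np.sort(image_scores)[-K]           # 0.6, the 3rd-largest score
kept = image_scores[image_scores >= image_thresh]  # [0.9, 0.8, 0.6]
assert len(kept) == K  # holds for distinct scores; ties can keep a few extra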
Example #17
def box_results_with_nms_and_limit(scores, boxes, prev_cls_boxes=None):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a list
    of object detection confidence scores for each of the object classes in the
    dataset (including the background class). `scores[i, j]` corresponds to the
    box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]

    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] >= cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]

        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            )
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)]
        )
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    # Select the two best boxes per class when scores are low (disabled):
    '''
    if cfg.TEST.NUM_DET_PER_CLASS_PRE==1:
        for j in range(1, num_classes):
            keep = np.argsort(-cls_boxes[j][:, -1])[:2]
            cls_boxes[j] = cls_boxes[j][keep, :]
            # if one has a very strong cls score, we only keep two boxes for weak cls score.
            if len(cls_boxes[j])>0 and cls_boxes[j][0,-1]>0.5:
                cls_boxes[j] = cls_boxes[j][:1, :]'''

    # nms between classes.
    if cfg.TEST.NMS_CROSS_CLASS > 0.:
        '''
        # code to keep some of the dets for which class there is only one det.
        all_cls_boxes = []
        reserved_cls = []
        for j in range(1, num_classes):
            tmp_cls_boxes = np.copy(cls_boxes[j])
            # if only one det for cls j, we keep it.
            if tmp_cls_boxes.shape[0] == 1:
                tmp_cls_boxes[:,-1] = 1.0
            all_cls_boxes.append(tmp_cls_boxes)
        all_dets_for_nms = np.vstack(all_cls_boxes)
        '''
        all_dets = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
        class_ids = np.vstack([np.ones(shape=(len(cls_boxes[j]), 1))*j for j in range(1, num_classes)])
        keep = box_utils.nms(all_dets, cfg.TEST.NMS_CROSS_CLASS)
        all_dets = all_dets[keep, :]
        class_ids = class_ids[keep, :]
        for j in range(1, num_classes):
            idx_j = np.where(class_ids==j)[0]
            cls_boxes[j] = all_dets[idx_j, :]

    # Keep only the top NUM_DET_PER_CLASS_PRE detections per class.
    if cfg.TEST.NUM_DET_PER_CLASS_PRE > 0:
        for j in range(1, num_classes):
            keep = np.argsort(-cls_boxes[j][:, -1])[:cfg.TEST.NUM_DET_PER_CLASS_PRE]
            cls_boxes[j] = cls_boxes[j][keep, :]

    # nms by previous box.
    if cfg.TEST.NMS_SMALL_BOX_IOU > 0:
        for j in range(1, num_classes):
            if prev_cls_boxes is not None:
                assert len(prev_cls_boxes[j]) < 2, 'number of prev boxes should be < 2.'
                if len(prev_cls_boxes[j]) == 1:
                    if prev_cls_boxes[j][0][-1] < cfg.TEST.NMS_SMALL_BOX_SCORE_THRESHOLD:
                        # If not confident about the previous box, skip this nms.
                        continue
                    prev_cls_box = prev_cls_boxes[j][0][:-1]
                    index_to_remove = []
                    for id_box in range(len(cls_boxes[j]) - 1, -1, -1):
                        box = cls_boxes[j][id_box][:-1]
                        iou = bb_intersection_over_union(prev_cls_box, box)
                        if iou < cfg.TEST.NMS_SMALL_BOX_IOU:
                            index_to_remove.append(id_box)
                    cls_boxes[j] = np.delete(cls_boxes[j], index_to_remove, 0)

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
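Example #17 leans on a scalar-box IoU helper, bb_intersection_over_union, that is not shown. A minimal sketch under the usual [x1, y1, x2, y2] convention (an assumption about the helper, not its verified source):

def bb_intersection_over_union(box_a, box_b):
    # Boxes are [x1, y1, x2, y2]; returns intersection-over-union in [0, 1]
    xa, ya = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    xb, yb = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, xb - xa) * max(0.0, yb - ya)
    if inter == 0:
        return 0.0
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter)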
Example #18
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        rpn_ret = self.RPN(blob_conv, im_info, roidb)
        # rpn proposals

        # if self.training:
        #     # can be used to infer fg/bg ratio
        #     return_dict['rois_label'] = rpn_ret['labels_int32']

        rois_certification = False
        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if rois_certification:
            lvl_min = cfg.FPN.ROI_MIN_LEVEL
            lvl_max = cfg.FPN.ROI_MAX_LEVEL
            test_rpn_ret = {'rois': rpn_ret['rois']}
            lvls = fpn_utils.map_rois_to_fpn_levels(test_rpn_ret['rois'],
                                                    lvl_min, lvl_max)
            rois_idx_order = np.empty((0, ))
            test_rois = test_rpn_ret['rois']

            for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)):
                idx_lvl = np.where(lvls == lvl)[0]
                rois_lvl = test_rois[idx_lvl, :]
                rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
                test_rpn_ret['rois_fpn{}'.format(lvl)] = rois_lvl

            rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32,
                                                                 copy=False)
            test_rpn_ret['rois_idx_restore_int32'] = rois_idx_restore

            test_feat = self.Box_Head(blob_conv, test_rpn_ret)
            test_cls_score, test_bbox_pred = self.Box_Outs(test_feat)

            test_cls_score = test_cls_score.data.cpu().numpy().squeeze()
            test_bbox_pred = test_bbox_pred.data.cpu().numpy().squeeze()

        if not cfg.MODEL.RPN_ONLY:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            # bbox proposals
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret)
            cls_score, bbox_pred = self.Box_Outs(box_feat)
        else:
            # TODO: complete the returns for RPN only situation
            pass

        # Loss computation starts here
        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            rpn_kwargs.update(
                dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                     if (k.startswith('rpn_cls_logits')
                         or k.startswith('rpn_bbox_pred'))))
            loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                **rpn_kwargs)
            if cfg.FPN.FPN_ON:
                for i, lvl in enumerate(
                        range(cfg.FPN.RPN_MIN_LEVEL,
                              cfg.FPN.RPN_MAX_LEVEL + 1)):
                    return_dict['losses']['loss_rpn_cls_fpn%d' %
                                          lvl] = loss_rpn_cls[i]
                    return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                          lvl] = loss_rpn_bbox[i]
            else:
                return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

            # bbox loss
            loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                cls_score, bbox_pred, rpn_ret['labels_int32'],
                rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
                rpn_ret['bbox_outside_weights'])
            return_dict['losses']['loss_cls'] = loss_cls
            return_dict['losses']['loss_bbox'] = loss_bbox
            return_dict['metrics']['accuracy_cls'] = accuracy_cls

            if cfg.MODEL.MASK_ON:
                if getattr(self.Mask_Head, 'SHARE_RES5', False):
                    mask_feat = self.Mask_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                else:
                    mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                mask_pred = self.Mask_Outs(mask_feat)
                # return_dict['mask_pred'] = mask_pred
                # mask loss
                loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                    mask_pred, rpn_ret['masks_int32'])
                return_dict['losses']['loss_mask'] = loss_mask

            if cfg.MODEL.KEYPOINTS_ON:
                if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                    # No corresponding keypoint head implemented yet (Neither in Detectron)
                    # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                    kps_feat = self.Keypoint_Head(
                        res5_feat,
                        rpn_ret,
                        roi_has_keypoints_int32=rpn_ret[
                            'roi_has_keypoint_int32'])
                else:
                    kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                kps_pred = self.Keypoint_Outs(kps_feat)
                # return_dict['keypoints_pred'] = kps_pred
                # keypoints loss
                if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'])
                else:
                    loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                        kps_pred, rpn_ret['keypoint_locations_int32'],
                        rpn_ret['keypoint_weights'],
                        rpn_ret['keypoint_loss_normalizer'])
                return_dict['losses']['loss_kps'] = loss_keypoints

            # pytorch0.4 bug on gathering scalar(0-dim) tensors
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        else:
            # Testing
            return_dict['rois'] = rpn_ret['rois']
            import json
            if cfg.TEST.IOU_OUT:
                # rois can be read directly from rpn_ret here
                with open(
                        "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/raw_roi.json",
                        'w') as f:
                    json.dump((return_dict['rois'][:, 1:] /
                               im_info.numpy()[0][2]).tolist(), f)

                # In FPN mode the distributed scores have to be fetched inside a
                # collect_and_distribute... function, so the json is written out in
                # collect_and_distribute_fpn_rpn_proposals.py; here only the
                # RPN_ONLY case is handled directly.
                if not cfg.FPN.FPN_ON:
                    with open(
                            "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/rois_score.json",
                            'w') as f:
                        score_2_json = []
                        for item in rpn_ret['rpn_roi_probs']:
                            score_2_json.append(item.item())
                        json.dump(score_2_json, f)

            # Start the second fast_head stage: first compute pred_box from rois and bbox_delta
            if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
                lvl_min = cfg.FPN.ROI_MIN_LEVEL
                lvl_max = cfg.FPN.ROI_MAX_LEVEL
                if cfg.FPN.FPN_ON:
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
                    box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                    shift_boxes = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    shift_boxes = box_utils.clip_tiled_boxes(
                        shift_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])
                    num_classes = cfg.MODEL.NUM_CLASSES

                    onecls_pred_boxes = []
                    onecls_score = []
                    dets_cls = {}
                    count = 0
                    for j in range(1, num_classes):
                        inds = np.where(
                            cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                        score_j = cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_score += score_j.tolist()
                        dets_cls.update({j: []})
                        for k in range(len(boxes_j.tolist())):
                            dets_cls[j].append(count)
                            count += 1

                    assert count == len(onecls_pred_boxes)
                    stage2_rois_score = np.array(onecls_score,
                                                 dtype=np.float32)
                    stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)

                    # Redistribute stage2_rois using fpn_utils module provided functions
                    # calculate by formula
                    cls_tracker = {}
                    if not stage2_rois.tolist():
                        stage1_pred_iou = stage2_rois_score.tolist()
                        stage2_final_boxes = np.empty((0, ))
                        stage2_final_score = np.empty((0, ))

                        logger.info("Detections above threshold is null.")
                    else:
                        alter_rpn = {}
                        unresize_stage2_rois = stage2_rois * im_scale
                        # unresize_stage2_rois = np.concatenate((unresize_stage2_rois, unresized_rois[:, 1:5]))

                        lvls = fpn_utils.map_rois_to_fpn_levels(
                            unresize_stage2_rois, lvl_min, lvl_max)
                        # TAG: We might need to visualize "stage2_rois" to make sure.
                        rois_idx_order = np.empty((0, ))
                        dummy_batch = np.zeros(
                            (unresize_stage2_rois.shape[0], 1),
                            dtype=np.float32)
                        alter_rpn["rois"] = np.hstack(
                            (dummy_batch,
                             unresize_stage2_rois)).astype(np.float32,
                                                           copy=False)
                        # alter_rpn['rois'] = np.concatenate((alter_rpn['rois'], unresized_rois))

                        for output_idx, lvl in enumerate(
                                range(lvl_min, lvl_max + 1)):
                            idx_lvl = np.where(lvls == lvl)[0]
                            rois_lvl = unresize_stage2_rois[idx_lvl, :]
                            rois_idx_order = np.concatenate(
                                (rois_idx_order, idx_lvl))
                            _ = np.zeros((rois_lvl.shape[0], 1),
                                         dtype=np.float32)
                            alter_rpn['rois_fpn{}'.format(lvl)] = np.hstack(
                                (_, rois_lvl)).astype(np.float32, copy=False)

                        rois_idx_restore = np.argsort(rois_idx_order).astype(
                            np.int32, copy=False)
                        alter_rpn['rois_idx_restore_int32'] = rois_idx_restore
                        # Go through 2nd stage of FPN and fast_head
                        stage2_feat = self.Box_Head(blob_conv, alter_rpn)
                        stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                            stage2_feat)

                        # Transform shift value to original one to get final pred boxes coordinates
                        stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                        ).squeeze()
                        stage2_cls_score = stage2_cls_score.data.cpu().numpy()

                        stage2_box_deltas = stage2_bbox_pred.reshape(
                            [-1, bbox_pred.shape[-1]])
                        # Add some variance to box delta
                        if cfg.FAST_RCNN.STAGE1_TURBULENCE:
                            import random
                            for i in range(len(stage2_box_deltas)):
                                for j in range(len(stage2_box_deltas[i])):
                                    stage2_box_deltas[i][j] *= random.uniform(
                                        0.9, 1.1)

                        stage2_cls_out = box_utils.bbox_transform(
                            stage2_rois, stage2_box_deltas,
                            cfg.MODEL.BBOX_REG_WEIGHTS)
                        stage2_cls_out = box_utils.clip_tiled_boxes(
                            stage2_cls_out,
                            im_info.data.cpu().numpy().squeeze()[0:2])
                        onecls_pred_boxes = []
                        onecls_score = []
                        count = 0
                        for j in range(1, num_classes):
                            inds = np.where(
                                stage2_cls_score[:,
                                                 j] > cfg.TEST.SCORE_THRESH)[0]
                            boxes_j = stage2_cls_out[inds, j * 4:(j + 1) * 4]
                            score_j = stage2_cls_score[inds, j]
                            dets_j = np.hstack(
                                (boxes_j,
                                 score_j[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
                            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
                            boxes_j = boxes_j[keep]
                            score_j = score_j[keep]
                            # Record which class each kept box belongs to
                            onecls_score += score_j.tolist()
                            onecls_pred_boxes += boxes_j.tolist()

                            for _ in range(len(score_j)):
                                cls_tracker[count] = j
                                count += 1

                        assert count == len(onecls_score)
                        stage2_final_boxes = np.array(onecls_pred_boxes,
                                                      dtype=np.float32)
                        stage2_final_score = np.array(onecls_score,
                                                      dtype=np.float32)
                        inds = np.where(stage2_final_score > 0.3)[0]

                        # Back up the unfiltered results, then filter by the 0.3 threshold
                        preserve_stage2_final_boxes = copy.deepcopy(
                            stage2_final_boxes)
                        preserve_stage2_final_score = copy.deepcopy(
                            stage2_final_score)
                        stage2_final_boxes = stage2_final_boxes[inds]
                        stage2_final_score = stage2_final_score[inds]

                        # If nothing survives the 0.3 threshold, fall back to the preserved boxes with a looser 0.1 threshold
                        if stage2_final_boxes.size == 0:
                            lower_inds = np.where(
                                preserve_stage2_final_score > 0.1)[0]
                            stage2_final_boxes = preserve_stage2_final_boxes[
                                lower_inds]
                            stage2_final_score = preserve_stage2_final_score[
                                lower_inds]

                        else:
                            del preserve_stage2_final_boxes
                            del preserve_stage2_final_score

                        # If every box was classified as background
                        if stage2_final_boxes.size == 0:
                            stage1_pred_iou = stage2_rois_score.tolist()
                            stage2_final_boxes = np.empty((0, ))
                            stage2_final_score = np.empty((0, ))
                            logger.info("No detections above the score threshold.")

                        else:
                            # Restore stage2 boxes to the stage2_rois ordering and
                            # compute the IoU between each RoI and its refined box
                            flag = "cross_product"
                            if flag == "element_wise":
                                if stage2_final_boxes.shape[
                                        0] == stage2_rois.shape[0]:
                                    restored_stage2_final_boxes = stage2_final_boxes[
                                        rois_idx_restore]
                                    stage1_pred_iou = []
                                    for ind, item in enumerate(stage2_rois):
                                        stage1 = np.array(
                                            item, dtype=np.float32).reshape(
                                                (1, 4))
                                        stage2 = np.array(
                                            restored_stage2_final_boxes[ind],
                                            dtype=np.float32).reshape((1, 4))
                                        iou = box_utils.bbox_overlaps(
                                            stage1, stage2)
                                        stage1_pred_iou.append(
                                            iou.squeeze().item())
                                else:
                                    logger.info(
                                        "Mistake while processing {}".format(
                                            str(im_info)))
                            elif flag == "cross_product":
                                iou = box_utils.bbox_overlaps(
                                    stage2_rois, stage2_final_boxes)
                                stage1_pred_iou = iou.max(axis=1).tolist()

                    # stage1_pred is an alias for stage2_rois
                    assert len(stage1_pred_iou) == len(stage2_rois)
                    if cfg.FAST_RCNN.IOU_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)

                    elif cfg.FAST_RCNN.SCORE_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)

                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)

                else:
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    # Unscale RoIs back to raw image space
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    box_deltas = bbox_pred.data.cpu().numpy().squeeze()
                    fast_stage1_score = cls_score.data.cpu().numpy().squeeze()

                    box_deltas = box_deltas.reshape([-1, bbox_pred.shape[-1]])
                    stage2_rois = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_rois = box_utils.clip_tiled_boxes(
                        stage2_rois,
                        im_info.data.cpu().numpy().squeeze()[0:2])

                    num_classes = cfg.MODEL.NUM_CLASSES

                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        inds = np.where(
                            fast_stage1_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = stage2_rois[inds, j * 4:(j + 1) * 4]
                        score_j = fast_stage1_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)
                    stage2_rois_score = np.array(onecls_cls_score,
                                                 dtype=np.float32)

                    assert len(stage2_rois) == len(stage2_rois_score)

                    # Send the stage-2 RoIs through the fast head again (RoIAlign runs once more)
                    # by overwriting rpn_ret['rois'], rpn_ret['rpn_rois'] and rpn_ret['rpn_roi_probs']

                    rpn_ret['rois'] = stage2_rois
                    rpn_ret['rpn_rois'] = stage2_rois
                    rpn_ret['rpn_roi_probs'] = stage2_rois_score
                    stage2_box_feat = self.Box_Head(blob_conv, rpn_ret)
                    stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                        stage2_box_feat)

                    stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                    ).squeeze()
                    stage2_cls_score = stage2_cls_score.data.cpu().numpy()
                    stage2_bbox_pred = stage2_bbox_pred.reshape(
                        [-1, bbox_pred.shape[-1]])

                    stage2_cls_pred_boxes = box_utils.bbox_transform(
                        stage2_rois, stage2_bbox_pred,
                        cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_cls_pred_boxes = box_utils.clip_tiled_boxes(
                        stage2_cls_pred_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])

                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        inds = np.where(
                            stage2_cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = stage2_cls_pred_boxes[inds,
                                                        j * 4:(j + 1) * 4]
                        score_j = stage2_cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_bbox_pred = np.array(onecls_pred_boxes,
                                                dtype=np.float32)
                    stage2_bbox_pred_score = np.array(onecls_cls_score,
                                                      dtype=np.float32)

        # The stage-2 predicted boxes are now in stage2_bbox_pred / stage2_bbox_pred_score

        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred
        return return_dict
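The FPN redistribution above relies on fpn_utils.map_rois_to_fpn_levels. Below is a minimal sketch of the level-assignment heuristic from the FPN paper that such helpers typically implement; the canonical constants (224, level 4) and the epsilon are assumptions, not taken from this code.

import numpy as np

def map_rois_to_fpn_levels_sketch(rois, k_min, k_max,
                                  canonical_scale=224, canonical_level=4):
    """Assign each RoI (x1, y1, x2, y2) to an FPN level via the heuristic
    k = floor(k0 + log2(sqrt(area) / 224)), clipped to [k_min, k_max]."""
    widths = rois[:, 2] - rois[:, 0] + 1
    heights = rois[:, 3] - rois[:, 1] + 1
    scales = np.sqrt(widths * heights)
    lvls = np.floor(canonical_level + np.log2(scales / canonical_scale + 1e-6))
    return np.clip(lvls, k_min, k_max).astype(np.int32)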
Beispiel #19
0
    def forward(self,
                rpn_cls_probs,
                rpn_bbox_pred,
                im_height,
                im_width,
                scaling_factor,
                spatial_scale=None):
        """See modeling.detector.GenerateProposals for inputs/outputs
        documentation.
        """
        if spatial_scale is None:
            spatial_scale = self._spatial_scale
        # 1. for each location i in a (H, W) grid:
        #      generate A anchor boxes centered on cell i
        #      apply predicted bbox deltas to each of the A anchors at cell i
        # 2. clip predicted boxes to image
        # 3. remove predicted boxes with either height or width < threshold
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take the top pre_nms_topN proposals before NMS
        # 6. apply NMS with a loose threshold (0.7) to the remaining proposals
        # 7. take after_nms_topN proposals after NMS
        # 8. return the top proposals

        # 1. get anchors at all features positions
        all_anchors_np = self.get_all_anchors(
            num_images=rpn_cls_probs.shape[0],
            feature_height=rpn_cls_probs.shape[2],
            feature_width=rpn_cls_probs.shape[3],
            spatial_scale=spatial_scale)

        all_anchors = Variable(torch.FloatTensor(all_anchors_np))
        if rpn_cls_probs.is_cuda:
            all_anchors = all_anchors.cuda()

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = rpn_bbox_pred.squeeze(0).permute(1, 2,
                                                       0).contiguous().view(
                                                           -1, 4)
        bbox_deltas_np = bbox_deltas.cpu().data.numpy()

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = rpn_cls_probs.squeeze(0).permute(1, 2,
                                                  0).contiguous().view(-1, 1)
        scores_np = scores.cpu().data.numpy()

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if self.rpn_pre_nms_top_n <= 0 or self.rpn_pre_nms_top_n >= len(
                scores_np):
            order = np.argsort(-scores_np.squeeze())
        else:
            # Avoid sorting possibly large arrays; first partition to get the
            # top K unsorted, then sort just those (~20x faster for 200k
            # scores; see the sketch after this function)
            inds = np.argpartition(
                -scores_np.squeeze(),
                self.rpn_pre_nms_top_n)[:self.rpn_pre_nms_top_n]
            order = np.argsort(-scores_np[inds].squeeze())
            order = inds[order]

        bbox_deltas = bbox_deltas[order, :]
        bbox_deltas_np = bbox_deltas_np[order, :]
        scores = scores[order, :]
        scores_np = scores_np[order, :]
        all_anchors = all_anchors[order, :]
        all_anchors_np = all_anchors_np[order, :]

        # Transform anchors into proposals via bbox transformations
        proposals = self.bbox_transform(all_anchors, bbox_deltas,
                                        (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = self.clip_tiled_boxes(proposals, im_height, im_width)
        proposals_np = proposals.cpu().data.numpy()

        # 3. remove predicted boxes with either height or width < min_size
        keep = self.filter_boxes(proposals_np, self.rpn_min_size,
                                 scaling_factor, im_height, im_width)

        proposals = proposals[keep, :]
        proposals_np = proposals_np[keep, :]
        scores = scores[keep, :]
        scores_np = scores_np[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if self.rpn_nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals_np, scores_np)),
                                 self.rpn_nms_thresh)
            if self.rpn_post_nms_top_n > 0:
                keep = keep[:self.rpn_post_nms_top_n]

            proposals = proposals[keep, :]
            scores = scores[keep, :]

        return proposals, scores
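The pre-NMS top-N selection above avoids fully sorting a potentially large score array. A self-contained sketch of the same argpartition trick (the function name is illustrative):

import numpy as np

def top_k_indices(scores, k):
    """Indices of the k largest scores in descending order. argpartition is
    O(n); only the k survivors are then sorted (O(k log k))."""
    if k <= 0 or k >= len(scores):
        return np.argsort(-scores)
    part = np.argpartition(-scores, k)[:k]  # top-k, in arbitrary order
    return part[np.argsort(-scores[part])]  # sort just those k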
def calculate_sim_ap(pkl_ls, gt_info_ls, pred_ls, thres):
    ap_ls = []
    total_precision = []
    total_recall = []
    count_all = 0
    tp_all = 0
    gt_all = 0
    ap_all_ls = []
    support_box_cls_ls = get_box_cls_ls(pkl_ls)

    # For predictions (pred_ls): count boxes and record the feature length
    box_num = 0
    for img_id, img in enumerate(pred_ls):
        box_feat = np.squeeze(img['box_feat'], axis=(2, 3))
        box_num += box_feat.shape[0]
        feat_len = box_feat.shape[1]

    print('pred_box_num, feat_len:', box_num, feat_len)

    # Pre-allocate the query (prediction) arrays
    query_roidb_ls = np.zeros((box_num, 5), dtype=np.float32)
    query_box_feat_ls = np.zeros((box_num, feat_len), dtype=np.float32)
    pred_img_ls = []
    for img_id, img in enumerate(pred_ls):
        box_feat = np.squeeze(img['box_feat'], axis=(2, 3))
        roidb = img['roidb']
        current_box_num = img['box_feat'].shape[0]
        box_start = len(pred_img_ls)
        query_roidb_ls[box_start:box_start + current_box_num, :-1] = roidb[:, 1:]
        query_box_feat_ls[box_start:box_start + current_box_num, :] = box_feat

        pred_img_ls.extend([img_id] *
                           current_box_num)  # record the image id of every box
    pred_img_ls = np.array(pred_img_ls)
    # For ground truth (pkl_ls): count boxes
    gt_img_ls = []
    gt_cls_ls = []
    gt_box_num = 0
    for img_id, img in enumerate(pkl_ls):
        box_feat = np.squeeze(img['box_feat'], axis=(2, 3))
        gt_box_num += box_feat.shape[0]
        feat_len = box_feat.shape[1]

    print('gt_box_num, feat_len:', gt_box_num, feat_len)

    gt_box_ls = np.zeros((gt_box_num, 4), dtype=np.float32)
    for img_id, img in enumerate(pkl_ls):
        roidb = img['roidb']
        current_box_num = roidb.shape[0]
        box_start = len(gt_img_ls)
        gt_box_ls[box_start:box_start + current_box_num, :] = roidb[:, 1:]
        gt_img_ls.extend([img_id] * current_box_num)  # record the image id of every box
        gt_cls_ls.extend(roidb[:, 0].astype(np.int16).tolist())

    # for gt_info_ls
    gt_id_ls = []
    gt_path_ls = []
    for img_id, img in enumerate(gt_info_ls):
        gt_id_ls.append(img['entry']['id'])
        gt_path_ls.append(img['entry']['image'])

    # Iterate over support examples; index -1 picks the last class, since
    # class 0 (background) may have zero support boxes
    for support_id in range(len(support_box_cls_ls[-1])):
        support_roidb_ls = np.array([])
        support_box_feat_ls = np.array([])
        support_box_ls = []
        support_img_ls = []
        support_id_ls = []

        for cls_id in range(len(support_box_cls_ls)):
            if len(support_box_cls_ls[cls_id]) == 0:  # skip classes with no support boxes
                # NOTE: reaching this branch with predicted boxes may indicate an upstream bug
                continue
            else:
                support_item = support_box_cls_ls[cls_id][support_id]
                support_img_id = int(support_item.split('-')[0])
                support_box_id = int(support_item.split('-')[-1])
                support_img_ls.append(support_img_id)
                support_box_ls.append(support_box_id)
            support_img = pkl_ls[int(support_img_id)]
            support_box_feat = np.squeeze(
                support_img['box_feat'],
                axis=(2, 3))[int(support_box_id)][np.newaxis, :]
            support_roidb = support_img['roidb'][int(support_box_id)][
                np.newaxis, :]
            if support_roidb_ls.shape[0] != 0:
                support_roidb_ls = np.concatenate(
                    (support_roidb_ls, support_roidb), axis=0)
                support_box_feat_ls = np.concatenate(
                    (support_box_feat_ls, support_box_feat), axis=0)
            else:
                support_roidb_ls = support_roidb
                support_box_feat_ls = support_box_feat
            support_id_ls.append(str(cls_id) + '_' + support_item)
        support_img_ls = np.array(support_img_ls)
        start = time.time()
        query_box_feat_ls_now = deepcopy(query_box_feat_ls)
        query_box_feat_ls_now = query_box_feat_ls_now[
            ~np.in1d(pred_img_ls, support_img_ls)]
        sim_matrix = 1 - distance.cdist(query_box_feat_ls_now,
                                        support_box_feat_ls, 'cosine')
        sim_matrix_max = np.max(sim_matrix, axis=1)
        pred_box_ls = deepcopy(query_roidb_ls)
        pred_box_ls = pred_box_ls[~np.in1d(pred_img_ls, support_img_ls)]
        pred_box_ls[:, -1] = sim_matrix_max
        pred_img_ls_now = pred_img_ls[~np.in1d(pred_img_ls, support_img_ls)]

        # Boxes whose best similarity is <= thres are treated as background
        sim_matrix_bg = np.where(sim_matrix_max <= thres)
        # NOTE: the +1 offset applies when using gt boxes (class 0 is background);
        # when using predicted boxes, drop the +1
        sim_matrix_cls = np.argmax(sim_matrix, axis=1) + 1
        sim_matrix_cls[sim_matrix_bg] = 0

        pred_cls_ls = deepcopy(sim_matrix_cls)

        current_gt_cls_ls = np.array(
            gt_cls_ls)[~np.in1d(gt_img_ls, support_img_ls)]
        current_gt_img_ls = np.array(
            gt_img_ls)[~np.in1d(gt_img_ls, support_img_ls)]
        current_gt_box_ls = deepcopy(
            gt_box_ls)[~np.in1d(gt_img_ls, support_img_ls)]

        cls_unique = np.unique(current_gt_cls_ls)
        img_unique = np.unique(current_gt_img_ls)

        ap_ls = []
        ovthresh = 0.5
        for cls in cls_unique:
            tp = []
            fp = []
            npos = (current_gt_cls_ls == cls).sum()
            total_pred_box = 0
            total_pred_box_after_nms = 0
            total_gt_box = 0
            total_img = 0
            start = time.time()
            confidence_ls = []
            for img in img_unique:
                pred_box = deepcopy(pred_box_ls)[pred_img_ls_now == img, :]
                pred_cls = deepcopy(pred_cls_ls)[pred_img_ls_now == img]

                pred_box = pred_box[pred_cls == cls, :]
                keep = box_utils.nms(pred_box, 0.3)
                nms_box = pred_box[keep, :]

                # for gt
                gt_box = current_gt_box_ls[current_gt_img_ls == img, :]
                gt_cls = current_gt_cls_ls[current_gt_img_ls == img]
                gt_box = gt_box[gt_cls == cls, :]
                confidence = nms_box[:, -1]
                sorted_ind = np.argsort(-confidence)
                nms_box = nms_box[sorted_ind, :-1]

                # sort confidences so tp/fp can later be ranked by score
                confidence = confidence[sorted_ind]
                confidence_ls.extend(confidence.tolist())

                det_flag = [False] * gt_box.shape[0]
                BBGT = gt_box.astype(float)
                total_pred_box += pred_box.shape[0]
                total_pred_box_after_nms += nms_box.shape[0]
                total_gt_box += gt_box.shape[0]
                if gt_box.shape[0] != 0:
                    total_img += 1
                for box_i in nms_box:
                    bb = box_i.astype(float)

                    ovmax = -np.inf
                    if BBGT.size > 0:
                        # compute overlaps
                        # intersection
                        ixmin = np.maximum(BBGT[:, 0], bb[0])
                        iymin = np.maximum(BBGT[:, 1], bb[1])
                        ixmax = np.minimum(BBGT[:, 2], bb[2])
                        iymax = np.minimum(BBGT[:, 3], bb[3])
                        iw = np.maximum(ixmax - ixmin + 1., 0.)
                        ih = np.maximum(iymax - iymin + 1., 0.)
                        inters = iw * ih

                        # union
                        uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                               (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                               (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

                        overlaps = inters / uni
                        ovmax = np.max(overlaps)
                        jmax = np.argmax(overlaps)
                    if ovmax > ovthresh:
                        if not det_flag[jmax]:
                            tp.append(1.)
                            fp.append(0.)

                            det_flag[jmax] = True
                        else:
                            tp.append(0.)
                            fp.append(1.)

                    else:
                        tp.append(0.)
                        fp.append(1.)
            tp = np.array(tp)
            fp = np.array(fp)
            confidence_ls = np.array(confidence_ls)
            sorted_ind = np.argsort(-confidence_ls)
            tp = tp[sorted_ind]
            fp = fp[sorted_ind]
            img_id = int(support_img_ls[cls - 1])

            # compute precision recall
            fp = np.cumsum(fp)
            tp = np.cumsum(tp)
            rec = tp / float(npos)
            # avoid divide by zero in case the first detection matches a difficult
            # ground truth
            prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
            current_ap = voc_ap(rec, prec)  # see the voc_ap sketch after this function

            # vis
            im_path = gt_path_ls[img_id]
            im_name = im_path.split('/')[-1]
            im_dir = os.path.join('./log/figure', str(thres),
                                  'class_' + str(cls))
            output_name = '%.4f' % round(current_ap, 4) + '_' + str(im_name)
            if not os.path.exists(im_dir):
                os.makedirs(im_dir)
            im = cv2.imread(im_path)
            current_support_box = support_roidb_ls[cls - 1][1:]
            assert cls == int(support_roidb_ls[cls - 1][0])
            vis_image(
                im[:, :, ::-1],
                current_support_box,
                output_name,
                im_dir,
            )

            ap_ls.append(current_ap)
            if cls == 1:
                ap_save = str(thres) + ',' + str(support_id) + ',' + str(
                    current_ap)
            # was support_id_ls[cls_id - 1]; cls_id is a stale loop variable here
            ap_save = ap_save + ',' + str(
                current_ap) + ',' + support_id_ls[cls - 1]
        # Mean AP over classes; background is excluded when using gt boxes
        # (when using predicted boxes, remove the -1 adjustment)
        ap = sum(ap_ls) / float(len(ap_ls))
        ap_all_ls.append(ap)
        print('support_id:', support_id, '  threshold:', thres, '  ap:', ap)
        root_path = './log'
        support_ap_path = os.path.join(root_path, 'support_ap.csv')
        all_ap_path = os.path.join(root_path, 'all_ap.csv')
        if not os.path.exists(root_path):
            os.makedirs(root_path)
        with open(support_ap_path, 'a') as f:
            f.write(str(thres) + ',' + str(support_id) + ',' + str(ap) + '\n')
        with open(all_ap_path, 'a') as f1:
            f1.write(ap_save + '\n')

    return ap_all_ls
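calculate_sim_ap calls a voc_ap helper that is not shown here. A sketch of the standard PASCAL VOC AP routine it most plausibly refers to (the widely used py-faster-rcnn implementation, including the optional 11-point 2007 metric):

import numpy as np

def voc_ap(rec, prec, use_07_metric=False):
    """Average precision from recall/precision arrays, PASCAL VOC style."""
    if use_07_metric:
        # 11-point interpolated AP (VOC 2007)
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            p = np.max(prec[rec >= t]) if np.sum(rec >= t) > 0 else 0.
            ap += p / 11.
        return ap
    # All-points AP: add sentinels and make precision monotonically decreasing
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # Sum the area under the stepwise precision-recall curve
    i = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])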
Beispiel #21
0
def main():
    """main function"""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    dataset = datasets.get_hospital_dataset()
    cfg.MODEL.NUM_CLASSES = 20  # with bg
    num_class = cfg.MODEL.NUM_CLASSES
    sents = dataset.sents
    th_cls = dataset.th_cls
    cls2eng = dataset.cls2eng
    eng2type = dataset.eng2type

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in range(num_images):  # for each image
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        # detection
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN,
                                                        im,
                                                        timers=timers)

        # first we collect boxes from all classes
        dets_total = np.empty([0, 6], dtype=np.float32)
        for cls in range(1, num_class):  # for each cls
            dets = cls_boxes[cls]
            if dets.shape[0] == 0:
                continue
            dets_extend = np.pad(
                dets,
                ((0, 0), (0, 1)),  # pad one extra column on the right
                mode='constant',
                constant_values=cls)  # fill the new column with the class id
            dets_total = np.vstack((dets_total, dets_extend))

        # then apply a loose NMS so that each region keeps only one symptom
        # (see the NMS sketch after this function)
        keep = box_utils.nms(dets_total, 0.7)
        nms_dets = dets_total[keep, :]

        # iterate over the remaining boxes
        report, healthy = '', True
        have_sym_of_cls = [False for _ in range(num_class)]

        n = nms_dets.shape[0]
        final_results = []  # return to the web
        for idx in range(n):  # for each region
            th, cls = nms_dets[idx, -2], int(nms_dets[idx, -1])
            if th > th_cls[cls]:  # diagnosed as having this symptom
                report += sents[cls][1]
                have_sym_of_cls[cls] = True
                healthy = False

                ename = cls2eng[int(cls)]
                _type = eng2type[ename]
                final_results.append({
                    'name': ename,
                    'type': _type,
                    'box': list(nms_dets[idx, 0:4])
                })

        for cls in range(1, num_class):  # for each cls
            if not have_sym_of_cls[cls]:  # no symptom of this class was found
                report += sents[cls][0]

        if healthy:
            report = sents[0][0]
        print(report)


        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))

    if args.merge_pdfs and num_images > 1:
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
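box_utils.nms is used throughout these examples (e.g. with the loose 0.7 threshold above). A minimal NumPy sketch of the greedy hard-NMS it implements, using the same +1 pixel area convention as the surrounding code:

import numpy as np

def nms_sketch(dets, thresh):
    """Greedy NMS over (N, 5) dets [x1, y1, x2, y2, score]: repeatedly keep
    the highest-scoring box and drop boxes overlapping it with IoU > thresh."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = dets[:, 4].argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep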
def evaluate_mAP_combine(json_datasets,
                         roidbs,
                         all_boxes_list,
                         output_dir,
                         cleanup=False):
    """ LJY
    all_boxes: num_cls x num_images x [num_boxes x 5]
    """
    json_dataset = json_datasets[0]
    mAP_folder = '/home/liujingyu/code/mAP'

    roidb, roidb_part = roidbs[0], roidbs[1]
    all_boxes, all_boxes_part = all_boxes_list[0], all_boxes_list[1]

    small_classes = ['结节', '肺实变', '膈面异常', '骨折']

    for i, (entry, entry_part) in enumerate(zip(
            roidb, roidb_part)):  # for each pair of images
        assert entry['file_name'] == entry_part['file_name']

        file_name = entry['file_name'][:-4] + '.txt'
        fgt = open(osp.join(mAP_folder, 'ground-truth', file_name), 'w')
        fpred = open(osp.join(mAP_folder, 'predicted', file_name), 'w')

        for cls_ind, cls in enumerate(json_dataset.classes):  # for each cls
            if cls == '__background__':
                continue
            if cls_ind >= len(all_boxes):
                break

            gt_classes = roidb[i]['gt_classes']
            ind = np.where(gt_classes == cls_ind)

            gt_boxes = roidb[i]['boxes'][ind]

            dets = all_boxes[cls_ind][i]  # N x 5, [x1, y1, x2, y2, score]
            dets_part = all_boxes_part[cls_ind][i]  # N x 5

            # Shift dets_part into whole-image coordinates using the crop offsets
            dets_part[:, 0] += entry_part['offset_x']
            dets_part[:, 2] += entry_part['offset_x']
            dets_part[:, 1] += entry_part['offset_y']
            dets_part[:, 3] += entry_part['offset_y']

            # Alternative: for the small classes, keep only the part-image detections
            # if cls in small_classes:
            #     dets = dets_part

            # merge
            dets = np.vstack((dets, dets_part))

            # NMS on dets and dets_part
            if cfg.TEST.SOFT_NMS.ENABLED:
                nms_dets, _ = box_utils.soft_nms(
                    dets,
                    sigma=cfg.TEST.SOFT_NMS.SIGMA,
                    overlap_thresh=cfg.TEST.NMS,
                    score_thresh=0.0001,
                    method=cfg.TEST.SOFT_NMS.METHOD)
            else:
                keep = box_utils.nms(dets, cfg.TEST.NMS)
                nms_dets = dets[keep, :]
            # Refine the post-NMS boxes using bounding-box voting
            if cfg.TEST.BBOX_VOTE.ENABLED:
                nms_dets = box_utils.box_voting(
                    nms_dets,
                    dets,
                    cfg.TEST.BBOX_VOTE.VOTE_TH,
                    scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
            dets = nms_dets

            # write gt boxes, format: tvmonitor 2 10 173 238
            for k in range(gt_boxes.shape[0]):
                s = '{} {:f} {:f} {:f} {:f}'.format(
                    cls, gt_boxes[k, 0], gt_boxes[k, 1],
                    gt_boxes[k, 2], gt_boxes[k, 3])
                fgt.write(s)
                fgt.write('\n')

                # Aggregate class '肿块结节钙化' (mass / nodule / calcification)
                if cls in ('肿块', '结节', '钙化', '乳头影'):
                    s = '{} {:f} {:f} {:f} {:f}'.format(
                        '肿块结节钙化', gt_boxes[k, 0], gt_boxes[k, 1],
                        gt_boxes[k, 2], gt_boxes[k, 3])
                    fgt.write(s)
                    fgt.write('\n')

                # Aggregate class '高密度影' (high-density opacity)
                if cls in ('纤维化表现', '肺实变', '肺纹理增多', '肿块', '弥漫性结节'):
                    s = '{} {:f} {:f} {:f} {:f}'.format(
                        '高密度影', gt_boxes[k, 0], gt_boxes[k, 1], gt_boxes[k, 2],
                        gt_boxes[k, 3])
                    fgt.write(s)
                    fgt.write('\n')

                # Aggregate class '低密度影' (low-density opacity)
                if cls in ('气胸', '气肿'):
                    s = '{} {:f} {:f} {:f} {:f}'.format(
                        '低密度影', gt_boxes[k, 0], gt_boxes[k, 1], gt_boxes[k, 2],
                        gt_boxes[k, 3])
                    fgt.write(s)
                    fgt.write('\n')

            # write pred boxes, format: tvmonitor 0.471781 0 13 174 244
            for k in range(dets.shape[0]):
                s = '{} {:f} {:f} {:f} {:f} {:f}'.format(
                    cls, dets[k, -1], dets[k, 0], dets[k, 1], dets[k, 2],
                    dets[k, 3])
                fpred.write(s)
                fpred.write('\n')

                # Aggregate class '肿块结节钙化' (mass / nodule / calcification)
                if cls in ('肿块', '结节', '钙化', '乳头影'):
                    s = '{} {:f} {:f} {:f} {:f} {:f}'.format(
                        '肿块结节钙化', dets[k, -1], dets[k, 0], dets[k, 1],
                        dets[k, 2], dets[k, 3])
                    fpred.write(s)
                    fpred.write('\n')

                # Aggregate class '高密度影' (high-density opacity)
                if cls in ('纤维化表现', '肺实变', '肺纹理增多', '肿块', '弥漫性结节'):
                    s = '{} {:f} {:f} {:f} {:f} {:f}'.format(
                        '高密度影', dets[k, -1], dets[k, 0], dets[k, 1],
                        dets[k, 2], dets[k, 3])
                    fpred.write(s)
                    fpred.write('\n')

                # Aggregate class '低密度影' (low-density opacity)
                if cls in ('气胸', '气肿'):
                    s = '{} {:f} {:f} {:f} {:f} {:f}'.format(
                        '低密度影', dets[k, -1], dets[k, 0], dets[k, 1],
                        dets[k, 2], dets[k, 3])
                    fpred.write(s)
                    fpred.write('\n')
            if gt_boxes.shape[0] > 0:  # then we draw gt boxes and pred boxes
                im = cv2.imread(entry['image'])
                more_text = str(entry['eva_id']) + ' ' + entry['doc_name']
                im = vis_boxes_ljy(im, gt_boxes, dets[:, :-1], more_text)
                out_path = os.path.join(
                    '/data5/liujingyu/mask_rcnn_Outputs/vis', cls,
                    entry['file_name'])
                cv2.imwrite(out_path, im)

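evaluate_mAP_combine writes one ground-truth file and one prediction file per image under mAP_folder, in the plain-text layout standalone mAP evaluation tools consume: ground-truth lines are "class x1 y1 x2 y2" and prediction lines are "class confidence x1 y1 x2 y2", matching the format comments in the code above. A hypothetical pair of files for one image (file name and coordinates invented purely for illustration):

ground-truth/chest_001.txt:
    结节 45.000000 88.000000 160.000000 197.000000
predicted/chest_001.txt:
    结节 0.871000 43.000000 85.000000 158.000000 199.000000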
Beispiel #23
0
def im_detect_all(model,
                  im,
                  box_proposals=None,
                  timers=None,
                  im_name_tag=None):
    """Run the full detection pipeline (boxes, masks, keypoints) on one image.
    Args:
      model: the network module
      im: input image as a numpy array (BGR)
      box_proposals: optional precomputed box proposals
      timers: dict of Timer objects recording the cost of each step
      im_name_tag: optional per-image tag used to locate dump files
    """
    if timers is None:
        timers = defaultdict(Timer)

    timers['im_detect_bbox'].tic()
    if cfg.TEST.BBOX_AUG.ENABLED:
        scores, boxes, im_scale, blob_conv = im_detect_bbox_aug(
            model, im, box_proposals)
    else:
        scores, boxes, im_scale, blob_conv = im_detect_bbox(
            model,
            im,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            box_proposals,
            file_tag_path=im_name_tag)
    timers['im_detect_bbox'].toc()

    dict_i = {}
    path = "/nfs/project/libo_i/IOU.pytorch/IOU_Validation"
    timers['misc_bbox'].tic()

    if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
        if cfg.FAST_RCNN.IOU_NMS:
            with open(os.path.join(path, "iou_stage1_pred_iou.json"),
                      "r") as f:
                stage1_pred_iou = np.array(json.load(f), dtype="float32")

            with open(os.path.join(path, "iou_stage1_pred_boxes.json"),
                      "r") as f:
                stage1_pred_boxes = np.array(json.load(f), dtype="float32")

            with open(os.path.join(path, "iou_stage2_pred_boxes.json"),
                      "r") as f:
                stage2_pred_boxes = np.array(json.load(f), dtype="float32")

            with open(os.path.join(path, 'iou_dets_cls.json'), 'r') as f:
                dets_cls = json.load(f)

            with open(os.path.join(path, "iou_stage1_score.json"), "r") as f:
                stage1_score = json.load(f)

            with open(os.path.join(path, "iou_stage2_score.json"), "r") as f:
                stage2_score = json.load(f)

        elif cfg.FAST_RCNN.SCORE_NMS:
            with open(os.path.join(path, "score_stage1_pred_iou.json"),
                      "r") as f:
                stage1_pred_iou = np.array(json.load(f), dtype="float32")

            with open(os.path.join(path, "score_stage1_pred_boxes.json"),
                      "r") as f:
                stage1_pred_boxes = np.array(json.load(f), dtype="float32")

            with open(os.path.join(path, "score_stage2_pred_boxes.json"),
                      "r") as f:
                stage2_pred_boxes = np.array(json.load(f), dtype="float32")

            with open(os.path.join(path, 'score_dets_cls.json'), 'r') as f:
                dets_cls = json.load(f)

            with open(os.path.join(path, "score_stage1_score.json"), "r") as f:
                stage1_score = json.load(f)

            with open(os.path.join(path, "score_stage2_score.json"), "r") as f:
                stage2_score = json.load(f)

        dict_i['stage1_out'] = stage1_pred_boxes
        dict_i['shift_iou'] = stage1_pred_iou
        dict_i['stage1_score'] = stage1_score
        dict_i['stage2_score'] = stage2_score
        dict_i['stage2_out'] = stage2_pred_boxes

        stage1_score = np.array(stage1_score, dtype=np.float32)
        if cfg.FAST_RCNN.IOU_NMS:
            scores, boxes, cls_boxes = iou_box_nms_and_limit(
                stage1_pred_boxes, stage1_pred_iou, dets_cls, stage1_score)
        elif cfg.FAST_RCNN.SCORE_NMS:
            scores, boxes, cls_boxes = iou_box_nms_and_limit(
                stage1_pred_boxes, stage1_score, dets_cls, stage1_score)

        logger.info("Length of inds is {}".format(boxes.shape[0]))
        dict_i['boxes'] = boxes
        dict_i['score'] = scores

    else:
        # scores and boxes cover the whole image after score thresholding and
        # NMS (not separated by class); cls_boxes holds the per-class boxes
        # and scores in the format used for evaluating results
        scores, boxes, cls_boxes = box_results_with_nms_and_limit(
            scores, boxes)
        logger.info("Length of inds is {}".format(boxes.shape[0]))

    timers['misc_bbox'].toc()

    if cfg.TEST.IOU_OUT:

        with open(os.path.join(path, "shifted_boxes.json"), "r") as f:
            pred_boxes = np.array(json.load(f), dtype="float32")

        with open(os.path.join(path, "raw_roi.json"), "r") as f:
            raw_roi = np.array(json.load(f), dtype="float32")

        with open(os.path.join(path, "rois_score.json"), "r") as f:
            rpn_score = np.array(json.load(f), dtype="float32")

        with open(os.path.join(path, "shifted_boxes_scores.json"), "r") as f:
            pred_box_scores = np.array(json.load(f), dtype="float32")

        roi_to_shift = predbox_roi_iou(raw_roi, pred_boxes)

        # Teacher Gu's NMS procedure; unclear whether it should be enabled
        if cfg.FAST_RCNN.IOU_NMS:
            bbox_with_score = np.hstack(
                (raw_roi, roi_to_shift[:, np.newaxis])).astype(np.float32,
                                                               copy=False)
        else:
            bbox_with_score = np.hstack(
                (raw_roi, rpn_score[:, np.newaxis])).astype(np.float32,
                                                            copy=False)

        keep = box_utils.nms(bbox_with_score, cfg.TEST.NMS)

        dict_i['rois'] = raw_roi
        dict_i['shift_iou'] = roi_to_shift.tolist()
        dict_i['rois_score'] = rpn_score.tolist()
        dict_i['pred_boxes'] = pred_boxes
        dict_i['keep'] = keep
        dict_i['pred_boxes_scores'] = pred_box_scores.tolist()

    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:
        timers['im_detect_mask'].tic()
        if cfg.TEST.MASK_AUG.ENABLED:
            masks = im_detect_mask_aug(model, im, boxes, im_scale, blob_conv)
        else:
            masks = im_detect_mask(model, im_scale, boxes, blob_conv)
        timers['im_detect_mask'].toc()

        timers['misc_mask'].tic()
        cls_segms = segm_results(cls_boxes, masks, boxes, im.shape[0],
                                 im.shape[1])
        timers['misc_mask'].toc()
    else:
        cls_segms = None

    if cfg.MODEL.KEYPOINTS_ON and boxes.shape[0] > 0:
        timers['im_detect_keypoints'].tic()
        if cfg.TEST.KPS_AUG.ENABLED:
            heatmaps = im_detect_keypoints_aug(model, im, boxes, im_scale,
                                               blob_conv)
        else:
            heatmaps = im_detect_keypoints(model, im_scale, boxes, blob_conv)
        timers['im_detect_keypoints'].toc()

        timers['misc_keypoints'].tic()
        cls_keyps = keypoint_results(cls_boxes, heatmaps, boxes)
        timers['misc_keypoints'].toc()
    else:
        cls_keyps = None

    return cls_boxes, cls_segms, cls_keyps, dict_i
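Several code paths above toggle cfg.TEST.SOFT_NMS. For reference, a sketch of the linear soft-NMS variant (box_utils.soft_nms also offers a Gaussian method; the thresholds below are illustrative defaults, not this repo's configuration):

import numpy as np

def soft_nms_linear_sketch(dets, iou_thresh=0.3, score_thresh=0.001):
    """Linear soft-NMS: instead of removing overlapping boxes outright, decay
    their scores by (1 - IoU); boxes below score_thresh are dropped.
    dets is (N, 5) = [x1, y1, x2, y2, score]; returns the surviving dets."""
    dets = dets.copy()
    retained = []
    while dets.shape[0] > 0:
        top = np.argmax(dets[:, 4])
        best = dets[top].copy()
        retained.append(best)
        dets = np.delete(dets, top, axis=0)
        if dets.shape[0] == 0:
            break
        # IoU of the remaining boxes against the retained one
        xx1 = np.maximum(best[0], dets[:, 0])
        yy1 = np.maximum(best[1], dets[:, 1])
        xx2 = np.minimum(best[2], dets[:, 2])
        yy2 = np.minimum(best[3], dets[:, 3])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        area_best = (best[2] - best[0] + 1) * (best[3] - best[1] + 1)
        area_rest = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        iou = inter / (area_best + area_rest - inter)
        # Linear decay for boxes that overlap more than iou_thresh
        dets[:, 4] *= np.where(iou > iou_thresh, 1.0 - iou, 1.0)
        dets = dets[dets[:, 4] > score_thresh]
    return np.vstack(retained) if retained else np.zeros((0, 5), np.float32)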
Beispiel #24
0
def get_detections_from_im(cfg,
                           model,
                           im,
                           image_id,
                           featmap_blob_name,
                           feat_blob_name,
                           MIN_BOXES,
                           MAX_BOXES,
                           conf_thresh=0.2,
                           bboxes=None):

    assert conf_thresh >= 0.
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)
        max_conf = np.zeros((num_rpn, ), dtype=np.float32)
        max_cls = np.zeros((num_rpn, ), dtype=np.int32)
        max_box = np.zeros((num_rpn, 4), dtype=np.float32)

        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes[:, (cls_ind * 4):(cls_ind * 4 + 4)],
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            inds_update = np.where(cls_scores[keep] > max_conf[keep])
            kinds = keep[inds_update]
            max_conf[kinds] = cls_scores[kinds]
            max_cls[kinds] = cls_ind
            max_box[kinds] = dets[kinds][:, :4]

        keep_boxes = np.where(max_conf > conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        objects = max_cls[keep_boxes]
        obj_prob = max_conf[keep_boxes]
        obj_boxes = max_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]

    assert np.sum(objects >= cfg.MODEL.NUM_CLASSES) == 0

    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': obj_boxes,
        'region_feat': region_feat[keep_boxes, :],
        'object': objects,
        'obj_prob': obj_prob,
        'cls_prob': cls_prob
    }
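get_detections_from_im clamps the number of kept regions into [MIN_BOXES, MAX_BOXES], the adaptive convention popularized by bottom-up-attention features. A sketch of the alternative fixed-count setting (36 is the common fixed choice in that line of work, assumed here rather than read from this code):

import numpy as np

def keep_fixed_boxes(max_conf, num_boxes=36):
    """Fixed-count variant: always keep the num_boxes highest-confidence RoIs
    regardless of any confidence threshold."""
    return np.argsort(max_conf)[::-1][:num_boxes]

The adaptive scheme keeps feature files smaller on easy images; the fixed scheme gives every image the same tensor shape, which simplifies batching downstream.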
Beispiel #25
0
    def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas,
                                scores):
        # Get mode-dependent configuration
        cfg_key = 'TRAIN' if self._train else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #   - bbox deltas will be (4 * A, H, W) format from conv output
        #   - transpose to (H, W, 4 * A)
        #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
        #     in slowest to fastest order to match the enumerated anchors
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #   - scores are (A, H, W) format from conv output
        #   - transpose to (H, W, A)
        #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
        #     to match the order of anchors and bbox_deltas
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(),
                                   pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        all_anchors = all_anchors[order, :]
        scores = scores[order]

        # Transform anchors into proposals via bbox transformations
        proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                             (1.0, 1.0, 1.0, 1.0))

        # 2. clip proposals to image (may result in proposals with zero area
        # that will be removed in the next step)
        proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < min_size
        keep = _filter_boxes(proposals, min_size, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 6. apply loose nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
        return proposals, scores
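proposals_for_one_image decodes anchors into proposals with box_utils.bbox_transform. A sketch of the standard R-CNN delta decoding that call performs (Detectron's real implementation additionally clips dw/dh before the exp to avoid overflow; that guard is omitted here):

import numpy as np

def bbox_transform_sketch(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
    """Apply (dx, dy, dw, dh) deltas to (x1, y1, x2, y2) boxes: shift the
    center proportionally to box size, scale width/height by exp(dw), exp(dh)."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    wx, wy, ww, wh = weights
    dx, dy = deltas[:, 0::4] / wx, deltas[:, 1::4] / wy
    dw, dh = deltas[:, 2::4] / ww, deltas[:, 3::4] / wh
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred = np.zeros_like(deltas)
    pred[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    pred[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    pred[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    pred[:, 3::4] = pred_ctr_y + 0.5 * pred_h
    return pred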
        print('%d/%d: %s' % (i, len(image_subset), im_name))
        im = cv2.imread(osp.join(args.data_dir, im_name))
        assert im is not None

        # Detect faces and regress bounding-boxes
        scores, boxes, im_scale, blob_conv = im_detect_bbox(
            net, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

        # Format the detection output
        cls_ind = 1
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = box_utils.nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        keep = np.where(dets[:, 4] > CONF_THRESH)
        dets = dets[keep]
        # (x, y, w, h)
        dets[:, 2] = dets[:, 2] - dets[:, 0] + 1
        dets[:, 3] = dets[:, 3] - dets[:, 1] + 1
        print('Num. detections: %d' % dets.shape[0])
        # if dets.size == 0: # nothing detected
        #     continue

        # Saving visualized frames
        viz_out_path = osp.join(img_output_dir, osp.basename(im_name))
        if dets.size != 0:
            im_det = draw_detection_list(im, dets.copy())
            cv2.imwrite(viz_out_path, im_det)
        txt_name = os.path.splitext(im_name)[0] + '.txt'
        dir_name, tmp_im_name = os.path.split(im_name)

        im = cv2.imread(os.path.join(data_dir, im_name))
        assert im is not None

        scores, boxes, im_scale, blob_conv = im_detect_bbox(
            net, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

        cls_ind = 1
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)

        keep = box_utils.nms(dets, cfg.TEST.NMS)
        dets = dets[keep, :]

        keep = np.where(dets[:, 4] > CONF_THRESH)
        dets = dets[keep]

        dets[:, 2] = dets[:, 2] - dets[:, 0] + 1
        dets[:, 3] = dets[:, 3] - dets[:, 1] + 1

        # Save detection results -- [x y w h score]
        dir_name, tmp_im_name = os.path.split(im_name)
        if not os.path.exists(os.path.join(det_dir, dir_name)):
            os.makedirs(os.path.join(det_dir, dir_name))

        with open(os.path.join(det_dir, txt_name), 'w') as fid:
            fid.write(im_name + '\n')
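
The two-line (x, y, w, h) conversion above follows an inclusive-pixel box convention: a box whose opposite corners coincide still covers one pixel. A small sketch to make that explicit (`xyxy_to_xywh` is an illustrative name, not a helper from this codebase):

import numpy as np

def xyxy_to_xywh(dets):
    # Convert [x1, y1, x2, y2, score] rows to [x, y, w, h, score] rows.
    # The +1 matches the convention above: a box with x1 == x2 and
    # y1 == y2 covers a single pixel, so its width and height are 1.
    out = dets.copy()
    out[:, 2] = dets[:, 2] - dets[:, 0] + 1
    out[:, 3] = dets[:, 3] - dets[:, 1] + 1
    return out

# A degenerate one-pixel box becomes (x, y, 1, 1)
assert np.array_equal(
    xyxy_to_xywh(np.array([[3., 4., 3., 4., .9]]))[0, :4],
    np.array([3., 4., 1., 1.]))
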
Example #28
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            # The softmax head predicts a background class at index 0; drop it
            cls_prob = cls_prob[:, :, 1:, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases (especially for very small images), candidate_inds can
        # come back empty if the 0.05 threshold is imposed at all levels,
        # leaving no detections for this image. Hence, for lvl == k_max, which
        # has a small spatial resolution, the threshold is relaxed to 0.0.
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
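
The crux of the per-level selection above is taking a top-K over the raveled 5-D class-probability tensor and then recovering the (batch, anchor, class, y, x) coordinates with np.unravel_index. A toy-shaped sketch of that pattern (shapes and names here are illustrative):

import numpy as np

# Toy class-probability tensor: (batch, A anchors, C classes, H, W)
cls_prob = np.random.RandomState(0).rand(1, 9, 80, 4, 5)

k = 10
flat = cls_prob.ravel()
inds = np.argpartition(flat, -k)[-k:]  # top-k flat positions, unordered
# Recover the 5-D coordinates of each selected score
inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
anchor_ids, classes = inds_5d[:, 1], inds_5d[:, 2]
y, x = inds_5d[:, 3], inds_5d[:, 4]
# Gathering by coordinates reproduces exactly the selected scores
assert np.allclose(flat[inds], cls_prob[0, anchor_ids, classes, y, x])

These recovered coordinates are what index the cell anchors and box deltas in the level loop above.
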
Example #29
def box_results_with_nms_and_limit(scores, boxes,
                                   num_classes=81,
                                   score_thresh=0.05,
                                   overlap_thresh=0.5,
                                   do_soft_nms=False,
                                   soft_nms_sigma=0.5,
                                   soft_nms_method='linear',
                                   do_bbox_vote=False,
                                   bbox_vote_thresh=0.8,
                                   bbox_vote_method='ID',
                                   max_detections_per_img=100,  # over all classes
                                   ):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    
    The detections that survive this filtering are returned, sorted by class.

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]` corresponds
    to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > score_thresh)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        if do_soft_nms:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=soft_nms_sigma,
                overlap_thresh=overlap_thresh,
                score_thresh=0.0001,
                method=soft_nms_method
            )
        else:
            keep = box_utils.nms(dets_j, overlap_thresh)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if do_bbox_vote:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                bbox_vote_thresh,
                scoring_method=bbox_vote_method
            )
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if max_detections_per_img > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)]
        )
        if len(image_scores) > max_detections_per_img:
            image_thresh = np.sort(image_scores)[-max_detections_per_img]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
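
The final limiting step, thresholding every class at the score of the max_detections_per_img-th best detection over all classes, is easy to exercise in isolation. A minimal sketch (`cap_detections` is an illustrative name; ties at the threshold can keep slightly more than the cap, exactly as in the function above):

import numpy as np

def cap_detections(cls_boxes, max_per_image):
    # Keep only the max_per_image highest-scoring detections over all
    # classes. cls_boxes[j] is an (n_j, 5) array of [x1, y1, x2, y2, score];
    # index 0 is the background class and is left untouched.
    image_scores = np.hstack([b[:, -1] for b in cls_boxes[1:]])
    if len(image_scores) <= max_per_image:
        return cls_boxes
    thresh = np.sort(image_scores)[-max_per_image]
    return [cls_boxes[0]] + [
        b[b[:, -1] >= thresh, :] for b in cls_boxes[1:]
    ]

# Two foreground classes, five detections total, capped to the best three
a = np.array([[0, 0, 9, 9, .9], [0, 0, 9, 9, .2]], dtype=np.float32)
b = np.array([[5, 5, 15, 15, .8], [5, 5, 15, 15, .7],
              [5, 5, 15, 15, .1]], dtype=np.float32)
capped = cap_detections([[], a, b], max_per_image=3)
assert sum(len(c) for c in capped[1:]) == 3
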