def box_results_with_nms_and_limit(scores, boxes, confs=None): """Returns bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). `boxes` has shape (#detections, 4 * #classes), where each row represents a list of predicted bounding boxes for each of the object classes in the dataset (including the background class). The detections in each row originate from the same object proposal. `scores` has shape (#detection, #classes), where each row represents a list of object detection confidence scores for each of the object classes in the dataset (including the background class). `scores[i, j]`` corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`. """ num_classes = cfg.MODEL.NUM_CLASSES cls_boxes = [[] for _ in range(num_classes)] # Apply threshold on detection probabilities and apply NMS # Skip j = 0, because it's the background class for j in range(1, num_classes): inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] scores_j = scores[inds, j] boxes_j = boxes[inds, j * 4:(j + 1) * 4] dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False) if cfg.STD_NMS: confs_j = confs[inds, j * 4:(j + 1) * 4] nms_dets, _ = py_nms_utils.soft(dets_j, confidence=confs_j) elif cfg.TEST.SOFT_NMS.ENABLED: nms_dets, _ = box_utils.soft_nms(dets_j, sigma=cfg.TEST.SOFT_NMS.SIGMA, overlap_thresh=cfg.TEST.NMS, score_thresh=0.0001, method=cfg.TEST.SOFT_NMS.METHOD) else: keep = box_utils.nms(dets_j, cfg.TEST.NMS) nms_dets = dets_j[keep, :] # Refine the post-NMS boxes using bounding-box voting if cfg.TEST.BBOX_VOTE.ENABLED: nms_dets = box_utils.box_voting( nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH, scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD) cls_boxes[j] = nms_dets # Limit to max_per_image detections **over all classes** if cfg.TEST.DETECTIONS_PER_IM > 0: image_scores = np.hstack( [cls_boxes[j][:, -1] for j in range(1, num_classes)]) if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] for j in range(1, num_classes): keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0] cls_boxes[j] = cls_boxes[j][keep, :] im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)]) boxes = im_results[:, :-1] scores = im_results[:, -1] return scores, boxes, cls_boxes
def im_detect_bbox(model, im, timers=None): """Generate RetinaNet detections on a single image.""" if timers is None: timers = defaultdict(Timer) # Although anchors are input independent and could be precomputed, # recomputing them per image only brings a small overhead anchors = _create_cell_anchors() timers['im_detect_bbox'].tic() k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS) inputs = {} inputs['data'], im_scale, inputs['im_info'] = \ blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) cls_probs, box_preds = [], [] for lvl in range(k_min, k_max + 1): suffix = 'fpn{}'.format(lvl) cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix))) box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix))) for k, v in inputs.items(): workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False)) workspace.RunNet(model.net.Proto().name) cls_probs = workspace.FetchBlobs(cls_probs) box_preds = workspace.FetchBlobs(box_preds) # here the boxes_all are [x0, y0, x1, y1, score] boxes_all = defaultdict(list) cnt = 0 for lvl in range(k_min, k_max + 1): # create cell anchors array stride = 2.**lvl cell_anchors = anchors[lvl] # fetch per level probability cls_prob = cls_probs[cnt] box_pred = box_preds[cnt] cls_prob = cls_prob.reshape( (cls_prob.shape[0], A, int(cls_prob.shape[1] / A), cls_prob.shape[2], cls_prob.shape[3])) box_pred = box_pred.reshape( (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3])) cnt += 1 if cfg.RETINANET.SOFTMAX: cls_prob = cls_prob[:, :, 1::, :, :] cls_prob_ravel = cls_prob.ravel() # In some cases [especially for very small img sizes], it's possible that # candidate_ind is empty if we impose threshold 0.05 at all levels. This # will lead to errors since no detections are found for this image. Hence, # for lvl 7 which has small spatial resolution, we take the threshold 0.0 th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0 candidate_inds = np.where(cls_prob_ravel > th)[0] if (len(candidate_inds) == 0): continue pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds)) inds = np.argpartition(cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:] inds = candidate_inds[inds] inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose() classes = inds_5d[:, 2] anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4] scores = cls_prob[:, anchor_ids, classes, y, x] boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32) boxes *= stride boxes += cell_anchors[anchor_ids, :] if not cfg.RETINANET.CLASS_SPECIFIC_BBOX: box_deltas = box_pred[0, anchor_ids, :, y, x] else: box_cls_inds = classes * 4 box_deltas = np.vstack([ box_pred[0, ind:ind + 4, yi, xi] for ind, yi, xi in zip(box_cls_inds, y, x) ]) pred_boxes = (box_utils.bbox_transform(boxes, box_deltas) if cfg.TEST.BBOX_REG else boxes) pred_boxes /= im_scale pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape) box_scores = np.zeros((pred_boxes.shape[0], 5)) box_scores[:, 0:4] = pred_boxes box_scores[:, 4] = scores for cls in range(1, cfg.MODEL.NUM_CLASSES): inds = np.where(classes == cls - 1)[0] if len(inds) > 0: boxes_all[cls].extend(box_scores[inds, :]) timers['im_detect_bbox'].toc() # Combine predictions across all levels and retain the top scoring by class timers['misc_bbox'].tic() detections = [] for cls, boxes in boxes_all.items(): cls_dets = np.vstack(boxes).astype(dtype=np.float32) # do class specific nms here # keep = box_utils.nms(cls_dets, cfg.TEST.NMS) # cls_dets = cls_dets[keep, :] if cfg.TEST.SOFT_NMS.ENABLED: nms_dets, _ = box_utils.soft_nms(cls_dets, sigma=cfg.TEST.SOFT_NMS.SIGMA, overlap_thresh=cfg.TEST.NMS, score_thresh=0.0001, method=cfg.TEST.SOFT_NMS.METHOD) else: keep = box_utils.nms(cls_dets, cfg.TEST.NMS) nms_dets = cls_dets[keep, :] # Refine the post-NMS boxes using bounding-box voting if cfg.TEST.BBOX_VOTE.ENABLED: nms_dets = box_utils.box_voting( nms_dets, cls_dets, cfg.TEST.BBOX_VOTE.VOTE_TH, scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD) out = np.zeros((len(nms_dets), 6)) out[:, 0:5] = nms_dets out[:, 5].fill(cls) detections.append(out) # detections (N, 6) format: # detections[:, :4] - boxes # detections[:, 4] - scores # detections[:, 5] - classes detections = np.vstack(detections) # sort all again inds = np.argsort(-detections[:, 4]) detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :] # Convert the detections to image cls_ format (see core/test_engine.py) num_classes = cfg.MODEL.NUM_CLASSES cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] for c in range(1, num_classes): inds = np.where(detections[:, 5] == c)[0] cls_boxes[c] = detections[inds, :5] timers['misc_bbox'].toc() return cls_boxes
def box_results_with_nms_and_limit(scores, boxes, feats=None): """Returns bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). `boxes` has shape (#detections, 4 * #classes), where each row represents a list of predicted bounding boxes for each of the object classes in the dataset (including the background class). The detections in each row originate from the same object proposal. `scores` has shape (#detection, #classes), where each row represents a list of object detection confidence scores for each of the object classes in the dataset (including the background class). `scores[i, j]`` corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`. """ num_classes = cfg.MODEL.NUM_CLASSES cls_boxes = [[] for _ in range(num_classes)] feat_pointers = [ [] for _ in range(num_classes) ] # shadows cls_boxes, remembering the pointers to the corresponding feats. # Apply threshold on detection probabilities and apply NMS # Skip j = 0, because it's the background class for j in range(1, num_classes): inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] scores_j = scores[inds, j] boxes_j = boxes[inds, j * 4:(j + 1) * 4] dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False) if cfg.TEST.SOFT_NMS.ENABLED: nms_dets, _ = box_utils.soft_nms(dets_j, sigma=cfg.TEST.SOFT_NMS.SIGMA, overlap_thresh=cfg.TEST.NMS, score_thresh=0.0001, method=cfg.TEST.SOFT_NMS.METHOD) else: keep = box_utils.nms(dets_j, cfg.TEST.NMS) nms_dets = dets_j[keep, :] if feats is not None: feat_pointers[j] = keep # Refine the post-NMS boxes using bounding-box voting if cfg.TEST.BBOX_VOTE.ENABLED: nms_dets = box_utils.box_voting( nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH, scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD) cls_boxes[j] = nms_dets if feats is not None: # Before limiting the number of detections, first collect global image class distribution: sum_softmax = np.zeros((num_classes, )) for j in range(1, num_classes): sum_softmax[j] = cls_boxes[j][:, -1].sum() # print('ndetects, sum_probs, min_prob, nclasses:',sum([len(subl) for subl in cls_boxes]), sum_softmax.sum(), sum_softmax.min(), sum([ss != 0 for ss in sum_softmax])) else: sum_softmax = None topk_feats = None # Limit to max_per_image detections **over all classes** if cfg.TEST.DETECTIONS_PER_IM > 0: image_scores = np.hstack( [cls_boxes[j][:, -1] for j in range(1, num_classes)]) if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: sorted_scores = np.sort(image_scores) image_thresh = sorted_scores[-cfg.TEST.DETECTIONS_PER_IM] for j in range(1, num_classes): keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0] cls_boxes[j] = cls_boxes[j][keep, :] if feats is not None: feat_pointers[j] = feat_pointers[j][keep] if feats is not None: image_thresh = max( 0.5, sorted_scores[-8] ) # at most the top 8 objects from each image. for j in range(1, num_classes): keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0] feat_pointers[j] = feat_pointers[j][keep] feat_pointers = list( set([p for points in feat_pointers for p in points])) topk_feats = feats[feat_pointers] im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)]) boxes = im_results[:, :-1] scores = im_results[:, -1] return scores, boxes, cls_boxes, sum_softmax, topk_feats
def box_results_with_nms_and_limit(scores, boxes): """Returns bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). `boxes` has shape (#detections, 4 * #classes), where each row represents a list of predicted bounding boxes for each of the object classes in the dataset (including the background class). The detections in each row originate from the same object proposal. `scores` has shape (#detection, #classes), where each row represents a list of object detection confidence scores for each of the object classes in the dataset (including the background class). `scores[i, j]`` corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`. """ num_classes = cfg.MODEL.NUM_CLASSES cls_boxes = [[] for _ in range(num_classes)] # Apply threshold on detection probabilities and apply NMS # Skip j = 0, because it's the background class for j in range(1, num_classes): inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] scores_j = scores[inds, j] boxes_j = boxes[inds, j * 4:(j + 1) * 4] dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype( np.float32, copy=False ) if cfg.TEST.SOFT_NMS.ENABLED: nms_dets, _ = box_utils.soft_nms( dets_j, sigma=cfg.TEST.SOFT_NMS.SIGMA, overlap_thresh=cfg.TEST.NMS, score_thresh=0.0001, method=cfg.TEST.SOFT_NMS.METHOD ) else: keep = box_utils.nms(dets_j, cfg.TEST.NMS) nms_dets = dets_j[keep, :] # Refine the post-NMS boxes using bounding-box voting if cfg.TEST.BBOX_VOTE.ENABLED: nms_dets = box_utils.box_voting( nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH, scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD ) cls_boxes[j] = nms_dets # Limit to max_per_image detections **over all classes** if cfg.TEST.DETECTIONS_PER_IM > 0: image_scores = np.hstack( [cls_boxes[j][:, -1] for j in range(1, num_classes)] ) if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] for j in range(1, num_classes): keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0] cls_boxes[j] = cls_boxes[j][keep, :] im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)]) boxes = im_results[:, :-1] scores = im_results[:, -1] return scores, boxes, cls_boxes