def box_results_with_nms_and_limit(scores, boxes):
    """Threshold detections on score, suppress per class, then cap the total.

    `boxes` has shape (#detections, 4 * #classes * time_dim), where each row
    holds the predicted boxes of one object proposal for every class
    (background included). `time_dim` is derived here as
    `boxes.shape[-1] // (cfg.MODEL.NUM_CLASSES * 4)`.

    `scores` has shape (#detections, #classes); `scores[i, j]` is the
    confidence for the box stored at
    `boxes[i, j * 4 * time_dim:(j + 1) * 4 * time_dim]`.

    Returns a tuple `(scores, boxes, cls_boxes)`: `cls_boxes[j]` holds the
    surviving rows `[box coordinates..., score]` of class j (index 0, the
    background class, stays an empty list), while `scores` and `boxes` are
    the last column and the remaining columns of all foreground classes
    stacked in class order.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    time_dim = boxes.shape[-1] // (num_classes * 4)
    coords_per_class = 4 * time_dim
    # Class 0 is the background class and is never reported.
    foreground = range(1, num_classes)
    cls_boxes = [[] for _ in range(num_classes)]

    for cls in foreground:
        selected = np.where(scores[:, cls] > cfg.TEST.SCORE_THRESH)[0]
        cls_scores = scores[selected, cls]
        cls_coords = boxes[
            selected, cls * coords_per_class:(cls + 1) * coords_per_class]
        candidates = np.hstack(
            (cls_coords, cls_scores[:, np.newaxis])
        ).astype(np.float32, copy=False)

        if cfg.TEST.SOFT_NMS.ENABLED:
            # Not implemented for time_dim > 1
            survivors = soft_nms(
                candidates,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            survivors = candidates[nms(candidates, cfg.TEST.NMS), :]

        # Optionally refine the post-NMS boxes with bounding-box voting
        # against the full pre-NMS candidate set.
        if cfg.TEST.BBOX_VOTE.ENABLED:
            survivors = box_utils.box_voting(
                survivors, candidates, cfg.TEST.BBOX_VOTE.VOTE_TH)
        cls_boxes[cls] = survivors

    # Keep at most DETECTIONS_PER_IM detections **over all classes** by
    # thresholding every class at the score of the N-th best detection.
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        pooled_scores = np.hstack(
            [cls_boxes[cls][:, -1] for cls in foreground])
        if len(pooled_scores) > cfg.TEST.DETECTIONS_PER_IM:
            cutoff = np.sort(pooled_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for cls in foreground:
                dets = cls_boxes[cls]
                strong = np.where(dets[:, -1] >= cutoff)[0]
                cls_boxes[cls] = dets[strong, :]

    stacked = np.vstack([cls_boxes[cls] for cls in foreground])
    return stacked[:, -1], stacked[:, :-1], cls_boxes
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to every (class, image) detection set.

    Args:
        all_boxes: nested sequence indexed as `all_boxes[cls][image]`. Each
            entry is either empty or an array of detections that is handed
            to `nms` and indexed as `dets[keep, :]`.
        thresh: overlap threshold forwarded unchanged to `nms`.

    Returns:
        A nested list with the same `[cls][image]` layout. Entries with no
        input detections, or with no survivors after NMS, are `[]`; every
        other entry is a copy of the rows kept by NMS.
    """
    num_classes = len(all_boxes)
    if num_classes == 0:
        return []
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # A length check works for `[]` and for empty arrays alike;
            # `dets == []` on an ndarray is an element-wise comparison and
            # does not yield a plain boolean.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            # Skip only when nothing survived; a single surviving
            # detection is a valid result and must be kept.
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
def extract_nms_detections(self, regions):
    """Convert centre/size regions to corner boxes, run NMS, rank by score.

    Every region supplies `cx`, `cy`, `w`, `h` and a score `p`. The regions
    are turned into `[x1, y1, x2, y2, p]` rows, filtered with `nms` at an
    overlap threshold of 0.6 and ordered from highest to lowest score.

    Returns:
        `(detections, scores)` where each detection is
        `[0, x1, y1, x2, y2]` and `scores` holds the matching `p` values.
        A falsy `regions` argument yields `([], [])`.
    """
    if not regions:
        return [], []

    corner_boxes = np.array(
        [
            [
                r.cx - r.w * 0.5,
                r.cy - r.h * 0.5,
                r.cx + r.w * 0.5,
                r.cy + r.h * 0.5,
                r.p,
            ]
            for r in regions
        ],
        dtype=np.float32,
    )

    survivors = corner_boxes[nms(corner_boxes, 0.6), :]
    # argsort is ascending, so reverse it to put the best score first.
    ranked = survivors[np.argsort(survivors[:, 4])[::-1], :].tolist()

    detections = [[0] + row[:4] for row in ranked]
    scores = [row[4] for row in ranked]
    return detections, scores
def extract_nms_detections(self, regions):
    """Build score-ranked, NMS-filtered detections from `regions`.

    Each region is read through its `cx`, `cy`, `w`, `h` and `p`
    attributes and expanded to corner form `[x1, y1, x2, y2, p]`. The rows
    go through `nms` with a 0.6 overlap threshold and the survivors are
    sorted by descending `p`.

    Returns:
        A pair `(detections, scores)`: `detections` is a list of
        `[0, x1, y1, x2, y2]` lists and `scores` the corresponding `p`
        values. Returns `([], [])` when `regions` is falsy.
    """
    if not regions:
        return [], []

    rows = []
    for region in regions:
        half_w = region.w * 0.5
        half_h = region.h * 0.5
        rows.append([
            region.cx - half_w,
            region.cy - half_h,
            region.cx + half_w,
            region.cy + half_h,
            region.p,
        ])
    candidates = np.array(rows, dtype=np.float32)

    kept = candidates[nms(candidates, 0.6), :]
    best_first = np.argsort(kept[:, 4])[::-1]

    detections, scores = [], []
    for x1, y1, x2, y2, score in kept[best_first, :].tolist():
        detections.append([0, x1, y1, x2, y2])
        scores.append(score)
    return detections, scores
def forward(self, bottom, top):
    """Turn per-anchor class scores and box deltas into ranked, NMS-ed RoIs.

    Steps (single-item batches only):
      1. tile the base anchors over every feature-map cell,
      2. apply the predicted deltas to the anchors to get proposals,
      3. clip the proposals to the image when `_clip_proposals` is set,
      4. drop proposals rejected by `_filter_boxes` for the scaled
         minimum size,
      5. rank by the summed scores of columns 1.. (column 0 is excluded)
         and keep the best `_pre_nms_topN` when that is positive,
      6. run NMS and keep the first `_post_nms_topN` when that is positive.

    `bottom[0]` carries the scores, `bottom[1]` the box deltas and
    `bottom[2]` the image info (size in its first two entries, scale in
    the third). `top[0]` receives rows of `(0, proposal...)`; when a second
    top exists it receives the matching score rows. Timings are recorded
    in the module-level `t` timers.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    t['total'].tic()

    cls_scores = bottom[0].data[:, :, :, :]
    deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    feat_h, feat_w = cls_scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(cls_scores.shape))

    # One (x, y, x, y) shift per feature-map cell, in row-major cell order.
    grid_x, grid_y = np.meshgrid(
        np.arange(0, feat_w) * self._feat_stride,
        np.arange(0, feat_h) * self._feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts to obtain
    # (K, A, 4), then flatten to (K * A, 4).
    num_anchors = self._num_anchors
    num_cells = shifts.shape[0]
    anchors = (
        self._anchors.reshape((1, num_anchors, 4))
        + shifts.reshape((num_cells, 1, 4))
    ).reshape((num_cells * num_anchors, 4))

    # Move channels last and flatten so that delta rows follow the same
    # (h, w, a) ordering as the anchors.
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Scores: channels last, split the channel axis into
    # (num_classes + 1, num_anchors), then swap those two axes so each
    # output row holds the num_classes + 1 scores of one anchor.
    scores_per_anchor = self._num_classes + 1
    cls_scores = cls_scores.transpose((0, 2, 3, 1)).reshape(
        (-1, scores_per_anchor, self._num_anchors))
    cls_scores = cls_scores.transpose((0, 2, 1)).reshape(
        (-1, scores_per_anchor))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, deltas)

    # 2. clip predicted boxes to image
    if self._clip_proposals:
        proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove boxes below the minimum size, expressed at the input
    #    image scale stored in im_info[2]
    large_enough = _filter_boxes(proposals, self._min_size * im_info[2])
    proposals = proposals[large_enough, :]
    cls_scores = cls_scores[large_enough, :]

    # 4./5. rank by summed scores of columns 1.. and keep the top
    #       pre_nms_topN
    t['sort'].tic()
    ranking = np.sum(cls_scores[:, 1:], axis=1).ravel().argsort()[::-1]
    t['sort'].toc()
    if self._pre_nms_topN > 0:
        ranking = ranking[:self._pre_nms_topN]
    proposals = proposals[ranking, :]
    cls_scores = cls_scores[ranking, :]

    # 6./7. NMS on (proposal, summed score) rows, then keep the first
    #       post_nms_topN survivors
    t['nms'].tic()
    score_column = np.sum(cls_scores[:, 1:], axis=1).reshape((-1, 1))
    survivors = nms(np.hstack((proposals, score_column)), self._nms_thresh)
    t['nms'].toc()
    if self._post_nms_topN > 0:
        survivors = survivors[:self._post_nms_topN]
    proposals = proposals[survivors, :]
    cls_scores = cls_scores[survivors, :]

    # 8. Output rois blob. Only a single input image is supported, so
    #    every batch index is 0.
    batch_column = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rois = np.hstack(
        (batch_column, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*rois.shape)
    top[0].data[...] = rois

    t['total'].toc()

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*cls_scores.shape)
        top[1].data[...] = cls_scores
def forward(self, bottom, top):
    """Produce RoIs (and optionally their scores) from the network outputs.

    Inputs: `bottom[0]` scores, `bottom[1]` box deltas, `bottom[2]` image
    info (size in entries 0-1, scale in entry 2). Only a batch of one item
    is accepted.

    The base anchors are shifted to every feature-map cell and transformed
    by the predicted deltas; the resulting proposals are optionally
    clipped, filtered by `_filter_boxes`, ranked by the sum of their score
    columns 1.. (column 0 is left out), truncated to `_pre_nms_topN`, run
    through NMS and truncated to `_post_nms_topN` (each limit applies only
    when positive).

    Outputs: `top[0]` gets rows of `(0, proposal...)`; `top[1]`, when
    present, gets the per-proposal score rows. The module-level `t` timers
    record total, sort and NMS time.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    t['total'].tic()

    score_blob = bottom[0].data[:, :, :, :]
    delta_blob = bottom[1].data
    im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = score_blob.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(score_blob.shape))

    # Enumerate the (x, y, x, y) offset of every feature-map cell.
    xs = np.arange(0, width) * self._feat_stride
    ys = np.arange(0, height) * self._feat_stride
    xs, ys = np.meshgrid(xs, ys)
    x_col = xs.ravel()
    y_col = ys.ravel()
    shifts = np.vstack((x_col, y_col, x_col, y_col)).transpose()

    # (1, A, 4) anchors + (K, 1, 4) shifts -> (K, A, 4) -> (K * A, 4)
    a_count = self._num_anchors
    k_count = shifts.shape[0]
    base_anchors = self._anchors.reshape((1, a_count, 4))
    cell_shifts = shifts.reshape((k_count, 1, 4))
    anchors = (base_anchors + cell_shifts).reshape((k_count * a_count, 4))

    # Put channels last and flatten, giving one 4-vector of deltas per
    # anchor in (h, w, a) order.
    bbox_deltas = delta_blob.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Scores: channels last, view channels as
    # (num_classes + 1, num_anchors), swap those axes, and flatten to one
    # row of num_classes + 1 scores per anchor.
    n_scores = self._num_classes + 1
    scores = score_blob.transpose((0, 2, 3, 1))
    scores = scores.reshape((-1, n_scores, self._num_anchors))
    scores = scores.transpose((0, 2, 1)).reshape((-1, n_scores))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    if self._clip_proposals:
        proposals = clip_boxes(proposals, im_info[:2])

    # 3. drop boxes under the minimum size (scaled by im_info[2])
    valid = _filter_boxes(proposals, self._min_size * im_info[2])
    proposals, scores = proposals[valid, :], scores[valid, :]

    # 4. sort by summed score of columns 1.., best first
    # 5. take top pre_nms_topN
    t['sort'].tic()
    order = np.sum(scores[:, 1:], axis=1).ravel().argsort()[::-1]
    t['sort'].toc()
    if self._pre_nms_topN > 0:
        order = order[:self._pre_nms_topN]
    proposals, scores = proposals[order, :], scores[order, :]

    # 6. apply nms
    # 7. take post_nms_topN
    t['nms'].tic()
    summed = np.sum(scores[:, 1:], axis=1).reshape((-1, 1))
    keep = nms(np.hstack((proposals, summed)), self._nms_thresh)
    t['nms'].toc()
    if self._post_nms_topN > 0:
        keep = keep[:self._post_nms_topN]
    proposals, scores = proposals[keep, :], scores[keep, :]

    # 8. Output rois blob; a single input image means batch index 0
    #    for every row.
    zeros = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((zeros, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*blob.shape)
    top[0].data[...] = blob

    t['total'].toc()

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*scores.shape)
        top[1].data[...] = scores
def test_image_collection(net, model, image_collection, output_dir):
    """Run detection over `image_collection`, yielding cumulative results.

    For each sample, `im_detect` provides per-class scores and boxes. Only
    the arg-max class of every box is considered: boxes whose best class is
    index 0 or whose best score is not above 0.05 are dropped, and the
    remaining `[x1, y1, x2, y2, score, class]` rows go through one NMS pass
    with `cfg.TEST.FINAL_NMS` before being converted by `to_json_format`.

    When `cfg.TEST.VIZUALIZATION.ENABLE` is set, an image is written under
    `<output_dir>/viz/<relative image path>`.

    Yields:
        The same dict object after each sample, mapping the sample path
        (relative to `image_collection.imgs_path`) to its detections.

    NOTE(review): the yield is taken to be per-sample (inside the loop) --
    confirm against the original layout.
    """
    max_per_image = cfg.TEST.MAX_PER_IMAGE  # read but unused on this path
    SCORE_THRESH = 0.05

    timers = {'im_detect': Timer(), 'misc': Timer()}
    all_detections = {}

    for indx, sample in enumerate(DirectIterator(image_collection)):
        relative_path = PurePath(sample.id).relative_to(
            image_collection.imgs_path)
        image_basename = str(relative_path)

        timers['im_detect'].tic()
        scores, boxes = im_detect(net, model, sample)
        timers['im_detect'].toc()

        timers['misc'].tic()
        best_class = scores.argmax(axis=1)
        best_score = scores.max(axis=1)
        mask = (best_class > 0) * (best_score > SCORE_THRESH)

        if mask.any():
            # Pick, for every retained row, the 4 coordinates that belong
            # to that row's best class.
            picked_boxes = []
            for row in np.flatnonzero(mask):
                start = int(best_class[row]) * 4
                picked_boxes.append(boxes[row, start:start + 4])

            detections = np.hstack((
                np.array(picked_boxes),
                best_score[mask, np.newaxis],
                best_class[mask].reshape((-1, 1)),
            )).astype(np.float32, copy=False)

            # NMS sees only box + score; the class column rides along.
            keep = nms(detections[:, :5], cfg.TEST.FINAL_NMS)
            json_detections = to_json_format(detections[keep])
        else:
            json_detections = []

        all_detections[image_basename] = json_detections

        viz = cfg.TEST.VIZUALIZATION
        if viz.ENABLE:
            score_thresh = viz.SCORE_THRESH
            viz_output_path = os.path.join(output_dir, 'viz', image_basename)
            viz_output_dir = os.path.dirname(viz_output_path)
            if not os.path.exists(viz_output_dir):
                os.makedirs(viz_output_dir)

            draw_boxes = []
            for box in json_detections:
                if box['score'] >= score_thresh:
                    draw_boxes.append(box)

            if not viz.ONLY_WITH_OBJECTS or draw_boxes:
                image = sample.bgr_data
                if viz.DRAW_BOXES:
                    image = plot_bboxes(
                        image, draw_boxes,
                        show_scores=viz.DRAW_SCORES,
                        line_width=1)
                cv2.imwrite(viz_output_path, image)
        timers['misc'].toc()

        progress = 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
            indx + 1, len(image_collection),
            timers['im_detect'].average_time,
            timers['misc'].average_time)
        print(progress)

        yield all_detections
def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores,
                            frames_per_vid):
    """Generate score-ranked, NMS-filtered proposals for a single image.

    Args:
        im_info: image info; `im_info[:2]` is passed to the clipping helper
            and the whole vector to `_filter_boxes`.
        all_anchors: anchors, one row per (H, W, A) location, in the same
            order the deltas and scores are flattened to below.
        bbox_deltas: conv output of shape (4 * A * frames_per_vid, H, W).
        scores: conv output of shape (A, H, W); one score per tube.
        frames_per_vid: number of frames, i.e. of 4-coordinate boxes per
            proposal row.

    Returns:
        `(proposals, scores)`: proposals with 4 * frames_per_vid columns and
        a matching column vector of scores, ordered best first.

    The thresholds and limits come from `cfg.TRAIN` or `cfg.TEST`, chosen
    by `self._train`.
    """
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # Bring the deltas into anchor order:
    #   (4 * A * frames_per_vid, H, W) -> (H, W, 4 * A * frames_per_vid)
    #   -> (H * W * A, 4 * frames_per_vid), rows ordered by (H, W, A).
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape(
        (-1, 4 * frames_per_vid))

    # Same for the tube scores: (A, H, W) -> (H, W, A) -> (H * W * A, 1).
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # ravel() always gives a 1-D view, whereas squeeze() collapses a
    # single-element array to 0-d.
    flat_scores = scores.ravel()
    # `>=` matters: argpartition needs kth < len, so asking for exactly
    # len(scores) proposals must take the full-sort path.
    if pre_nms_topN <= 0 or pre_nms_topN >= len(flat_scores):
        order = np.argsort(-flat_scores)
    else:
        # Avoid sorting possibly large arrays: partition to get the top K
        # unsorted, then sort just those.
        inds = np.argpartition(-flat_scores, pre_nms_topN)[:pre_nms_topN]
        order = inds[np.argsort(-flat_scores[inds])]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]

    # 1. Transform anchors into proposals via bbox transformations
    proposals = box_utils.bbox_transform(
        all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

    # 2. clip proposals to image (may result in proposals with zero area
    #    that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # 3. remove boxes with either height or width < min_size; a proposal
    #    survives only if it passes the filter in every frame.
    keep = np.arange(proposals.shape[0])
    for frame_id in range(frames_per_vid):
        frame_boxes = proposals[:, frame_id * 4:(frame_id + 1) * 4]
        keep = np.intersect1d(
            keep, _filter_boxes(frame_boxes, min_size, im_info))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
    return proposals, scores
def test_image_collection(net, model, image_collection, output_dir,
                          prev_marking=None):
    """Run per-class detection over `image_collection`, resuming if asked.

    Samples whose relative path is already a key of `prev_marking` are
    skipped. For every other sample and every class j >= 1, boxes scoring
    above 0.05 are limited to the `cfg.TEST.MAX_PER_IMAGE` best, filtered
    with the algorithm named by `cfg.TEST.FINAL_NMS_ALG` (`'NMS'`,
    `'SOFT_NMS_L'`, `'SOFT_NMS_G'` or `'NONE'`) and converted by
    `to_json_format`.

    When `cfg.TEST.VIZUALIZATION.ENABLE` is set, an image is written under
    `<output_dir>/viz/<relative image path>`.

    Yields:
        The cumulative detections dict after each processed sample. When
        `prev_marking` is given, it is that same object, updated in place.

    Raises:
        ValueError: if `cfg.TEST.FINAL_NMS_ALG` is not a known algorithm.

    NOTE(review): the yield is taken to be per-sample (inside the loop) and
    `pbar.close()` to follow the loop -- confirm against the original
    layout.
    """
    max_per_image = cfg.TEST.MAX_PER_IMAGE
    SCORE_THRESH = 0.05
    # `method` argument handed to soft_nms for each soft variant.
    soft_nms_methods = {'SOFT_NMS_L': 1, 'SOFT_NMS_G': 2}

    timers = {'im_detect': Timer(), 'misc': Timer()}
    if prev_marking is None:
        all_detections = {}
    else:
        all_detections = prev_marking

    pbar = tqdm(total=len(image_collection))
    for sample in DirectIterator(image_collection):
        relative_path = PurePath(sample.id).relative_to(
            image_collection.imgs_path)
        image_basename = str(relative_path)

        # Already processed in a previous run: only advance the bar.
        if image_basename in all_detections:
            pbar.update(1)
            continue

        timers['im_detect'].tic()
        scores, boxes = im_detect(net, model, sample)
        timers['im_detect'].toc()

        timers['misc'].tic()
        json_detections = []
        for j in range(1, scores.shape[1]):
            above = np.where(scores[:, j] > SCORE_THRESH)[0]
            # Best-first order, truncated to the per-image budget.
            best = np.argsort(-scores[above, j])[:max_per_image]
            cls_scores = scores[above, j][best]
            cls_boxes = boxes[above, j * 4:(j + 1) * 4][best, :]

            detections = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])
            ).astype(np.float32, copy=False)

            algorithm = cfg.TEST.FINAL_NMS_ALG
            if algorithm == 'NMS':
                keep = nms(detections, cfg.TEST.FINAL_NMS)
            elif algorithm in soft_nms_methods:
                keep = soft_nms(
                    detections, sigma=0.5, Nt=cfg.TEST.FINAL_NMS,
                    method=soft_nms_methods[algorithm])
            elif algorithm == 'NONE':
                keep = list(range(len(detections)))
            else:
                raise ValueError('Unknown NMS algorithm: %s' % algorithm)

            json_detections.extend(to_json_format(detections[keep], j))

        all_detections[image_basename] = json_detections

        viz = cfg.TEST.VIZUALIZATION
        if viz.ENABLE:
            score_thresh = viz.SCORE_THRESH
            viz_output_path = os.path.join(output_dir, 'viz', image_basename)
            viz_output_dir = os.path.dirname(viz_output_path)
            if not os.path.exists(viz_output_dir):
                os.makedirs(viz_output_dir)

            draw_boxes = []
            for box in json_detections:
                if box['score'] >= score_thresh:
                    draw_boxes.append(box)

            if not viz.ONLY_WITH_OBJECTS or draw_boxes:
                image = sample.bgr_data
                if viz.DRAW_BOXES:
                    image = plot_bboxes(
                        image, draw_boxes,
                        show_scores=viz.DRAW_SCORES,
                        line_width=3)
                cv2.imwrite(viz_output_path, image)
        timers['misc'].toc()

        pbar.update(1)
        yield all_detections
    pbar.close()
def proposals_for_one_image(
        self, im_info, all_anchors, bbox_deltas, scores, frames_per_vid):
    """Generate score-ranked, NMS-filtered proposals for a single image.

    Args:
        im_info: image info; `im_info[:2]` is passed to the clipping helper
            and the whole vector to `_filter_boxes`.
        all_anchors: anchors, one row per (H, W, A) location, in the same
            order the deltas and scores are flattened to below.
        bbox_deltas: conv output of shape (4 * A * frames_per_vid, H, W).
        scores: conv output of shape (A, H, W); one score per tube.
        frames_per_vid: number of frames, i.e. of 4-coordinate boxes per
            proposal row.

    Returns:
        `(proposals, scores)`: proposals with 4 * frames_per_vid columns and
        a matching column vector of scores, ordered best first.

    The thresholds and limits come from `cfg.TRAIN` or `cfg.TEST`, chosen
    by `self._train`.
    """
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # Bring the deltas into anchor order:
    #   (4 * A * frames_per_vid, H, W) -> (H, W, 4 * A * frames_per_vid)
    #   -> (H * W * A, 4 * frames_per_vid), rows ordered by (H, W, A).
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape(
        (-1, 4 * frames_per_vid))

    # Same for the tube scores: (A, H, W) -> (H, W, A) -> (H * W * A, 1).
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # ravel() always gives a 1-D view, whereas squeeze() collapses a
    # single-element array to 0-d.
    flat_scores = scores.ravel()
    # `>=` matters: argpartition needs kth < len, so asking for exactly
    # len(scores) proposals must take the full-sort path.
    if pre_nms_topN <= 0 or pre_nms_topN >= len(flat_scores):
        order = np.argsort(-flat_scores)
    else:
        # Avoid sorting possibly large arrays: partition to get the top K
        # unsorted, then sort just those.
        inds = np.argpartition(-flat_scores, pre_nms_topN)[:pre_nms_topN]
        order = inds[np.argsort(-flat_scores[inds])]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]

    # 1. Transform anchors into proposals via bbox transformations
    proposals = box_utils.bbox_transform(
        all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

    # 2. clip proposals to image (may result in proposals with zero area
    #    that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # 3. remove boxes with either height or width < min_size; a proposal
    #    survives only if it passes the filter in every frame.
    keep = np.arange(proposals.shape[0])
    for frame_id in range(frames_per_vid):
        frame_boxes = proposals[:, frame_id * 4:(frame_id + 1) * 4]
        keep = np.intersect1d(
            keep, _filter_boxes(frame_boxes, min_size, im_info))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
    return proposals, scores