# Shared imports for the inference helpers below. The Detectron-style module
# paths are the usual ones; the INT8 calibration helpers (Calibrator,
# AbsmaxCalib, EMACalib, KLCalib) and compare_utils come from fork-specific
# Caffe2 INT8 tooling, so their exact import paths may differ.
from collections import defaultdict
import logging
import os

import numpy as np

from caffe2.python import core, workspace
from detectron.core.config import cfg
from detectron.utils.timer import Timer
import detectron.utils.blob as blob_utils
import detectron.utils.boxes as box_utils


def _get_blobs(im, rois, target_scale, target_max_size, pose_model=None,
               entry=None):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    if entry is not None:
        # Add pose information to the input blobs.
        blobs['data'], im_scale, blobs['im_info'], pose_blob, pose_line = \
            blob_utils.get_image_pose_blob(
                im, target_scale, target_max_size, entry)
        # Both branches currently resize the pose blob with 26 keypoint
        # channels; non-ATR datasets additionally get pose-line blobs.
        if 'ATR' in cfg.TEST.DATASETS[0]:
            blobs['pose_pred_4'], blobs['pose_pred_8'], \
                blobs['pose_pred_16'], blobs['pose_pred_32'] = \
                _resize_pose_blob(pose_blob, 26)
        else:
            blobs['pose_pred_4'], blobs['pose_pred_8'], \
                blobs['pose_pred_16'], blobs['pose_pred_32'] = \
                _resize_pose_blob(pose_blob, 26)
            blobs['pose_line_8'], blobs['pose_line_16'] = \
                _resize_poseline_blob(pose_line)
    else:
        # No pose input.
        blobs['data'], im_scale, blobs['im_info'] = \
            blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    if pose_model is not None:
        blobs['pose_pred'] = np.asarray(
            pose_model.pred_pose_one_img(im), dtype=np.float32)
    return blobs, im_scale
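# A minimal, hypothetical usage sketch for _get_blobs (not part of the
# original API): it builds network inputs from a synthetic image and two RoIs
# and feeds them into the workspace. Assumes cfg is already loaded and that
# the module's _get_rois_blob helper is available.
def _demo_get_blobs_usage():
    im = np.random.randint(0, 255, (480, 640, 3)).astype(np.uint8)
    rois = np.array([[10., 20., 100., 200.],
                     [50., 60., 300., 400.]], dtype=np.float32)
    blobs, im_scale = _get_blobs(im, rois, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in blobs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    return im_scale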
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
def im_extract_features(model, im, ffpn_levels, timers=None):
    """Extract the high-level features listed in `ffpn_levels` from the model
    for the image `im`."""
    if timers is None:
        timers = defaultdict(Timer)
    timers['im_extract_features'].tic()
    # Get inputs to the Caffe2 model.
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in blobs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    # Run the net forward.
    workspace.RunNet(model.backbone.Proto().name)
    # Extract the features.
    features = {}
    for feat in ffpn_levels:
        features[feat] = workspace.FetchBlob(core.ScopedName(feat))
    im_info = blobs['im_info']
    timers['im_extract_features'].toc()
    return features, im_info, im_scale, im.shape
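# Hypothetical usage sketch for im_extract_features: the FPN blob names below
# are illustrative placeholders, not names confirmed by this code; the real
# names depend on what the backbone net exposes.
def _demo_extract_features_usage(model, im):
    ffpn_levels = ['fpn_res2_2_sum', 'fpn_res3_3_sum']  # placeholder names
    features, im_info, im_scale, im_shape = im_extract_features(
        model, im, ffpn_levels)
    for name, feat in features.items():
        print(name, feat.shape)
    return im_scale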
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    return blobs, im_scale
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring.
        boxes = np.concatenate(blobs[:len(rois_names)])
        scores = np.concatenate(blobs[len(rois_names):]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'), core.ScopedName('rpn_roi_probs')])
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes.
    # Scale proposals back to the original input image scale.
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
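# Self-contained sketch of the cross-level selection used above: concatenate
# per-level proposals, then keep the highest-scoring top_n overall (the
# argsort(-scores) trick). Synthetic data; no Caffe2 needed.
def _demo_topn_across_levels(top_n=5):
    rng = np.random.RandomState(0)
    sizes = (4, 3, 2)  # proposals per FPN level
    per_level_boxes = [rng.rand(n, 5) for n in sizes]  # (batch_ind, x1, y1, x2, y2)
    per_level_scores = [rng.rand(n) for n in sizes]
    boxes = np.concatenate(per_level_boxes)
    scores = np.concatenate(per_level_scores)
    inds = np.argsort(-scores)[:top_n]  # descending by score, keep top_n
    return boxes[inds, :], scores[inds]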
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`, optionally running
    INT8 calibration instead of a plain forward pass."""
    im_blob, im_scale, _ = blob_utils.get_image_blob(
        im, target_scale, target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    if os.environ.get('INT8INFO') == "1":
        # Pick the calibration algorithm from environment variables.
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # Fall back to 100 iterations when INT8KLNUM is unset (the
            # original called int(None) in that case, which raises).
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 100)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.conv_body_net.Proto())
    else:
        workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
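# Conceptual sketch of what absmax calibration computes (a simplification for
# illustration, not the Calibrator API): a symmetric int8 scale derived from
# the largest activation magnitude observed over the calibration runs.
def _demo_absmax_scale(activations):
    # activations: iterable of numpy arrays collected over calibration iters.
    absmax = max(float(np.abs(a).max()) for a in activations)
    scale = absmax / 127.0  # symmetric int8 range [-127, 127]
    return scale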
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead.
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # Here the boxes_all are [x0, y0, x1, y1, score].
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # Create the cell anchors array.
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # Fetch the per-level probabilities.
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1:, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible
        # that candidate_inds is empty if we impose threshold 0.05 at all
        # levels. This will lead to errors since no detections are found for
        # this image. Hence, for lvl 7, which has small spatial resolution, we
        # take the threshold 0.0.
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by
    # class.
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # Do class-specific NMS here.
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4]  - scores
    #   detections[:, 5]  - classes
    detections = np.vstack(detections)
    # Sort all detections again and keep the global top scoring.
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py).
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()
    return cls_boxes
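# Self-contained sketch of the top-k candidate selection above: argpartition
# picks the k best scores without a full sort, and unravel_index maps the
# flat indices back to (batch, anchor, class, y, x) positions. Synthetic data.
def _demo_topk_unravel(k=5):
    rng = np.random.RandomState(0)
    cls_prob = rng.rand(1, 3, 2, 4, 4)  # (N, A, classes, H, W)
    flat = cls_prob.ravel()
    inds = np.argpartition(flat, -k)[-k:]  # indices of the k largest scores
    inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
    return inds_5d  # each row: (n, anchor_id, class, y, x)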
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections on a batch of images, with optional INT8
    calibration and INT8-vs-FP32 co-simulation controlled by environment
    variables."""
    if timers is None:
        timers = defaultdict(Timer)
    if model1 is None and os.environ.get('COSIM'):
        print("COSIM requires model1")
    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead.
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(
            im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, cfg.TEST.SIZEFIX)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(
                core.ScopedName(k), v.astype(np.float32, copy=False))
    timers['data1'].toc()
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        # Pick the calibration algorithm from environment variables.
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # Fall back to 100 iterations when INT8KLNUM is unset (the
            # original called int(None) in that case, which raises).
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 100)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            # Dump both nets, then run the INT8 and FP32 nets operator by
            # operator and compare their outputs. (Text mode: the protos are
            # written as strings.)
            with open("int8.txt", "w") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "w") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is {1}".format(
                    i, model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is {1}".format(
                    i, model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i, model.net.Proto().op[i].type))
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    # assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i, model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i, model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    # if not compare_utils.assert_allclose(
                    #         int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01,
                            os.environ.get('COSIM')):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))
                logging.warning("pass checking op[{0}] {1} output".format(
                    i, model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # Here the boxes_all are [x0, y0, x1, y1, score].
    batch_size = cls_probs[0].shape[0]
    # Note: the original `[boxes_all] * batch_size` aliased a single
    # accumulator across the whole batch; build an independent dict per image.
    boxes_all_list = [defaultdict(list) for _ in range(batch_size)]
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # Create the cell anchors array.
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # Fetch the per-level probabilities.
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1:, :, :]

        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()
            # In some cases [especially for very small img sizes], it's
            # possible that candidate_inds is empty if we impose threshold
            # 0.05 at all levels. This will lead to errors since no detections
            # are found for this image. Hence, for lvl 7, which has small
            # spatial resolution, we take the threshold 0.0.
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if len(candidate_inds) == 0:
                continue

            pre_nms_topn = min(
                cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
            inds = np.argpartition(
                cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            inds_4d = np.array(
                np.unravel_index(inds, cls_prob[i, :].shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]

            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (
                box_utils.bbox_transform(boxes, box_deltas)
                if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        # Combine predictions across all levels and retain the top scoring by
        # class.
        timers['misc_bbox'].tic()
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # Do class-specific NMS here.
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4]  - scores
        #   detections[:, 5]  - classes
        detections = np.vstack(detections)
        # Sort all detections again and keep the global top scoring.
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see
        # core/test_engine.py).
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(num_classes)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)
        timers['misc_bbox'].toc()
    return cls_boxes_list
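# Sketch of the list-aliasing pitfall fixed in the batched im_detect_bbox
# above: multiplying a list that holds a mutable object copies references, so
# every batch entry would share one accumulator; a comprehension builds
# independent ones. Pure-Python demo.
def _demo_list_aliasing(batch_size=2):
    shared = [defaultdict(list)] * batch_size
    shared[0][1].append('det')
    assert shared[1][1] == ['det']  # aliased: entry 1 sees entry 0's data
    independent = [defaultdict(list) for _ in range(batch_size)]
    independent[0][1].append('det')
    assert independent[1][1] == []  # each batch entry has its own dict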