def forward(self, inputs, outputs):
    """Decode class-agnostic box deltas into boxes and write them to outputs[0].

    See modeling.detector.DecodeBBoxes for inputs/outputs documentation.

    As read by this method:
      * inputs[0].data -- regression deltas with 8 columns (asserted); only
        the last 4 columns are used.
      * inputs[1].data -- 5 columns (asserted): column 0 is kept as the batch
        index, columns 1..4 are the prior boxes.
      * inputs[2].data -- optional; column 4 is passed to _filter_boxes as
        the max overlap of each row.
    """
    deltas = inputs[0].data
    # Only class-agnostic regression is supported here.
    assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    assert deltas.shape[1] == 8
    deltas = deltas[:, -4:]

    rois = inputs[1].data
    assert rois.shape[1] == 5
    batch_col = rois[:, :1]
    priors = rois[:, 1:]

    # Apply the deltas to the priors to obtain the decoded boxes.
    decoded = box_utils.bbox_transform(
        priors, deltas, self._bbox_reg_weights
    )

    # Per the original note: remove mal-boxes with non-positive width or
    # height and ground-truth boxes during training (a third input is only
    # present in that case).
    has_overlap_input = len(inputs) > 2
    if has_overlap_input:
        overlaps = inputs[2].data[:, 4]
        kept = _filter_boxes(decoded, overlaps)
        decoded = decoded[kept, :]
        batch_col = batch_col[kept, :]

    result = np.hstack((batch_col, decoded))
    outputs[0].reshape(result.shape)
    outputs[0].data[...] = result
def test_bbox_dataset_to_prediction_roundtrip(self):
    """Round-trip a ground-truth box through the prediction pipeline.

    A dataset box in (x1, y1, w, h) format is converted to the internal
    (x1, y1, x2, y2) format, regression deltas from nearby proposals to that
    box are computed and applied again, the predictions are converted back to
    the dataset format, and the COCO API is used to measure IoU against the
    original box. Every IoU is expected to be 1.
    """
    reg_weights = (5, 5, 10, 10)

    # Dataset-format ground truth and its internal xyxy equivalent.
    dataset_gt = [10, 20, 100, 150]
    internal_gt = box_utils.xywh_to_xyxy(dataset_gt)

    # Proposals scattered around the ground-truth box.
    proposals = random_boxes(internal_gt, 10, 10)

    # Proposal -> gt deltas, then deltas applied back onto the proposals.
    deltas = box_utils.bbox_transform_inv(
        proposals, np.array([internal_gt]), weights=reg_weights
    )
    predictions_xyxy = box_utils.bbox_transform(
        proposals, deltas, weights=reg_weights
    )

    # Back to the dataset format for the COCO IoU routine.
    predictions_xywh = box_utils.xyxy_to_xywh(predictions_xyxy)
    crowd_flags = [0] * predictions_xywh.shape[0]
    ious = COCOmask.iou(
        predictions_xywh, np.array([dataset_gt]), crowd_flags
    )
    np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
def forward(self, inputs, outputs):
    """Write the mean IoU after and before regression to outputs[0] / outputs[1].

    See modeling.detector.AddBBoxAccuracy for inputs/outputs documentation.

    As read by this method:
      * inputs[0].data -- predicted box deltas.
      * inputs[1].data -- 5 columns (asserted); columns 1..4 are the priors.
      * inputs[2].data -- labels.
      * inputs[3].data -- mapped gt boxes; columns 0..3 are the box, column 4
        is the max overlap.
    Only rows with label > 0 and max overlap < 1.0 are evaluated.
    """
    deltas = inputs[0].data
    rois = inputs[1].data
    assert rois.shape[1] == 5
    priors = rois[:, 1:]
    labels = inputs[2].data
    mapped_gt = inputs[3].data
    gt_boxes = mapped_gt[:, :4]
    max_overlap = mapped_gt[:, 4]

    # bbox iou only for fg and non-gt boxes
    selected = np.where((labels > 0) & (max_overlap < 1.0))[0]
    count = selected.size
    deltas = deltas[selected, :]
    priors = priors[selected, :]
    labels = labels[selected]
    gt_boxes = gt_boxes[selected, :]
    max_overlap = max_overlap[selected]

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or count == 0:
        deltas = deltas[:, -4:]
    else:
        # Keep, for every row, the 4 deltas belonging to that row's label.
        per_row = []
        for row in range(count):
            start = labels[row] * 4
            per_row.append(deltas[row, start:start + 4])
        deltas = np.vstack(per_row)

    pred_boxes = box_utils.bbox_transform(
        priors, deltas, self._bbox_reg_weights
    )

    avg_iou = 0.
    pre_avg_iou = sum(max_overlap)
    for row in range(count):
        gt_row = gt_boxes[row, :][np.newaxis, :]
        pred_row = pred_boxes[row, :][np.newaxis, :]
        pair_iou = box_utils.bbox_overlaps(
            gt_row.astype(dtype=np.float32, copy=False),
            pred_row.astype(dtype=np.float32, copy=False),
        )
        avg_iou += pair_iou[0]

    if count > 0:
        avg_iou /= count
        pre_avg_iou /= count

    outputs[0].reshape([1])
    outputs[0].data[...] = avg_iou
    outputs[1].reshape([1])
    outputs[1].data[...] = pre_avg_iou
def test_bbox_transform_and_inverse(self):
    """Check that bbox_transform undoes bbox_transform_inv (to 5 decimals)."""
    reg_weights = (5, 5, 10, 10)
    reference_box = [10, 10, 20, 20]
    sources = random_boxes(reference_box, 1, 10)
    targets = random_boxes(reference_box, 1, 10)

    # Deltas that map the sources onto the targets ...
    deltas = box_utils.bbox_transform_inv(
        sources, targets, weights=reg_weights
    )
    # ... applied to the sources should give the targets back.
    rebuilt = box_utils.bbox_transform(sources, deltas, weights=reg_weights)

    np.testing.assert_array_almost_equal(targets, rebuilt, decimal=5)
def im_detect_bbox(workspace, predict_net, im, target_scale, target_max_size,
                   boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        workspace: the caffe2 workspace to use
        predict_net: the prediction network
        im (ndarray): color image to test (in BGR order)
        target_scale, target_max_size: forwarded unchanged to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale: the image scale returned by _get_blobs (for use with
            im_detect_mask, etc.)
    """
    def fetch(blob_name):
        return workspace.FetchBlob(core.ScopedName(blob_name))

    blobs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)
    for blob_name, blob in blobs.items():
        workspace.FeedBlob(
            core.ScopedName(blob_name), blob, get_device_option_cpu()
        )
    workspace.RunNet(predict_net.name)

    # Proposals are read from the 'rpn_rois' blob and unscaled back to raw
    # image space.
    rois = fetch('rpn_rois')
    proposals = rois[:, 1:5] / im_scale

    # Softmax class probabilities; the reshape restores the row axis that
    # squeeze() drops when there is a single proposal.
    scores = fetch('cls_prob').squeeze()
    scores = scores.reshape([-1, scores.shape[-1]])

    # Bounding-box regression deltas, same single-proposal handling.
    deltas = fetch('bbox_pred').squeeze()
    deltas = deltas.reshape([-1, deltas.shape[-1]])

    decoded = box_utils.bbox_transform(
        proposals, deltas, cfg.MODEL.BBOX_REG_WEIGHTS
    )
    decoded = box_utils.clip_tiled_boxes(decoded, im.shape)
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        # Repeat the class-agnostic boxes once per class column of `scores`.
        decoded = np.tile(decoded, (1, scores.shape[1]))
    return scores, decoded, im_scale
def forward(self, inputs, outputs):
    """Decode the RoIs of one cascade stage into RoIs written to outputs[0].

    Inputs, in the order this method reads them:
      * inputs[0] -- cls_prob
      * inputs[1] -- bbox_pred
      * inputs[2] -- rois; per the original note an array of shape (num, 5)
        with rows (batch_idx, x1, y1, x2, y2)
      * training (self._train truthy): inputs[3] -- overlaps,
        inputs[4] -- im_info
      * otherwise: inputs[3] -- im_info

    Raises:
        ValueError: if self._stage_num is neither 1 nor 2. (Previously this
            surfaced as an UnboundLocalError on `bbox_reg_weights`.)
    """
    cls_prob = inputs[0].data[...]
    bbox_pred = inputs[1].data[...]
    rois = inputs[2].data[...]
    if self._train:
        overlaps = inputs[3].data[...]
        im_info = inputs[4].data
    else:
        overlaps = None
        im_info = inputs[3].data

    # Pick the regression weights of this stage; fail loudly on a stage this
    # op has no weights for.
    if self._stage_num == 1:
        bbox_reg_weights = cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE1
    elif self._stage_num == 2:
        bbox_reg_weights = cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE2
    else:
        raise ValueError(
            'Unsupported cascade stage {!r}: expected 1 or 2'.format(
                self._stage_num
            )
        )

    proposals_next = rois[:, 1:5]

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        # Class-agnostic regression: drop the first 4 columns.
        delta = bbox_pred[:, 4:bbox_pred.shape[1]]
    else:
        # Use the deltas of the class with the highest score for each RoI.
        cls_idx = cls_prob.argmax(axis=1)
        delta = np.zeros((bbox_pred.shape[0], 4), dtype=bbox_pred.dtype)
        for row, cls in enumerate(cls_idx):
            delta[row, :] = bbox_pred[row, cls * 4:cls * 4 + 4]

    # Add bbox deltas onto rois to generate new rois, then put the batch
    # index column back in front.
    new_rois = box_utils.bbox_transform(
        proposals_next, delta, bbox_reg_weights
    )
    batch_idxs = rois[:, 0].reshape(rois.shape[0], 1)
    new_rois = np.hstack((batch_idxs, new_rois))

    # remove invalid boxes
    output_rois = remove_invalid_boxes(new_rois)
    if self._train:
        # screen out high IOU boxes, to remove redundant gt boxes
        output_rois = remove_high_iou_boxes(output_rois, overlaps)

    # clip tiled boxes into image
    output_rois = clip_tiled_bboxes(output_rois, im_info[0, :2])
    blob_utils.py_op_copy_blob(output_rois, outputs[0])
def forward(self, inputs, outputs):
    """Decode class-agnostic box deltas into boxes and write them to outputs[0].

    See modeling.detector.DecodeBBoxes for inputs/outputs documentation.
    """
    # Inputs are the regression parameters of the previous stage and the
    # proposals. The deltas have shape (num of proposals, 8).
    stage_deltas = inputs[0].data
    # Only the CLS_AGNOSTIC_BBOX_REG configuration is supported.
    assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    assert stage_deltas.shape[1] == 8
    stage_deltas = stage_deltas[:, -4:]

    # The proposal blob has shape (num of proposals, 5): one leading index
    # column followed by the 4 box columns.
    proposal_blob = inputs[1].data
    assert proposal_blob.shape[1] == 5
    index_col = proposal_blob[:, :1]
    prior_boxes = proposal_blob[:, 1:]

    # Transform bbox priors into predictions / proposals via bbox
    # transformations (original note: in the cascade these are the proposals
    # fed to the next stage).
    decoded_boxes = box_utils.bbox_transform(
        prior_boxes, stage_deltas, self._bbox_reg_weights
    )

    # Remove mal-boxes with non-positive width or height and ground truth
    # boxes during training.
    if len(inputs) > 2:
        # Original note: during training inputs[2] holds the gt boxes mapped
        # to the proposals; at inference this input is absent.
        gt_blob = inputs[2].data
        # NOTE(review): the original author was unsure what role max_overlap
        # plays inside _filter_boxes -- confirm against that helper.
        overlap_col = gt_blob[:, 4]
        # Indices of the boxes that pass the filter.
        survivors = _filter_boxes(decoded_boxes, overlap_col)
        decoded_boxes = decoded_boxes[survivors, :]
        index_col = index_col[survivors, :]

    stacked = np.hstack((index_col, decoded_boxes))
    outputs[0].reshape(stacked.shape)
    outputs[0].data[...] = stacked
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image.

    Arguments:
        model: detection model whose net is run on the image
        im (ndarray): image to test
        timers (dict): optional mapping of timer name -> Timer; a fresh
            defaultdict(Timer) is used when omitted

    Returns:
        cls_boxes (list): one entry per class index in
            range(cfg.MODEL.NUM_CLASSES). Entry 0 stays an empty list; every
            other entry is an (n, 5) array of [x0, y0, x1, y1, score] rows.
            When no level produces a candidate all arrays are empty instead
            of the function raising from np.vstack on an empty list.
    """
    if timers is None:
        timers = defaultdict(Timer)

    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    levels = range(k_min, k_max + 1)
    cls_probs, box_preds = [], []
    for lvl in levels:
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    for lvl, cls_prob, box_pred in zip(levels, cls_probs, box_preds):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # Split the channel axis into (anchor, per-anchor channel).
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, cls_prob.shape[1] // A,
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible
        # that candidate_ind is empty if we impose threshold 0.05 at all
        # levels. This will lead to errors since no detections are found for
        # this image. Hence, for lvl 7 which has small spatial resolution, we
        # take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        # Keep only the top scoring candidates of this level.
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        # `classes` is 0-based over the foreground classes, hence cls - 1.
        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    if detections:
        detections = np.vstack(detections)
    else:
        # np.vstack rejects an empty list; use an empty table so the code
        # below yields empty per-class arrays.
        detections = np.zeros((0, 6))
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores):
    """Turn the RPN outputs of one image into scored proposals.

    The deltas and scores are reordered to match the anchors, the highest
    scoring entries are kept, the anchors are transformed into proposals,
    clipped, filtered by _filter_boxes and finally passed through NMS.

    Returns:
        (proposals, scores) after filtering and NMS.
    """
    # Mode-dependent configuration.
    mode_cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_limit = cfg[mode_cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_limit = cfg[mode_cfg_key].RPN_POST_NMS_TOP_N
    nms_threshold = cfg[mode_cfg_key].RPN_NMS_THRESH
    min_box_size = cfg[mode_cfg_key].RPN_MIN_SIZE

    # Bring the deltas into the same order as the anchors:
    #   - (4 * A, H, W) from the conv output
    #   - transposed to (H, W, 4 * A)
    #   - reshaped to (H * W * A, 4), rows ordered by (H, W, A) from slowest
    #     to fastest to match the enumerated anchors
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

    # Same for the scores: (A, H, W) -> (H, W, A) -> (H * W * A, 1).
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # Sort all (proposal, score) pairs by score from highest to lowest and
    # take the top pre_nms_limit (e.g. 6000).
    keep_everything = pre_nms_limit <= 0 or pre_nms_limit >= len(scores)
    if keep_everything:
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays: partition to get the top K
        # unsorted, then sort just those (~20x faster for 200k scores).
        top_unsorted = np.argpartition(
            -scores.squeeze(), pre_nms_limit
        )[:pre_nms_limit]
        local_order = np.argsort(-scores[top_unsorted].squeeze())
        order = top_unsorted[local_order]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]

    # Transform anchors into proposals via bbox transformations.
    proposals = box_utils.bbox_transform(
        all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0)
    )

    # Clip proposals to the image (may result in proposals with zero area
    # that will be removed in the next step).
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # Remove predicted boxes with either height or width < min_box_size.
    valid = _filter_boxes(proposals, min_box_size, im_info)
    proposals = proposals[valid, :]
    scores = scores[valid]

    # Apply loose NMS (e.g. threshold = 0.7), keep at most post_nms_limit
    # (e.g. 300) and return the top proposals.
    if nms_threshold > 0:
        survivors = box_utils.nms(
            np.hstack((proposals, scores)), nms_threshold
        )
        if post_nms_limit > 0:
            survivors = survivors[:post_nms_limit]
        proposals = proposals[survivors, :]
        scores = scores[survivors]
    return proposals, scores
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    The 'rois' blob is split at the row given by the first entry of the
    'roi_numbers' blob. cfg.TEST.WHAT selects which part and which head
    outputs are decoded:
      * 'coco'           -- rows before the split; 'cls_prob' / 'bbox_pred'
      * 'toothbrush'     -- rows from the split on; 'cls_prob_toothbrush' /
                            'bbox_pred_toothbrush'
      * 'toothbrush_rpn' -- rows from the split on; 'bbox_pred_toothbrush'
                            deltas, with scores built from 'roi_scores'
                            (a zero column followed by the roi score)

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        target_scale, target_max_size: forwarded unchanged to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores
        boxes (ndarray): array of predicted bounding boxes
        im_scale: the image scale returned by _get_blobs (for use with
            im_detect_mask, etc.)

    Raises:
        ValueError: if cfg.TEST.WHAT is not one of the three modes above.
            (Previously this surfaced as an unbound `pred_boxes` at return.)
    """
    mode = cfg.TEST.WHAT
    if mode not in ('coco', 'toothbrush', 'toothbrush_rpn'):
        raise ValueError(
            "Unsupported cfg.TEST.WHAT {!r}: expected 'coco', 'toothbrush' "
            "or 'toothbrush_rpn'".format(mode)
        )

    def fetch(blob_name):
        return workspace.FetchBlob(core.ScopedName(blob_name))

    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    rois = fetch('rois')
    split = int(fetch('roi_numbers')[0])

    if mode == 'coco':
        selected_rois = rois[:split]
        blob_suffix = ''
    else:
        selected_rois = rois[split:]
        blob_suffix = '_toothbrush'

    # Unscale back to raw image space.
    boxes = selected_rois[:, 1:5] / im_scale

    # Regression deltas; the reshape restores the row axis that squeeze()
    # drops when there is a single proposal.
    box_deltas = fetch('bbox_pred' + blob_suffix).squeeze()
    box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
    pred_boxes = box_utils.bbox_transform(
        boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
    )
    pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)

    if mode == 'toothbrush_rpn':
        # Two-column score table: zeros followed by the roi score.
        scores = fetch('roi_scores')[split:]
        scores = scores[:, np.newaxis]
        zeros = np.zeros(scores.shape, dtype=np.float32)
        scores = np.concatenate((zeros, scores), axis=1)
    else:
        # Softmax class probabilities, same single-proposal handling.
        scores = fetch('cls_prob' + blob_suffix).squeeze()
        scores = scores.reshape([-1, scores.shape[-1]])

    return scores, pred_boxes, im_scale
def _make_calibration_algorithm():
    """Build the INT8 calibration algorithm selected by $INT8CALIB."""
    kind = os.environ.get('INT8CALIB')
    if kind == "moving_average":
        ema_alpha = 0.5
        return EMACalib(ema_alpha)
    if kind == "kl_divergence":
        # $INT8KLNUM overrides the default of 100 iterations.
        kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 100)
        return KLCalib(kl_iter_num_for_range)
    return AbsmaxCalib()


def _run_cosim(model, model1, cosim_alg, int8_ws_name, fp32_ws_name):
    """Run both nets operator by operator and compare their outputs.

    Returns False when an operator pair has a different number of inputs or
    outputs (after logging the error), True otherwise.
    """
    # Text mode: the payload is a str, which a binary-mode file rejects on
    # Python 3.
    with open("int8.txt", "w") as p:
        p.write(str(model.net.Proto()))
    with open("fp32.txt", "w") as p:
        p.write(str(model1.net.Proto()))

    for i in range(len(model.net.Proto().op)):
        int8_op = model.net.Proto().op[i]
        fp32_op = model1.net.Proto().op[i]

        workspace.SwitchWorkspace(int8_ws_name)
        int8_inputs = [workspace.FetchBlob(str(inp)) for inp in int8_op.input]
        logging.warning(" opint8[{0}] is {1}".format(i, int8_op))
        workspace.RunOperatorOnce(int8_op)
        int8_results = [
            workspace.FetchBlob(str(res)) for res in int8_op.output
        ]

        workspace.SwitchWorkspace(fp32_ws_name)
        fp32_inputs = [workspace.FetchBlob(str(inp)) for inp in fp32_op.input]
        logging.warning(" opfp32[{0}] is {1}".format(i, fp32_op))
        workspace.RunOperatorOnce(fp32_op)
        fp32_results = [
            workspace.FetchBlob(str(res)) for res in fp32_op.output
        ]

        if len(int8_inputs) != len(fp32_inputs):
            logging.error("Wrong number of inputs")
            return False
        if len(int8_results) != len(fp32_results):
            logging.error("Wrong number of outputs")
            return False

        # The per-input comparison is disabled in the original code; only
        # the progress messages remain.
        logging.warning("begin to check op[{}] {} input".format(
            i, int8_op.type))
        logging.warning("pass checking op[{0}] {1} input".format(
            i, int8_op.type))

        logging.warning("begin to check op[{0}] {1} output".format(
            i, int8_op.type))
        for j, int8_result in enumerate(int8_results):
            # Outputs whose name starts with '_' are skipped.
            if int8_op.output[j][0] == '_':
                continue
            if not compare_utils.assert_compare(
                    int8_result, fp32_results[j], 1e-01, cosim_alg):
                # On mismatch, dump the inputs of both operators.
                for k, int8_input in enumerate(int8_inputs):
                    logging.warning("int8_input[{}] is {}".format(
                        k, int8_input))
                    logging.warning("fp32_input[{}] is {}".format(
                        k, fp32_inputs[k]))
        logging.warning("pass checking op[{0}] {1} output".format(
            i, int8_op.type))
    return True


def im_detect_bbox(model, im, target_scale, target_max_size, size_fix=None,
                   timers=None, model1=None, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Behaviour is steered by environment variables:
      * EPOCH2OLD == "1" -- run the net once before the timed run (warm up)
      * INT8INFO == "1"  -- run one calibration iteration instead of the net
                            (algorithm chosen by INT8CALIB / INT8KLNUM)
      * COSIM (non-empty) -- feed both the int8 and fp32 workspaces and run
                            `model` / `model1` operator by operator,
                            comparing outputs

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im: image input; forwarded to _get_blobs, and im[0].shape is used
            for clipping
        target_scale, target_max_size, size_fix: forwarded to _get_blobs
        timers (dict): optional mapping of timer name -> Timer; a fresh
            defaultdict(Timer) is used when omitted
        model1: fp32 reference model, required for co-simulation
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale: the image scale returned by _get_blobs
        batch_indices: column 0 of the rois, one entry per row of `scores`
            (None when no 'rois' blob is available)
        Returns None when co-simulation finds an operator pair with a
        different number of inputs or outputs.
    """
    if timers is None:
        timers = defaultdict(Timer)
    cosim_alg = os.environ.get('COSIM')
    if model1 is None and cosim_alg:
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"

    timers['data1'].tic()
    inputs, im_scale = _get_blobs(
        im, boxes, target_scale, target_max_size, size_fix)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    dedup = cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN
    if dedup:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        if cosim_alg:
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v)
        if cosim_alg:
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k), v)
    timers['data1'].toc()

    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)

    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        calib = Calibrator(_make_calibration_algorithm())
        calib.RunCalibIter(workspace, model.net.Proto())
    elif cosim_alg:
        if not _run_cosim(model, model1, cosim_alg,
                          int8_ws_name, fp32_ws_name):
            return None
    else:
        workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()

    timers['result'].tic()
    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale
        batch_indices = rois[:, 0]
    else:
        # Previously `batch_indices` was left unbound on this path and the
        # return statement raised NameError. Take the index column from the
        # rois that were fed to the net.
        fed_rois = inputs.get('rois')
        batch_indices = None if fed_rois is None else fed_rois[:, 0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if dedup:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
        if batch_indices is not None:
            batch_indices = batch_indices[inv_index]
    timers['result'].toc()

    return scores, pred_boxes, im_scale, batch_indices
def im_detect_bbox_batch(model, ims, target_scale, target_max_size,
                         boxes=None):
    """Bounding box object detection for a batch of images.

    Only the Faster R-CNN path (rois read from the 'rois' blob) is
    implemented: the original code never bound `rois` otherwise.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        ims (list): cfg.TEST.IMS_PER_BATCH color images to test (in BGR order)
        target_scale, target_max_size: forwarded to _get_blobs_batch
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores_batch (list): per image, an R_i x K array of class scores
        pred_boxes_batch (list): per image, the predicted bounding boxes
        im_scales (list): image scales as returned by _get_blobs_batch
        (None, None, None) is returned after logging an error when
        cfg.TEST.BBOX_REG is disabled.

    Raises:
        NotImplementedError: when cfg.MODEL.FASTER_RCNN is disabled
            (with or without box deduplication).
    """
    # Fail before any work is done: previously these configurations ran the
    # net and then crashed on the unbound `rois`.
    if not cfg.MODEL.FASTER_RCNN:
        if cfg.DEDUP_BOXES > 0:
            raise NotImplementedError(
                'Deduplication not implemented with batch inference, set TEST.IMS_PER_BATCH to 1'
            )
        raise NotImplementedError(
            'Batch inference is only implemented for cfg.MODEL.FASTER_RCNN'
        )

    inputs, im_scales = _get_blobs_batch(
        ims, boxes, target_scale, target_max_size)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    rois = workspace.FetchBlob(core.ScopedName('rois'))

    # Softmax class probabilities. The reshape restores the row axis that
    # squeeze() drops when the whole batch has a single proposal.
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Bounding-box regression deltas, same single-proposal handling.
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])

    scores_batch = []
    pred_boxes_batch = []
    for i, im in enumerate(ims):
        # Rows of this image: column 0 of the rois is the batch index.
        select_inds = np.where(rois[:, 0] == i)[0]

        # unscale back to raw image space
        im_boxes = rois[select_inds, 1:5] / im_scales[i]
        scores_i = scores[select_inds, :]
        scores_batch.append(scores_i)

        if not cfg.TEST.BBOX_REG:
            logger.error('Not implemented.')
            return None, None, None

        box_deltas_i = box_deltas[select_inds, :]
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas_i = box_deltas_i[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            im_boxes, box_deltas_i, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores_i.shape[1]))
        pred_boxes_batch.append(pred_boxes)

    return scores_batch, pred_boxes_batch, im_scales
def im_detect_bbox_given_features(model, features, im_info, im_scales,
                                  im_shape):
    """Bounding box object detection for provided features.

    Adapted from the image-based detector: the given features are fed to the
    workspace and only the head net is run, instead of forwarding an image
    through the entire network. Only the Faster R-CNN path (rois read from
    the 'rois' blob) is implemented; the original deduplication / multilevel
    code referenced names that do not exist in this function.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        features (dictionary of ndarray): high level features from which to
            run detection, keyed by blob name
        im_info: fed to the workspace as the 'im_info' blob
        im_scales (list): image scales; exactly one entry is supported
        im_shape: image shape the predicted boxes are clipped to

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): the `im_scales` argument, unchanged

    Raises:
        NotImplementedError: when cfg.MODEL.FASTER_RCNN is disabled.
            (Previously every such configuration ended in a NameError.)
    """
    if not cfg.MODEL.FASTER_RCNN:
        raise NotImplementedError(
            'Detection from precomputed features is only implemented for '
            'cfg.MODEL.FASTER_RCNN'
        )
    assert len(im_scales) == 1, \
        'Only single-image / single-scale batch implemented'

    # Shallow copy so the caller's dictionary does not gain an 'im_info' key.
    blobs = copy.copy(features)
    blobs['im_info'] = im_info

    for k, v in blobs.items():
        workspace.FeedBlob(caffe2_core.ScopedName(k), v)
    # NOTE(review): the attribute is spelled `faster_rnn_head` in the
    # original; confirm it is not meant to be `faster_rcnn_head`.
    workspace.RunNet(model.faster_rnn_head.Proto().name)

    # Read out blobs
    rois = workspace.FetchBlob(caffe2_core.ScopedName('rois'))
    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scales[0]

    # use softmax estimated probabilities
    scores = workspace.FetchBlob(caffe2_core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            caffe2_core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes, im_scales
def forward(self, inputs, outputs):
    """See modeling.detector.AddBBoxAccuracy for inputs/outputs documentation.

    Reads predicted box deltas (inputs[0]), proposals (inputs[1]), labels
    (inputs[2]) and mapped ground-truth boxes (inputs[3]); writes two
    scalars: outputs[0] is the mean IoU between the regressed boxes and their
    ground-truth boxes, outputs[1] is the mean of the incoming max-overlap
    column over the same rows.
    """
    # Predicted bbox deltas; presumably (R, 4 * C), or class-agnostic, in
    # which case only the last 4 columns are used -- TODO confirm with caller.
    deltas = inputs[0].data

    # Proposals: 5 columns, the first is dropped to get the 4 box coordinates.
    roi_data = inputs[1].data
    assert roi_data.shape[1] == 5
    priors = roi_data[:, 1:]

    labels = inputs[2].data

    # Mapped gt boxes: columns 0-3 are the box, column 4 the max overlap.
    mapped_gt = inputs[3].data
    gt_boxes = mapped_gt[:, :4]
    max_overlap = mapped_gt[:, 4]

    # bbox iou only for fg and non-gt boxes: keep rows with a positive label
    # whose overlap is strictly below 1.0.
    selected = np.where(np.logical_and(labels > 0, max_overlap < 1.0))[0]
    num_selected = selected.size

    deltas = deltas[selected, :]
    priors = priors[selected, :]
    labels = labels[selected]
    gt_boxes = gt_boxes[selected, :]
    max_overlap = max_overlap[selected]

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or num_selected == 0:
        # A single set of 4 deltas per box: take the trailing 4 columns.
        deltas = deltas[:, -4:]
    else:
        # Class-specific regression: for every row keep only the 4 deltas
        # belonging to that row's label, giving a (num_selected, 4) array.
        per_row = []
        for row in range(num_selected):
            first_col = labels[row] * 4
            per_row.append(deltas[row, first_col:first_col + 4])
        deltas = np.vstack(per_row)

    # Regress the proposals with the selected deltas to get predicted boxes.
    pred_boxes = box_utils.bbox_transform(
        priors, deltas, self._bbox_reg_weights
    )

    mean_iou = 0.
    prior_mean_iou = sum(max_overlap)
    for row in range(num_selected):
        # IoU between the row-th gt box and the row-th predicted box
        gt_row = gt_boxes[row:row + 1, :].astype(dtype=np.float32, copy=False)
        pred_row = pred_boxes[row:row + 1, :].astype(
            dtype=np.float32, copy=False
        )
        mean_iou += box_utils.bbox_overlaps(gt_row, pred_row)[0]

    if num_selected > 0:
        mean_iou /= num_selected
        prior_mean_iou /= num_selected

    # outputs[0]: average IoU after this regression
    # outputs[1]: average of the incoming overlaps (before this regression)
    outputs[0].reshape([1])
    outputs[0].data[...] = mean_iou
    outputs[1].reshape([1])
    outputs[1].data[...] = prior_mean_iou
def get_target_class_weights(inputs, outputs):
    """Accumulate per-class post-NMS score sums for the non-source image.

    Expected inputs, in order: 'rois', 'label_mask', 'im_info', 'is_source',
    'cls_pred', 'bbox_pred'. `outputs` is not used; the result is handed to
    ``model.class_weight_db.update_class_weights``.
    """
    import numpy as np
    import detectron.utils.boxes as box_utils

    all_rois = inputs[0].data
    label_mask = inputs[1].data.astype(bool)
    im_info = inputs[2].data
    source_mask = inputs[3].data.astype(bool)
    scores = inputs[4].data
    box_deltas = inputs[5].data

    # First im_info row that is not flagged as source.
    target_info = im_info[~source_mask, :][0, :]
    im_shape = target_info[:2]
    im_scale = target_info[2]
    # im_info is extended with its index in the roidb.
    im_idx = int(target_info[3])

    # Keep the rois that are not masked, then unscale to raw image space.
    kept_rois = all_rois[~label_mask, :]
    boxes = kept_rois[:, 1:5] / im_scale

    pred_boxes = box_utils.clip_tiled_boxes(
        box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        ),
        im_shape,
    )

    num_classes = cfg.MODEL.NUM_CLASSES
    class_score_sums = np.zeros((num_classes,))

    # Apply threshold on detection probabilities and apply NMS.
    # Class 0 is skipped because it is the background class.
    for cls in range(1, num_classes):
        above = np.where(scores[:, cls] > cfg.TEST.SCORE_THRESH)[0]
        cls_scores = scores[above, cls]
        cls_boxes = pred_boxes[above, cls * 4:(cls + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
        cls_dets = cls_dets.astype(np.float32, copy=False)
        survivors = box_utils.nms(cls_dets, cfg.TEST.NMS)
        # Last column holds the scores of the detections that survived NMS.
        class_score_sums[cls] = cls_dets[survivors, :][:, -1].sum()

    model.class_weight_db.update_class_weights(im_idx, class_score_sums)
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        target_scale: forwarded to _get_blobs
        target_max_size: forwarded to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale: image scale used in the input blob (as returned by
            _get_blobs and for use with im_detect_mask, etc.)
    """
    blobs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # Mapping image ROIs to feature-map ROIs aliases some distinct image ROIs
    # onto the same feature ROI. Hash the quantized ROIs so features are only
    # computed once per unique feature ROI.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        hash_weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
        roi_hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(
            hash_weights
        )
        _, index, inv_index = np.unique(
            roi_hashes, return_index=True, return_inverse=True
        )
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(blobs, 'rois')

    for blob_name, blob in blobs.items():
        workspace.FeedBlob(core.ScopedName(blob_name), blob)
    workspace.RunNet(model.net.Proto().name)

    # With Faster R-CNN the proposals come from the network itself.
    if cfg.MODEL.FASTER_RCNN:
        net_rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = net_rois[:, 1:5] / im_scale

    # Softmax class probabilities; the reshape keeps the array 2-D even when
    # there is only one proposal.
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    scores = scores.reshape([-1, scores.shape[-1]])
    num_score_cols = scores.shape[1]

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, num_score_cols))
    else:
        # Apply bounding-box regression deltas
        deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        deltas = deltas.reshape([-1, deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            deltas = deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, num_score_cols))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale
def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores):
    """Turn the RPN outputs for one image into scored box proposals.

    Returns the pair (proposals, scores) left after top-k selection, box
    decoding, clipping, small-box filtering and (optionally) NMS.
    """
    # Mode-dependent configuration
    mode_cfg = cfg['TRAIN' if self._train else 'TEST']
    pre_nms_top_n = mode_cfg.RPN_PRE_NMS_TOP_N
    post_nms_top_n = mode_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = mode_cfg.RPN_NMS_THRESH
    min_size = mode_cfg.RPN_MIN_SIZE

    # Bring the deltas into the same row order as the enumerated anchors:
    # (4 * A, H, W) from the conv output -> (H, W, 4 * A) -> (H * W * A, 4),
    # rows ordered by (H, W, A) from slowest to fastest.
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))
    # Same for the scores: (A, H, W) -> (H, W, A) -> (H * W * A, 1).
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # Rank (proposal, score) pairs from highest to lowest score and keep the
    # best pre_nms_top_n (e.g. 6000).
    negated = -scores.squeeze()
    if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
        order = np.argsort(negated)
    else:
        # Avoid sorting possibly large arrays; partition first to get the
        # top K unsorted, then sort just those (~20x faster for 200k scores)
        top_k = np.argpartition(negated, pre_nms_top_n)[:pre_nms_top_n]
        order = top_k[np.argsort(-scores[top_k].squeeze())]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]

    # Decode anchors into proposals (unit regression weights), then clip them
    # to the image. Clipping may produce zero-area proposals; those are
    # dropped by the size filter below.
    proposals = box_utils.bbox_transform(
        all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0)
    )
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # Remove predicted boxes with either height or width < min_size
    valid = _filter_boxes(proposals, min_size, im_info)
    proposals = proposals[valid, :]
    scores = scores[valid]

    # Loose NMS (e.g. threshold = 0.7), then keep at most post_nms_top_n
    # (e.g. 300) of the survivors.
    if nms_thresh > 0:
        survivors = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_top_n > 0:
            survivors = survivors[:post_nms_top_n]
        proposals = proposals[survivors, :]
        scores = scores[survivors]

    return proposals, scores
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image.

    Arguments:
        model: detection model whose ``net`` is run on the image
        im (ndarray): image to test; ``im.shape`` is used for box clipping
        timers (dict): optional mapping of name -> Timer; a fresh
            defaultdict(Timer) is created when omitted

    Returns:
        cls_boxes (list): one entry per class (cfg.MODEL.NUM_CLASSES entries).
            Entry 0 is an empty list; entry c >= 1 is an N_c x 5 array of
            [x0, y0, x1, y1, score] rows (N_c may be 0).
    """
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    # Names of the per-level output blobs to fetch after the forward pass
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    for cnt, lvl in enumerate(range(k_min, k_max + 1)):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Split the channel axis into (anchor, class) / (anchor, coordinate)
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, cls_prob.shape[1] // A,
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))

        if cfg.RETINANET.SOFTMAX:
            # Drop class index 0 so class indices line up with the
            # non-softmax case
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible
        # that candidate_inds is empty if we impose threshold 0.05 at all
        # levels. Hence, for the coarsest level (k_max), which has small
        # spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        # Keep only the pre_nms_topn highest scoring candidates (unsorted)
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        # Anchor boxes: cell position scaled by the stride plus cell anchor
        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        # Class index c in `classes` corresponds to dataset class c + 1
        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    if detections:
        detections = np.vstack(detections)
    else:
        # np.vstack raises on an empty list; with no candidates at any level
        # report zero detections instead of crashing.
        detections = np.zeros((0, 6))
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box (and depth) detection for an image with given proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        target_scale: forwarded to _get_blobs
        target_max_size: forwarded to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        pred_depths (ndarray): contents of the 'depth_pred' blob, reshaped to
            2-D with its last axis preserved
        im_scale: image scale used in the input blob (as returned by
            _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    # Predicted depths. Fetched unconditionally: this used to happen only
    # inside the cfg.TEST.BBOX_REG branch, which left `pred_depths` unbound
    # (and the return statement crashing) when box regression was disabled.
    pred_depths = workspace.FetchBlob(core.ScopedName('depth_pred')).squeeze()
    # In case there is 1 proposal
    pred_depths = pred_depths.reshape([-1, pred_depths.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
        # NOTE(review): pred_depths is not mapped back through inv_index, so
        # if it holds one row per unique ROI its rows no longer line up with
        # scores / pred_boxes here -- confirm the layout of 'depth_pred'.

    return scores, pred_boxes, pred_depths, im_scale
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections for a batch, with optional INT8 tooling.

    Behaviour is steered by environment variables:
      * EPOCH2OLD == "1": run ``model.net`` once before the timed run.
      * INT8INFO == "1": run one calibration iteration instead of inference;
        INT8CALIB selects the algorithm ("moving_average", "kl_divergence",
        otherwise abs-max) and INT8KLNUM the KL iteration count (default 100).
      * COSIM (any non-empty value): run ``model`` (int8) and ``model1``
        (fp32) operator by operator in two workspaces and compare outputs.

    Arguments:
        model: detection model whose ``net`` is run
        im: input handed to blob_utils.get_image_blob; ``im[0].shape`` is used
            for clipping, so presumably a sequence of images -- TODO confirm
        timers (dict): optional mapping of name -> Timer; a fresh
            defaultdict(Timer) is created when omitted
        model1: fp32 reference model, needed when COSIM is set

    Returns:
        cls_boxes_list (list): one ``cls_boxes`` list per batch entry. Each
            has cfg.MODEL.NUM_CLASSES entries; entry 0 is an empty list and
            entry c >= 1 an N_c x 5 array of [x0, y0, x1, y1, score] rows.
            ``None`` is returned if the co-simulation finds a mismatch in the
            number of operator inputs or outputs.
    """
    if timers is None:
        timers = defaultdict(Timer)
    cosim = os.environ.get('COSIM')
    if model1 is None and cosim:
        print("cosim must has model1")
    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE,
                                  cfg.TEST.SIZEFIX)

    # Names of the per-level output blobs to fetch after the forward pass
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))

    # Feed the inputs; in co-simulation mode both workspaces get a copy.
    for k, v in inputs.items():
        if cosim:
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
        if cosim:
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k),
                               v.astype(np.float32, copy=False))
    timers['data1'].toc()

    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)

    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    elif cosim:
        # Text mode: str() yields text, and writing it to a file opened with
        # "wb" raises TypeError on Python 3.
        with open("int8.txt", "w") as p:
            p.write(str(model.net.Proto()))
        with open("fp32.txt", "w") as p:
            p.write(str(model1.net.Proto()))
        for i in range(len(model.net.Proto().op)):
            int8_op = model.net.Proto().op[i]
            fp32_op = model1.net.Proto().op[i]

            # Run operator i in the int8 workspace
            workspace.SwitchWorkspace(int8_ws_name)
            int8_inputs = [workspace.FetchBlob(str(inp))
                           for inp in int8_op.input]
            logging.warning(" opint8[{0}] is {1}".format(i, int8_op))
            workspace.RunOperatorOnce(int8_op)
            int8_results = [workspace.FetchBlob(str(res))
                            for res in int8_op.output]

            # Run operator i in the fp32 workspace
            workspace.SwitchWorkspace(fp32_ws_name)
            fp32_inputs = [workspace.FetchBlob(str(inp1))
                           for inp1 in fp32_op.input]
            logging.warning(" opfp32[{0}] is {1}".format(i, fp32_op))
            workspace.RunOperatorOnce(fp32_op)
            fp32_results = [workspace.FetchBlob(str(res1))
                            for res1 in fp32_op.output]

            if len(int8_inputs) != len(fp32_inputs):
                logging.error("Wrong number of inputs")
                return
            if len(int8_results) != len(fp32_results):
                logging.error("Wrong number of outputs")
                return

            # Input comparison is currently disabled; only the progress
            # messages are emitted.
            logging.warning("begin to check op[{}] {} input".format(
                i, int8_op.type))
            logging.warning("pass checking op[{0}] {1} input".format(
                i, int8_op.type))

            logging.warning("begin to check op[{0}] {1} output".format(
                i, int8_op.type))
            for j, int8_result in enumerate(int8_results):
                # Outputs whose name starts with '_' are not compared
                if int8_op.output[j][0] == '_':
                    continue
                if not compare_utils.assert_compare(
                        int8_result, fp32_results[j], 1e-01, cosim):
                    # Dump the operator inputs to help diagnose the mismatch
                    for k, int8_input in enumerate(int8_inputs):
                        logging.warning("int8_input[{}] is {}".format(
                            k, int8_input))
                        logging.warning("fp32_input[{}] is {}".format(
                            k, fp32_inputs[k]))
            logging.warning("pass checking op[{0}] {1} output".format(
                i, int8_op.type))
    else:
        workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()

    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    batch_size = cls_probs[0].shape[0]
    # One independent accumulator per image. `[d] * batch_size` would repeat
    # a reference to a single dict and merge every image's detections.
    boxes_all_list = [defaultdict(list) for _ in range(batch_size)]

    for cnt, lvl in enumerate(range(k_min, k_max + 1)):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Split the channel axis into (anchor, class) / (anchor, coordinate)
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, cls_prob.shape[1] // A,
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))

        if cfg.RETINANET.SOFTMAX:
            # Drop class index 0 so class indices line up with the
            # non-softmax case
            cls_prob = cls_prob[:, :, 1::, :, :]

        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()
            # In some cases [especially for very small img sizes], it's
            # possible that candidate_inds is empty if we impose threshold
            # 0.05 at all levels. Hence, for the coarsest level (k_max),
            # which has small spatial resolution, we take the threshold 0.0
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if len(candidate_inds) == 0:
                continue

            # Keep only the pre_nms_topn highest scoring candidates
            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N,
                               len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            inds_4d = np.array(np.unravel_index(
                inds, (cls_prob[i, :]).shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]

            # Anchor boxes: cell position scaled by stride plus cell anchor
            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            # Class index c in `classes` corresponds to dataset class c + 1
            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        # Combine predictions across all levels and retain the top scoring
        # by class
        timers['misc_bbox'].tic()
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4] - scores
        #   detections[:, 5] - classes
        if detections:
            detections = np.vstack(detections)
        else:
            # np.vstack raises on an empty list; an image without any
            # candidate yields zero detections instead of a crash.
            detections = np.zeros((0, 6))
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see
        # core/test_engine.py)
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)
        timers['misc_bbox'].toc()

    return cls_boxes_list