def im_detect_keypoints(model, im_scale, boxes):
    """Infer instance keypoint poses. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    When the environment variable INT8INFO is "1" the keypoint net is run
    through a Calibrator iteration (algorithm selected by INT8CALIB) instead
    of a plain `workspace.RunNet`.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale (list): image blob scales as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        pred_heatmaps (ndarray): R x J x M x M array of keypoint location
            logits (softmax inputs) for each of the J keypoint types output
            by the network (must be processed by keypoint_results to convert
            into point predictions in the original image coordinate space)
    """
    M = cfg.KRCNN.HEATMAP_SIZE
    if boxes.shape[0] == 0:
        # Nothing to run the net on; return an empty, correctly shaped array.
        pred_heatmaps = np.zeros((0, cfg.KRCNN.NUM_KEYPOINTS, M, M), np.float32)
        return pred_heatmaps

    inputs = {'keypoint_rois': _get_rois_blob(boxes, im_scale)}

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'keypoint_rois')

    for blob_name, blob in inputs.items():
        workspace.FeedBlob(core.ScopedName(blob_name), blob)

    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # INT8KLNUM may be unset or empty; int(None) would raise, so
            # parse only when a value is present. Unset, empty and 0 all
            # fall back to 100 iterations.
            kl_env = os.environ.get('INT8KLNUM')
            kl_iter_num_for_range = int(kl_env) if kl_env else 0
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.keypoint_net.Proto())
    else:
        workspace.RunNet(model.keypoint_net.Proto().name)

    pred_heatmaps = workspace.FetchBlob(core.ScopedName('kps_score')).squeeze()

    # In case of 1: squeeze() dropped the leading axis, so restore it.
    if pred_heatmaps.ndim == 3:
        pred_heatmaps = np.expand_dims(pred_heatmaps, axis=0)

    return pred_heatmaps
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`.

    The image blob is built with `blob_utils.get_image_blob` and fed as the
    scoped 'data' blob. When the environment variable INT8INFO is "1" the net
    is run through a Calibrator iteration (algorithm selected by INT8CALIB)
    instead of a plain `workspace.RunNet`.

    Returns:
        im_scale: the scale value returned by `blob_utils.get_image_blob`.
    """
    im_blob, im_scale, _ = blob_utils.get_image_blob(
        im, target_scale, target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)

    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # INT8KLNUM may be unset or empty; int(None) would raise, so
            # parse only when a value is present. Unset, empty and 0 all
            # fall back to 100 iterations.
            kl_env = os.environ.get('INT8KLNUM')
            kl_iter_num_for_range = int(kl_env) if kl_env else 0
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.conv_body_net.Proto())
    else:
        workspace.RunNet(model.conv_body_net.Proto().name)

    return im_scale
def im_detect_mask(model, im_scale, boxes, timers=None):
    """Infer instance segmentation masks. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    When the environment variable INT8INFO is "1" the mask net is run through
    a Calibrator iteration (algorithm selected by INT8CALIB) instead of a
    plain `workspace.RunNet`. When EPOCH2OLD is "1" the net is additionally
    run once, untimed, before the timed run.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale (list): image blob scales as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)
        timers (dict of Timer, optional): timers keyed by stage name; the
            'data_mask', 'run_mask' and 'result_mask' entries are updated.
            A fresh defaultdict(Timer) is used when omitted.

    Returns:
        pred_masks (ndarray): R x K x M x M array of class specific soft masks
            output by the network (must be processed by segm_results to convert
            into hard masks in the original image coordinate space)
    """
    if timers is None:
        timers = defaultdict(Timer)

    timers['data_mask'].tic()
    M = cfg.MRCNN.RESOLUTION
    if boxes.shape[0] == 0:
        # NOTE(review): this early return yields a (0, M, M) array, not the
        # 4-D shape produced below, and leaves the 'data_mask' timer running.
        pred_masks = np.zeros((0, M, M), np.float32)
        return pred_masks

    inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)}

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'mask_rois')

    for blob_name, blob in inputs.items():
        workspace.FeedBlob(core.ScopedName(blob_name), blob)
    timers['data_mask'].toc()

    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.mask_net.Proto().name)

    timers['run_mask'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # INT8KLNUM may be unset or empty; int(None) would raise, so
            # parse only when a value is present. Unset, empty and 0 all
            # fall back to 100 iterations.
            kl_env = os.environ.get('INT8KLNUM')
            kl_iter_num_for_range = int(kl_env) if kl_env else 0
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.mask_net.Proto())
    else:
        workspace.RunNet(model.mask_net.Proto().name)
    timers['run_mask'].toc()

    timers['result_mask'].tic()
    # Fetch masks
    pred_masks = workspace.FetchBlob(
        core.ScopedName('mask_fcn_probs')).squeeze()

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        pred_masks = pred_masks.reshape([-1, cfg.MODEL.NUM_CLASSES, M, M])
    else:
        pred_masks = pred_masks.reshape([-1, 1, M, M])
    timers['result_mask'].toc()

    return pred_masks
def im_detect_bbox(model, im, target_scale, target_max_size, size_fix=None,
                   timers=None, model1=None, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Environment variables read by this function:
        COSIM: when set, inputs are fed to both the "__int8_ws__" and
            "__fp32_ws__" workspaces and (unless INT8INFO is "1") `model` and
            `model1` are executed operator by operator, comparing outputs
            with `compare_utils.assert_compare`. The variable's value is
            passed through as the comparison algorithm.
        EPOCH2OLD: when "1", the net is run once, untimed, before the timed
            run.
        INT8INFO / INT8CALIB / INT8KLNUM: when INT8INFO is "1" the net is run
            through a Calibrator iteration instead of `workspace.RunNet`.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
            NOTE(review): the code indexes `im[0].shape`, so `im` appears to
            be a list/batch of images here -- confirm against callers.
        target_scale, target_max_size, size_fix: forwarded to `_get_blobs`
        timers (dict of Timer, optional): 'data1', 'run' and 'result' entries
            are updated; a fresh defaultdict(Timer) is used when omitted
        model1 (optional): the FP32 reference model, required when COSIM is
            set
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
        batch_indices (ndarray or None): column 0 of the 'rois' blob when
            cfg.MODEL.FASTER_RCNN is set; None otherwise.

        NOTE: if the COSIM operator comparison finds a differing number of
        inputs or outputs, an error is logged and the function returns None.
    """
    if timers is None:
        timers = defaultdict(Timer)

    cosim_alg = os.environ.get('COSIM')
    if model1 is None and cosim_alg:
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"

    timers['data1'].tic()
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size,
                                  size_fix)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        hash_weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(hash_weights)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for blob_name, blob in inputs.items():
        if cosim_alg:
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(blob_name), blob)
        if cosim_alg:
            # Mirror the same input into the FP32 reference workspace.
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(blob_name), blob)
    timers['data1'].toc()

    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)

    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # Unset or empty INT8KLNUM falls back to 100.
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 100)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    elif cosim_alg:
        # str() of a proto is text, so the dump files are opened in text
        # mode; writing str to a 'wb' handle fails on Python 3.
        with open("int8.txt", "w") as p:
            p.write(str(model.net.Proto()))
        with open("fp32.txt", "w") as p:
            p.write(str(model1.net.Proto()))

        for i in range(len(model.net.Proto().op)):
            # Run operator i in the INT8 workspace, capturing inputs/outputs.
            workspace.SwitchWorkspace(int8_ws_name)
            int8_op = model.net.Proto().op[i]
            int8_inputs = [
                workspace.FetchBlob(str(name)) for name in int8_op.input
            ]
            logging.warning(" opint8[{0}] is {1}".format(i, int8_op))
            workspace.RunOperatorOnce(int8_op)
            int8_results = [
                workspace.FetchBlob(str(name)) for name in int8_op.output
            ]

            # Run the matching operator in the FP32 workspace.
            workspace.SwitchWorkspace(fp32_ws_name)
            fp32_op = model1.net.Proto().op[i]
            fp32_inputs = [
                workspace.FetchBlob(str(name)) for name in fp32_op.input
            ]
            logging.warning(" opfp32[{0}] is {1}".format(i, fp32_op))
            workspace.RunOperatorOnce(fp32_op)
            fp32_results = [
                workspace.FetchBlob(str(name)) for name in fp32_op.output
            ]

            if len(int8_inputs) != len(fp32_inputs):
                logging.error("Wrong number of inputs")
                return
            if len(int8_results) != len(fp32_results):
                logging.error("Wrong number of outputs")
                return

            # Input comparison is currently disabled; only the progress
            # messages are emitted.
            logging.warning("begin to check op[{}] {} input".format(
                i, int8_op.type))
            logging.warning("pass checking op[{0}] {1} input".format(
                i, int8_op.type))

            logging.warning("begin to check op[{0}] {1} output".format(
                i, int8_op.type))
            for j, int8_result in enumerate(int8_results):
                # Outputs whose name starts with '_' are skipped.
                if int8_op.output[j][0] == '_':
                    continue
                if not compare_utils.assert_compare(
                        int8_result, fp32_results[j], 1e-01, cosim_alg):
                    # On mismatch, dump both sets of inputs for debugging.
                    for n, int8_input in enumerate(int8_inputs):
                        logging.warning("int8_input[{}] is {}".format(
                            n, int8_input))
                        logging.warning("fp32_input[{}] is {}".format(
                            n, fp32_inputs[n]))
            logging.warning("pass checking op[{0}] {1} output".format(
                i, int8_op.type))
    else:
        workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()

    timers['result'].tic()
    # Read out blobs
    # batch_indices is only available from the in-network 'rois' blob; default
    # it so the return statement is valid on the precomputed-proposal path.
    batch_indices = None
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale
        batch_indices = rois[:, 0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    timers['result'].toc()

    return scores, pred_boxes, im_scale, batch_indices
def test_net(weights_file, dataset_name, proposal_file, output_dir,
             ind_range=None, gpu_id=0):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.

    Environment variables read by this function:
        COSIM: when set, an INT8 model is created in "__int8_ws__" and an
            FP32 reference model (`int8=False`) in "__fp32_ws__".
        EPOCH2: when "1", the whole roidb is processed once as a warm-up and
            the collected results are discarded before the real pass.
        DPROFILE: when "1", per-image observer timings are logged.
        INT8INFO / INT8CALIB / INT8KLNUM / INT8PTXT: when INT8INFO is "1",
            quantized net definitions are written to the current directory
            after inference (text .pbtxt if INT8PTXT is "1", else binary .pb).

    Returns:
        (all_boxes, all_segms, all_keyps) as filled in by `extend_results`;
        the same data plus the YAML-dumped cfg is also saved under
        `output_dir`.
    """
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range)

    model1 = None
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(int8_ws_name, True)
    model, ob, ob_mask, ob_keypoint = initialize_model_from_cfg(
        weights_file, gpu_id=gpu_id)
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(fp32_ws_name, True)
        model1, _, _, _ = initialize_model_from_cfg(
            weights_file, gpu_id=gpu_id, int8=False)

    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)

    # for kl_divergence calibration, we use the first 100 images to get
    # the min and max values, and the remaining images are applied to compute
    # the hist. if the len(images) <= 100, we extend the images with themselves.
    if os.environ.get('INT8INFO') == "1" and os.environ.get(
            'INT8CALIB') == "kl_divergence":
        # INT8KLNUM may be unset or empty; int(None) would raise, so parse
        # only when a value is present. Unset, empty and 0 fall back to 100.
        kl_env = os.environ.get('INT8KLNUM')
        kl_iter_num_for_range = int(kl_env) if kl_env else 0
        if not kl_iter_num_for_range:
            kl_iter_num_for_range = 100
        # An empty roidb can never grow by self-extension; skip it instead
        # of looping forever.
        while roidb and len(roidb) < 2 * kl_iter_num_for_range:
            roidb += roidb

    def _detect_entry(i, entry):
        """Run im_detect_all on one roidb entry; None if it has no proposals."""
        if cfg.TEST.PRECOMPUTED_PROPOSALS:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                return None
        else:
            # Faster R-CNN type models generate proposals on-the-fly with an
            # in-network RPN; 1-stage models don't require proposals.
            box_proposals = None

        # im_detect_all is given a one-element list of images.
        im = [cv2.imread(entry['image'])]
        print("im is {} and i is {} ".format(entry['image'], i))
        with c2_utils.NamedCudaScope(gpu_id):
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                model, im, box_proposals, timers, model1)
        return im, cls_boxes_i, cls_segms_i, cls_keyps_i

    if os.environ.get('EPOCH2') == "1":
        # Warm-up pass: results are collected and then thrown away.
        for i, entry in enumerate(roidb):
            detected = _detect_entry(i, entry)
            if detected is None:
                continue
            _, cls_boxes_i, cls_segms_i, cls_keyps_i = detected
            extend_results(i, all_boxes, cls_boxes_i[0])
            if cls_segms_i is not None:
                extend_results(i, all_segms, cls_segms_i[0])
            if cls_keyps_i is not None:
                extend_results(i, all_keyps, cls_keyps_i[0])
        all_boxes, all_segms, all_keyps = empty_results(
            num_classes, num_images)
        logging.warning("begin to run benchmark")

    profiling = os.environ.get('DPROFILE') == "1"
    for i, entry in enumerate(roidb):
        detected = _detect_entry(i, entry)
        if detected is None:
            continue
        im, cls_boxes_i, cls_segms_i, cls_keyps_i = detected

        if profiling and ob is not None:
            logging.warning("enter profile log")
            logging.warning("net observer time = {}".format(ob.average_time()))
            logging.warning("net observer time = {}".format(
                ob.average_time_children()))
        if profiling and ob_mask is not None:
            logging.warning("mask net observer time = {}".format(
                ob_mask.average_time()))
            logging.warning("mask net observer time = {}".format(
                ob_mask.average_time_children()))
        # Guard on the keypoint observer itself (the mask observer was
        # checked here before, which could dereference a None ob_keypoint).
        if profiling and ob_keypoint is not None:
            logging.warning("keypoint net observer time = {}".format(
                ob_keypoint.average_time()))
            logging.warning("keypoint net observer time = {}".format(
                ob_keypoint.average_time_children()))

        extend_results(i, all_boxes, cls_boxes_i[0])
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i[0])
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i[0])

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(
                # `im` is a one-element list; flip the image it holds
                # (BGR -> RGB), not the list.
                im[0][:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis'),
                cls_boxes_i[0],
                # Mask / keypoint results are None when the model does not
                # produce them; do not subscript None.
                segms=cls_segms_i[0] if cls_segms_i is not None else None,
                keypoints=cls_keyps_i[0] if cls_keyps_i is not None else None,
                thresh=cfg.VIS_TH,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True)

    for key, value in timers.items():
        logger.info('{} : {}'.format(key, value.average_time))

    # remove observer
    if ob is not None:
        model.net.RemoveObserver(ob)
    if ob_mask is not None:
        model.mask_net.RemoveObserver(ob_mask)
    if ob_keypoint is not None:
        model.keypoint_net.RemoveObserver(ob_keypoint)

    if os.environ.get('INT8INFO') == "1":

        def save_net(net_def, init_def):
            """Write a quantized predict/init net pair to the current directory."""
            if net_def is None or init_def is None:
                return
            if net_def.name is None or init_def.name is None:
                return
            if os.environ.get('INT8PTXT') == "1":
                # str(proto) is text, so use text mode; writing str to a
                # 'wb' handle fails on Python 3.
                with open(net_def.name + '_predict_int8.pbtxt', 'w') as n:
                    n.write(str(net_def))
                with open(net_def.name + '_init_int8.pbtxt', 'w') as n:
                    n.write(str(init_def))
            else:
                with open(net_def.name + '_predict_int8.pb', 'wb') as n:
                    n.write(net_def.SerializeToString())
                with open(net_def.name + '_init_int8.pb', 'wb') as n:
                    n.write(init_def.SerializeToString())

        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # kl_iter_num_for_range was set above under the same conditions.
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        if model.net:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.MASK_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.mask_net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.KEYPOINTS_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.keypoint_net.Proto())
            save_net(predict_quantized, init_quantized)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
def main(args):
    """ main entry to run

    Loads the config and weights named by `args`, builds the model(s), runs
    `infer_engine.im_detect_all` over batches of images from
    `args.im_or_folder` and writes a visualisation per image to
    `args.output_dir`.

    Environment variables read by this function:
        COSIM: when set, an INT8 model is created in "__int8_ws__" and an
            FP32 reference model (`int8=False`) in "__fp32_ws__".
        EPOCH2: when "1", all batches are processed once as a warm-up before
            the reported pass.
        INT8INFO / INT8CALIB / INT8KLNUM / INT8PTXT: when INT8INFO is "1",
            quantized net definitions are written to the current directory
            after inference (text .pbtxt if INT8PTXT is "1", else binary .pb).
    """
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    assert not cfg.MODEL.RPN_ONLY, \
        'RPN models are not supported'
    assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
        'Models that require precomputed proposals are not supported'

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    model1 = None
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(int8_ws_name, True)
    model, _, _, _ = infer_engine.initialize_model_from_cfg(
        args.weights, gpu_id=args.device_id)
    if os.environ.get('COSIM'):
        workspace.SwitchWorkspace(fp32_ws_name, True)
        model1, _, _, _ = infer_engine.initialize_model_from_cfg(
            args.weights, gpu_id=args.device_id, int8=False)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    fnames = batch_image(im_list, args.batch_size)

    # for kl_divergence calibration, we use the first 100 images to get
    # the min and max values, and the remaining images are applied to compute
    # the hist. if the len(images) <= 100, we extend the images with themselves.
    if os.environ.get('INT8INFO') == "1" and os.environ.get(
            'INT8CALIB') == "kl_divergence":
        # Unset or empty INT8KLNUM falls back to 100.
        kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 100)
        # An empty batch list can never grow by self-extension; skip it
        # instead of looping forever.
        while fnames and len(fnames) < 2 * kl_iter_num_for_range:
            fnames += fnames

    if os.environ.get('EPOCH2') == "1":
        # Warm-up pass: results are discarded.
        for im_name in fnames:
            im = [cv2.imread(name) for name in im_name]
            timers = defaultdict(Timer)
            with c2_utils.NamedCudaScope(args.device_id):
                infer_engine.im_detect_all(model, im, None, timers, model1)
        logger.warning("begin to run benchmark\n")

    for i, im_name in enumerate(fnames):
        im = [cv2.imread(name) for name in im_name]
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(args.device_id):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, im, None, timers, model1)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        for timer_name, timer in timers.items():
            logger.info(' | {}: {:.3f}s'.format(timer_name, timer.average_time))
        if i == 0:
            logger.info(
                ' \\ Note: inference on the first batch will be slower than the '
                'rest (caches and auto-tuning need to warm up)')

        cls_segm = None
        cls_keyp = None
        # Never index past the images actually loaded for this batch (the
        # last batch may be shorter than args.batch_size).
        for bs in range(min(args.batch_size, len(im))):
            image = im[bs]
            if cls_segms is not None:
                cls_segm = cls_segms[bs]
            # Test the batch-level keypoint result, not the per-image local
            # (which is always None at this point).
            if cls_keyps is not None:
                cls_keyp = cls_keyps[bs]
            cls_box = cls_boxes[bs]
            image_name = fnames[i][bs].split("/")[-1]
            vis_utils.vis_one_image(
                image[:, :, ::-1],  # BGR -> RGB for visualization
                image_name,
                args.output_dir,
                cls_box,
                cls_segm,
                cls_keyp,
                dataset=dummy_coco_dataset,
                box_alpha=0.3,
                show_class=True,
                thresh=0.7,
                kp_thresh=2,
                ext=args.output_ext,
                out_when_no_box=args.out_when_no_box)

    if os.environ.get('INT8INFO') == "1":

        def save_net(net_def, init_def):
            """Write a quantized predict/init net pair to the current directory."""
            if net_def is None or init_def is None:
                return
            if net_def.name is None or init_def.name is None:
                return
            if os.environ.get('INT8PTXT') == "1":
                # str(proto) is text, so use text mode; writing str to a
                # 'wb' handle fails on Python 3.
                with open(net_def.name + '_predict_int8.pbtxt', 'w') as n:
                    n.write(str(net_def))
                with open(net_def.name + '_init_int8.pbtxt', 'w') as n:
                    n.write(str(init_def))
            else:
                with open(net_def.name + '_predict_int8.pb', 'wb') as n:
                    n.write(net_def.SerializeToString())
                with open(net_def.name + '_init_int8.pb', 'wb') as n:
                    n.write(init_def.SerializeToString())

        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # kl_iter_num_for_range was set above under the same conditions.
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        if model.net:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.MASK_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.mask_net.Proto())
            save_net(predict_quantized, init_quantized)
        if cfg.MODEL.KEYPOINTS_ON:
            predict_quantized, init_quantized = calib.DepositQuantizedModule(
                workspace, model.keypoint_net.Proto())
            save_net(predict_quantized, init_quantized)
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections for a batch of images.

    Environment variables read by this function:
        COSIM: when set, inputs are fed to both the "__int8_ws__" and
            "__fp32_ws__" workspaces and (unless INT8INFO is "1") `model` and
            `model1` are executed operator by operator, comparing outputs
            with `compare_utils.assert_compare`. The variable's value is
            passed through as the comparison algorithm.
        EPOCH2OLD: when "1", the net is run once, untimed, before the timed
            run.
        INT8INFO / INT8CALIB / INT8KLNUM: when INT8INFO is "1" the net is run
            through a Calibrator iteration instead of `workspace.RunNet`.

    Arguments:
        model: the detection model to run (`model.net`)
        im: image input forwarded to `blob_utils.get_image_blob`; `im[0].shape`
            is used for clipping, so it is indexed like a list of images
        timers (dict of Timer, optional): 'im_detect_bbox', 'data1', 'run'
            and 'misc_bbox' entries are updated; a fresh defaultdict(Timer)
            is used when omitted
        model1 (optional): the FP32 reference model, required when COSIM is
            set

    Returns:
        cls_boxes_list (list): one entry per image in the batch; each entry
            is a list of length cfg.MODEL.NUM_CLASSES whose element c (c >= 1)
            is an N x 5 array of [x0, y0, x1, y1, score] rows.

        NOTE: if the COSIM operator comparison finds a differing number of
        inputs or outputs, an error is logged and the function returns None.
    """
    if timers is None:
        timers = defaultdict(Timer)

    cosim_alg = os.environ.get('COSIM')
    if model1 is None and cosim_alg:
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"

    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE,
                                  cfg.TEST.SIZEFIX)

    # Names of the per-FPN-level output blobs to fetch after the run.
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))

    for blob_name, blob in inputs.items():
        if cosim_alg:
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(blob_name),
                           blob.astype(np.float32, copy=False))
        if cosim_alg:
            # Mirror the same input into the FP32 reference workspace.
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(blob_name),
                               blob.astype(np.float32, copy=False))
    timers['data1'].toc()

    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)

    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # Unset or empty INT8KLNUM falls back to 100.
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 100)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    elif cosim_alg:
        # str() of a proto is text, so the dump files are opened in text
        # mode; writing str to a 'wb' handle fails on Python 3.
        with open("int8.txt", "w") as p:
            p.write(str(model.net.Proto()))
        with open("fp32.txt", "w") as p:
            p.write(str(model1.net.Proto()))

        for i in range(len(model.net.Proto().op)):
            # Run operator i in the INT8 workspace, capturing inputs/outputs.
            workspace.SwitchWorkspace(int8_ws_name)
            int8_op = model.net.Proto().op[i]
            int8_inputs = [
                workspace.FetchBlob(str(name)) for name in int8_op.input
            ]
            logging.warning(" opint8[{0}] is {1}".format(i, int8_op))
            workspace.RunOperatorOnce(int8_op)
            int8_results = [
                workspace.FetchBlob(str(name)) for name in int8_op.output
            ]

            # Run the matching operator in the FP32 workspace.
            workspace.SwitchWorkspace(fp32_ws_name)
            fp32_op = model1.net.Proto().op[i]
            fp32_inputs = [
                workspace.FetchBlob(str(name)) for name in fp32_op.input
            ]
            logging.warning(" opfp32[{0}] is {1}".format(i, fp32_op))
            workspace.RunOperatorOnce(fp32_op)
            fp32_results = [
                workspace.FetchBlob(str(name)) for name in fp32_op.output
            ]

            if len(int8_inputs) != len(fp32_inputs):
                logging.error("Wrong number of inputs")
                return
            if len(int8_results) != len(fp32_results):
                logging.error("Wrong number of outputs")
                return

            # Input comparison is currently disabled; only the progress
            # messages are emitted.
            logging.warning("begin to check op[{}] {} input".format(
                i, int8_op.type))
            logging.warning("pass checking op[{0}] {1} input".format(
                i, int8_op.type))

            logging.warning("begin to check op[{0}] {1} output".format(
                i, int8_op.type))
            for j, int8_result in enumerate(int8_results):
                # Outputs whose name starts with '_' are skipped.
                if int8_op.output[j][0] == '_':
                    continue
                if not compare_utils.assert_compare(
                        int8_result, fp32_results[j], 1e-01, cosim_alg):
                    # On mismatch, dump both sets of inputs for debugging.
                    for n, int8_input in enumerate(int8_inputs):
                        logging.warning("int8_input[{}] is {}".format(
                            n, int8_input))
                        logging.warning("fp32_input[{}] is {}".format(
                            n, fp32_inputs[n]))
            logging.warning("pass checking op[{0}] {1} output".format(
                i, int8_op.type))
    else:
        workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()

    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    batch_size = cls_probs[0].shape[0]
    # One independent accumulator per image. `[d] * batch_size` would make
    # every slot refer to the same dict and mix detections across images.
    boxes_all_list = [defaultdict(list) for _ in range(batch_size)]

    for cnt, lvl in enumerate(range(k_min, k_max + 1)):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Split the channel axis into (anchor, class) / (anchor, 4 deltas).
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, cls_prob.shape[1] // A,
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))

        if cfg.RETINANET.SOFTMAX:
            # Drop class index 0 on the class axis.
            cls_prob = cls_prob[:, :, 1::, :, :]

        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()
            # In some cases [especially for very small img sizes], it's possible that
            # candidate_ind is empty if we impose threshold 0.05 at all levels. This
            # will lead to errors since no detections are found for this image. Hence,
            # for lvl 7 which has small spatial resolution, we take the threshold 0.0
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if len(candidate_inds) == 0:
                continue

            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            # Flat index -> (anchor, class, y, x)
            inds_4d = np.array(
                np.unravel_index(inds, (cls_prob[i, :]).shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]

            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                # NOTE(review): box_pred is 5-D after the reshape above but is
                # indexed here with four indices -- confirm this branch still
                # works with batched input.
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        # Combine predictions across all levels and retain the top scoring by class
        timers['misc_bbox'].tic()
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4] - scores
        #   detections[:, 5] - classes
        if detections:
            detections = np.vstack(detections)
        else:
            # np.vstack raises on an empty list; an image with no candidates
            # simply yields zero detections.
            detections = np.zeros((0, 6))
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see core/test_engine.py)
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)
        timers['misc_bbox'].toc()

    return cls_boxes_list