def im_proposals(model, im):
    """Generate RPN proposals on a single image.

    The blobs produced by ``_get_image_blob`` are fed under scoped names, the
    net is run by name, and the RPN outputs are fetched from the workspace.
    When ``cfg.FPN.FPN_ON`` and ``cfg.FPN.MULTILEVEL_RPN`` are both set, the
    per-level outputs are concatenated and only the
    ``cfg.TEST.RPN_POST_NMS_TOP_N`` highest-scoring proposals are kept.

    Args:
        model: object whose ``net.Proto().name`` identifies the net to run.
        im: image handed to ``_get_image_blob``.

    Returns:
        (boxes, scores): ``boxes`` is the fetched roi array with column 0
        (the batch index) removed and divided by ``im_info[0, 2]``;
        ``scores`` is always a 1-D array, even for a single proposal.
    """
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(lvl))
            for lvl in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(lvl))
            for lvl in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        # squeeze() alone turns a single proposal into a 0-d array, which
        # cannot be indexed with `inds` below; keep the scores 1-D.
        scores = np.atleast_1d(np.concatenate(blobs[len(rois_names):]).squeeze())
        # TODO(rbg): NMS again?
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'), core.ScopedName('rpn_roi_probs')])
        # Keep one score per box row even when only one proposal is returned.
        scores = np.atleast_1d(scores.squeeze())

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    boxes = boxes[:, 1:] / scale
    return boxes, scores
def test_nativeop_forward(
    model,
    inputs: List[Tuple[str, np.ndarray]],
    reference_outputs: List[Tuple[str, Optional[np.ndarray]]],
    metrics: Optional[List[TestMetric]] = None
) -> List[Any]:
    """ Tests a framework-native operator's forward method on list of given
        input tensors, without surrounding overhead. Similar to
        `test_op_forward`, but does not incur the overhead of converting
        from/to numpy arrays, creating multiple sessions etc.

        The net is run once for all metrics with `reruns == 0`, and then
        `reruns` more times for every other metric. The outputs of the last
        run are the ones measured against the references.

        @param model An object whose `net` is created and run in the workspace.
        @param inputs A list of (blob name, value) pairs fed before running.
        @param reference_outputs A list of (blob name, reference value) pairs;
                                 the names select which blobs are fetched.
        @param metrics A list of TestMetric objects to measure. If None, a
                       fresh `DefaultOpMetrics()` list is created for this
                       call, so metric state is not shared between calls.
        @return List of test metric results for every output.
    """
    assert isinstance(inputs, (list, tuple))
    if metrics is None:
        # Built per call: a default evaluated in the signature would be
        # created once and reused (with its state) by every later call.
        metrics = DefaultOpMetrics()

    normal_metrics = [m for m in metrics if m.reruns == 0]
    rerun_metrics = [m for m in metrics if m.reruns > 0]

    for name, inp in inputs:
        workspace.FeedBlob(name, inp)

    output_names = [name for name, _ in reference_outputs]

    # Create a single session
    workspace.CreateNet(model.net)

    for metric in normal_metrics:
        metric.begin(inputs)
    workspace.RunNet(model.net)
    outputs = workspace.FetchBlobs(output_names)
    for metric in normal_metrics:
        metric.end(outputs)

    for metric in rerun_metrics:
        for _ in range(metric.reruns):
            metric.begin(inputs)
            workspace.RunNet(model.net)
            outputs = workspace.FetchBlobs(output_names)
            metric.end(outputs)

    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]

    results = []
    for i, (_, reference) in enumerate(reference_outputs):
        # Execute metrics.
        for metric in metrics:
            result = metric.measure(inputs, outputs[i], reference)
            results.append(result)
            summary = metric.measure_summary(inputs, outputs[i], reference)
            print("{} on native inference for output {}: {}".format(
                type(metric).__name__, i, summary))
    return results
def _test_std(self):
    """Run the standard RPN pipeline on one validation image and visualize it.

    Loads a hard-coded config and checkpoint, runs the net on ``roidb[1]`` of
    the 'visual_genome_val' dataset, fetches the per-FPN-level RPN blobs,
    writes a visualization of the proposals, and resets the workspace.

    Returns:
        (cls_probs, box_preds, im_info, im, im_name, root_dir, dataset) where
        ``cls_probs`` / ``box_preds`` are the fetched per-level
        'rpn_cls_probs_fpn*' / 'rpn_bbox_pred_fpn*' blobs and ``im_info`` is
        ``inputs['im_info']`` cast to float32.
    """
    # NOTE: paths below are absolute and machine-specific.
    root_dir = osp.join('/private', 'home', 'xinleic', 'pyramid')
    cfg_file = osp.join(root_dir, 'configs', 'visual_genome', 'e2e_faster_rcnn_R-50-FPN_1x.yaml')
    merge_cfg_from_file(cfg_file)
    # Override a few settings after loading the config file.
    cfg.NUM_GPUS = 1
    cfg.TEST.RPN_PRE_NMS_TOP_N = 100
    cfg.TEST.RPN_POST_NMS_TOP_N = 20
    assert_and_infer_cfg()
    test_weight = osp.join(root_dir, 'outputs', 'train', 'visual_genome_train', 'e2e_faster_rcnn_R-50-FPN_1x', 'RNG_SEED#3', 'model_final.pkl')
    model = test_engine.initialize_model_from_cfg(test_weight, gpu_id=0)
    dataset = JsonDataset('visual_genome_val')
    roidb = dataset.get_roidb()
    # num_images and num_classes are computed but not used below.
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    # Only the second roidb entry is processed.
    entry = roidb[1]
    im = cv2.imread(entry['image'])
    max_level = cfg.FPN.RPN_MAX_LEVEL
    min_level = cfg.FPN.RPN_MIN_LEVEL
    # input: rpn_cls_probs_fpn2, rpn_bbox_pred_fpn2
    # output: rpn_rois_fpn2, rpn_roi_probs_fpn2
    with utils.c2.NamedCudaScope(0):
        # let's manually do the testing here
        inputs, im_scale = _get_blobs(im, None, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
        for k, v in inputs.items():
            workspace.FeedBlob(core.ScopedName(k), v)
        workspace.RunNet(model.net.Proto().name)
        # Build the scoped blob names for every FPN level, then replace each
        # name list with the fetched blob values.
        cls_probs = [core.ScopedName('rpn_cls_probs_fpn%d' % i) for i in range(min_level, max_level+1)]
        box_preds = [core.ScopedName('rpn_bbox_pred_fpn%d' % i) for i in range(min_level, max_level+1)]
        rpn_rois = [core.ScopedName('rpn_rois_fpn%d' % i) for i in range(min_level, max_level+1)]
        rpn_roi_probs = [core.ScopedName('rpn_roi_probs_fpn%d' % i) for i in range(min_level, max_level+1)]
        cls_probs = workspace.FetchBlobs(cls_probs)
        box_preds = workspace.FetchBlobs(box_preds)
        rpn_rois = workspace.FetchBlobs(rpn_rois)
        rpn_roi_probs = workspace.FetchBlobs(rpn_roi_probs)
    # Stack the proposals and their scores from all levels.
    rpn_rois = np.vstack(rpn_rois)
    rpn_roi_probs = np.vstack(rpn_roi_probs)
    # remove the image dimension
    rpn_rois = rpn_rois[:, 1:]
    # Append the score as the last column of each box row.
    boxes = np.hstack([rpn_rois, rpn_roi_probs])
    im_name = osp.splitext(osp.basename(entry['image']))[0]
    # The image is passed with its last axis reversed; presumably BGR -> RGB
    # for the visualizer, to be confirmed.
    utils.vis.vis_one_image(im[:, :, ::-1], '{:s}-std-output'.format(im_name),
                            osp.join(root_dir, 'tests'), boxes, segms=None, keypoints=None, thresh=0., box_alpha=0.8, dataset=dataset, show_class=False)
    workspace.ResetWorkspace()
    im_info = inputs['im_info'].astype(np.float32)
    return cls_probs, box_preds, im_info, im, im_name, root_dir, dataset
def test_log_barrier(self, X):
    """Compare the LogBarrier regularizer with a NumPy reference.

    The regularizer is applied to blob "X" twice: once ON_LOSS (whose output
    is expected to equal ``0.5 * sum(-log(clip(X, 1e-9)))``) and once
    AFTER_OPTIMIZER (after which "X" is expected to equal ``clip(X, 1e-9)``).
    """
    blob = core.BlobReference("X")
    workspace.FeedBlob(blob, X)
    init_net, net = self.get_training_nets()

    log_barrier = regularizer.LogBarrier(1.0)
    loss_blob = log_barrier(net, init_net, blob, by=RegularizationBy.ON_LOSS)
    log_barrier(
        net,
        init_net,
        blob,
        grad=None,
        by=RegularizationBy.AFTER_OPTIMIZER,
    )

    workspace.RunNetOnce(init_net)
    workspace.RunNetOnce(net)
    fetched_loss, fetched_param = workspace.FetchBlobs([loss_blob, blob])

    # NumPy reference values.
    clipped = np.clip(X, 1e-9, None)
    expected_loss = np.array(np.sum(-np.log(clipped)) * 0.5).astype(np.float32)

    npt.assert_allclose(fetched_loss, expected_loss, rtol=1e-3)
    npt.assert_allclose(fetched_param, clipped, rtol=1e-3)
def testFetchBlobs(self):
    """Feed two string blobs and check FetchBlobs returns them in order."""
    s1 = "test1"
    s2 = "test2"
    workspace.FeedBlob('s1', s1)
    workspace.FeedBlob('s2', s2)
    fetch1, fetch2 = workspace.FetchBlobs(['s1', 's2'])
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual(s1, fetch1)
    self.assertEqual(s2, fetch2)
def testFetchBlobs(self):
    """Feed two bytes blobs and check FetchBlobs returns them in order."""
    s1 = b"test1"
    s2 = b"test2"
    workspace.FeedBlob("s1", s1)
    workspace.FeedBlob("s2", s2)
    fetch1, fetch2 = workspace.FetchBlobs(["s1", "s2"])
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual(s1, fetch1)
    self.assertEqual(s2, fetch2)
def test_grad(self, size):
    """Check NegateGradient: forward output equals input, gradient is negated.

    A random float32 blob "X" of shape ``size`` is run through a net holding
    one NegateGradient op plus its gradient operators. The test asserts
    ``Y == X`` and ``grad(X) == -grad(Y)``.
    """
    data = np.random.random_sample(size)
    workspace.ResetWorkspace()
    workspace.FeedBlob("X", data.astype(np.float32))

    net = core.Net("negate_grad_test")
    out_blob = net.NegateGradient(["X"], ["Y"])
    grads = net.AddGradientOperators([out_blob])
    workspace.RunNetOnce(net)

    forward_in, forward_out = workspace.FetchBlobs(['X', 'Y'])
    grad_blobs = [grads[blob_name] for blob_name in ('X', 'Y')]
    grad_in, grad_out = workspace.FetchBlobs(grad_blobs)

    # Forward pass leaves the data untouched ...
    np.testing.assert_array_equal(forward_in, forward_out)
    # ... while X's gradient is the negation of Y's gradient.
    np.testing.assert_array_equal(grad_in, grad_out * (-1))
def test_transpose_network(self, batch_size, channels, height, width, seed, kernel):
    """Check that ``transpose_network`` preserves the results of a small conv net.

    The net has two convolutions on X (c1, c2), two on c1 (c3, c4), and a
    concat of the flattened c3, c4 and X. After the transform the module
    must hold exactly 9 more operators and 9 more tensors, and the blobs
    c1, c3 and out must match the untransformed run to 1 decimal place.
    """
    net = core.Net("net")
    # First stage: c1 and c2 are convolutions of X.
    net.Conv(["X", "w1", "b1"], ["c1"], stride=1, pad=0, kernel=kernel)
    net.Conv(["X", "w2", "b2"], ["c2"], stride=1, pad=0, kernel=kernel)
    # Second stage: c3 and c4 are convolutions of c1.
    net.Conv(["c1", "w3", "b3"], ["c3"], stride=1, pad=0, kernel=kernel)
    net.Conv(["c1", "w4", "b4"], ["c4"], stride=1, pad=0, kernel=kernel)
    net.Flatten(["c3"], "c3f")
    net.Flatten(["c4"], "c4f")
    net.Flatten(["X"], "Xf")
    net.Concat(["c3f", "c4f", "Xf"], ["out", "split_info"], axis=1, add_axis=0)

    # Random inputs and parameters, reproducible through `seed`.
    np.random.seed(seed)
    workspace.ResetWorkspace()
    tu.randBlobFloat32("X", batch_size, channels, height, width)
    tu.randBlobsFloat32(["w1", "w2"], 2 * channels, channels, kernel, kernel)
    tu.randBlobsFloat32(["b1", "b2"], 2 * channels)
    tu.randBlobsFloat32(["w3", "w4"], 4 * channels, 2 * channels, kernel, kernel)
    tu.randBlobsFloat32(["b3", "b4"], 4 * channels)

    input_names = ["X", "w1", "w2", "b1", "b2", "w3", "w4", "b3", "b4"]
    saved_inputs = workspace.FetchBlobs(input_names)
    probe_names = ("c1", "c3", "out")

    # Reference run on the untransformed net.
    workspace.RunNetOnce(net)
    expected = [workspace.FetchBlob(blob) for blob in probe_names]

    nn = ng.NNModule(net)
    ops_before = len(nn.operators)
    tensors_before = len(nn.tensors)
    transpose_network(nn)
    transposed_def = nn.convertToCaffe2Proto()
    ops_after = len(nn.operators)
    tensors_after = len(nn.tensors)

    # Expected growth: one NCHW2NHWC operator and tensor per channel-based
    # input (X, w1, w2, w3, w4) plus one NHWC2NCHW operator and tensor per
    # convolution output (c1, c2, c3, c4), i.e. 9 in total.
    self.assertEqual(ops_after, ops_before + 9,
                     "expected 9 additional operators")
    self.assertEqual(tensors_after, tensors_before + 9,
                     "expected 9 additional tensors")

    # Re-run the transformed net on the very same inputs.
    workspace.ResetWorkspace()
    for blob, value in zip(input_names, saved_inputs):
        workspace.FeedBlob(blob, value)
    workspace.RunNetOnce(transposed_def)
    actual = [workspace.FetchBlob(blob) for blob in probe_names]

    for got, want in zip(actual, expected):
        np.testing.assert_almost_equal(got, want, 1)
def im_proposals(model, im):
    """Generate RPN proposals on a single image.

    The blobs produced by ``_get_image_blob`` are fed under scoped names, the
    net is run by name, and the RPN outputs are fetched from the workspace.
    When ``cfg.FPN.FPN_ON`` and ``cfg.FPN.MULTILEVEL_RPN`` are both set, the
    per-level outputs are concatenated and only the
    ``cfg.TEST.RPN_POST_NMS_TOP_N`` highest-scoring proposals are kept.

    Args:
        model: object whose ``net.Proto().name`` identifies the net to run.
        im: image handed to ``_get_image_blob``.

    Returns:
        (boxes, scores): ``boxes`` is the fetched roi array with column 0
        (the batch index) removed and divided by ``im_info[0, 2]``;
        ``scores`` is always a 1-D array, even for a single proposal.
    """
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    scale = inputs['im_info'][0, 2]
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(lvl))
            for lvl in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(lvl))
            for lvl in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        # squeeze() alone turns a single proposal into a 0-d array, which
        # cannot be indexed with `inds` below; keep the scores 1-D.
        scores = np.atleast_1d(np.concatenate(blobs[len(rois_names):]).squeeze())
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'), core.ScopedName('rpn_roi_probs')])
        # Keep one score per box row even when only one proposal is returned.
        scores = np.atleast_1d(scores.squeeze())

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / scale
    return boxes, scores
def _run_general_op_cpu(self, op_name, blobs_in, values_in, blobs_out, **kwargs):
    """Run one operator under a CPU device scope and return its outputs.

    Args:
        op_name: operator type passed to ``core.CreateOperator``.
        blobs_in: input blob names.
        values_in: values fed to ``blobs_in``, paired by position.
        blobs_out: output blob names to fetch after the run.
        **kwargs: forwarded to ``core.CreateOperator``.

    Returns:
        The list returned by ``workspace.FetchBlobs(blobs_out)``.

    The workspace is reset before returning, including when feeding,
    running or fetching raises, so a failing operator does not leave blobs
    behind for the next test.
    """
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
        op = core.CreateOperator(op_name, blobs_in, blobs_out, **kwargs)
        try:
            for name, value in zip(blobs_in, values_in):
                workspace.FeedBlob(name, value)
            workspace.RunOperatorOnce(op)
            values_out = workspace.FetchBlobs(blobs_out)
        finally:
            # Always clean up, even on failure.
            workspace.ResetWorkspace()
    return values_out
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image.

    Args:
        model: object whose ``net.Proto().name`` identifies the net to run.
        im: image passed to ``_get_image_blob``; ``im.shape`` is also used
            to clip the predicted boxes.
        timers: optional mapping of name -> Timer; a fresh
            ``defaultdict(Timer)`` is used when None. The 'im_detect_bbox'
            and 'misc_bbox' timers are updated.

    Returns:
        ``cls_boxes``: a list of length ``cfg.MODEL.NUM_CLASSES``. Entry 0
        stays an empty list; entry ``c`` (c >= 1) holds the rows
        ``[x0, y0, x1, y1, score]`` of the kept detections of class ``c``.
    """
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    # A: number of anchors per spatial location (scales x aspect ratios).
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    # Names of the per-level output blobs; replaced by their values after
    # the net has run.
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    # Predicted boxes are divided by this value further down.
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    # cnt indexes the fetched blob lists in step with lvl.
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Split the channel axis into (anchor, class) and (anchor, 4).
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            # Drop index 0 of the class axis (presumably background,
            # to be confirmed).
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        # Keep the pre_nms_topn highest-scoring candidates (unordered).
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        # Map flat indices back to (image, anchor, class, y, x).
        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        # NOTE(review): axis 0 is sliced with ':', so this is one row per
        # image; the assignment into box_scores below presumably relies on a
        # batch of one, to be confirmed.
        scores = cls_prob[:, anchor_ids, classes, y, x]

        # Anchor boxes: grid position times stride plus the cell anchor.
        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        # Box deltas are read from the first image of the batch only.
        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        # Class ids in boxes_all are shifted by one relative to `classes`,
        # leaving id 0 unused.
        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
def make_nps(xs):
    """Fetch the workspace blobs named in ``xs``.

    ``xs`` must be a ``list`` of blob names; anything else fails the
    assertion. Returns whatever ``workspace.FetchBlobs`` returns for them.
    """
    is_name_list = isinstance(xs, list)
    assert is_name_list, 'ERROR: should pass list of names of the blobs'
    fetched = workspace.FetchBlobs(xs)
    return fetched
def test_record_queue(self):
    """Push a small map-valued dataset through a RecordQueue and verify it.

    One producer step reads the source dataset into the queue while three
    concurrent consumer steps read from the queue and write into a receiving
    dataset. The sums of the received lengths, keys and values must match
    those of ``contents_raw`` (6, 150 and 15).
    """
    producer_threads = 8
    consumer_count = 3

    schema = Struct(
        ('floats', Map(Scalar(np.int32), Scalar(np.float32))),
    )
    contents_raw = [
        [1, 2, 3],  # len
        [11, 21, 22, 31, 32, 33],  # key
        [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
    ]
    contents = from_blob_list(schema, contents_raw)

    # Source dataset, filled with `contents` by the init net.
    ds = Dataset(schema)
    net = core.Net('init')
    ds.init_empty(net)
    content_blobs = NewRecord(net, contents)
    FeedRecord(content_blobs, contents)
    writer = ds.writer(init_net=net)
    writer.write_record(net, content_blobs)
    reader = ds.reader(init_net=net)

    # Receiving dataset that the consumers write into.
    rec_dataset = Dataset(contents, name='rec')
    rec_dataset.init_empty(init_net=net)
    rec_dataset_writer = rec_dataset.writer(init_net=net)

    workspace.RunNetOnce(net)

    queue = RecordQueue(contents, num_threads=producer_threads)

    def process(net, fields):
        # Copy every field blob and rebuild a record with the same schema.
        copies = [net.Copy(blob) for blob in fields.field_blobs()]
        return from_blob_list(fields, copies)

    q_reader, q_step, q_exit, fields = queue.build(reader, process)
    producer_step = core.execution_step('producer', [q_step, q_exit])

    consumer_steps = []
    for idx in range(consumer_count):
        suffix = str(idx)

        read_name = 'queue_reader_' + suffix
        read_net = core.Net(read_name)
        should_stop, fields = q_reader.read_record(read_net)
        read_step = core.execution_step(read_name, read_net)

        write_name = 'dataset_writer_' + suffix
        write_net = core.Net(write_name)
        rec_dataset_writer.write(write_net, fields.field_blobs())
        write_step = core.execution_step(write_name, write_net)

        consumer_steps.append(
            core.execution_step(
                'consumer_' + suffix,
                [read_step, write_step],
                should_stop_blob=should_stop))

    consumer_step = core.execution_step(
        'consumers', consumer_steps, concurrent_substeps=True)
    work_steps = core.execution_step(
        'work', [producer_step, consumer_step], concurrent_substeps=True)

    plan = core.Plan('test')
    plan.AddStep(work_steps)
    core.workspace.RunPlan(plan)

    received = workspace.FetchBlobs(rec_dataset.get_blobs())
    self.assertEqual(6, sum(received[0]))
    self.assertEqual(150, sum(received[1]))
    self.assertAlmostEqual(15, sum(received[2]), places=5)
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections for a batch of images.

    Behaviour is steered by environment variables:
      * ``COSIM``: feed the inputs into both the int8 and fp32 workspaces
        and run ``model`` / ``model1`` operator by operator, comparing each
        operator's outputs.
      * ``EPOCH2OLD == "1"``: run the net once before the timed section.
      * ``INT8INFO == "1"``: run a calibration iteration instead of plain
        inference; ``INT8CALIB`` selects the algorithm and ``INT8KLNUM``
        the iteration count for "kl_divergence" (default 100).

    Args:
        model: object whose ``net`` is run (the int8 net in COSIM mode).
        im: images passed to ``blob_utils.get_image_blob``; ``im[0].shape``
            is used to clip the boxes of every image.
        timers: optional mapping of name -> Timer; defaults to a fresh
            ``defaultdict(Timer)``.
        model1: the fp32 counterpart of ``model``; needed in COSIM mode.

    Returns:
        A list with one ``cls_boxes`` entry per image in the batch. Each
        ``cls_boxes`` is a list of length ``cfg.MODEL.NUM_CLASSES`` whose
        entry ``c`` (c >= 1) holds rows ``[x0, y0, x1, y1, score]``.
        Returns None when, in COSIM mode, an operator pair disagrees on the
        number of inputs or outputs.
    """
    if timers is None:
        timers = defaultdict(Timer)
    if model1 is None and os.environ.get('COSIM'):
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    # A: number of anchors per spatial location (scales x aspect ratios).
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE,
                                  cfg.TEST.SIZEFIX)
    # Names of the per-level output blobs; replaced by their values after
    # the net has run.
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
        if os.environ.get('COSIM'):
            # Mirror the same input into the fp32 workspace.
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k),
                               v.astype(np.float32, copy=False))
    timers['data1'].toc()

    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)

    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        # Calibration run: choose the algorithm from INT8CALIB.
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            # Dump both net definitions, then step through the two nets one
            # operator at a time, each in its own workspace.
            with open("int8.txt", "wb") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "wb") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is {1}".format(
                    i, model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))

                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is {1}".format(
                    i, model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))

                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return

                logging.warning("begin to check op[{}] {} input".format(
                    i, model.net.Proto().op[i].type))
                # The input comparison itself is disabled; the loop only
                # skips blobs whose name starts with '_'.
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    #assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i, model.net.Proto().op[i].type))

                logging.warning("begin to check op[{0}] {1} output".format(
                    i, model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    #logging.warning("int8_outputis {} and fp32 output is {} ".format(int8_results[j], fp32_results[j]))
                    #if not compare_utils.assert_allclose(int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01,
                            os.environ.get('COSIM')):
                        # On mismatch, log the inputs of both operators.
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))
                logging.warning("pass checking op[{0}] {1} output".format(
                    i, model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()

    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    batch_size = cls_probs[0].shape[0]
    # One independent accumulator per image; rows are
    # [x0, y0, x1, y1, score]. `[d] * batch_size` would repeat a reference
    # to one shared dict, mixing the detections of all images together.
    boxes_all_list = [defaultdict(list) for _ in range(batch_size)]

    # cnt indexes the fetched blob lists in step with lvl.
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Split the channel axis into (anchor, class) and (anchor, 4).
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()
            # In some cases [especially for very small img sizes], it's
            # possible that candidate_ind is empty if we impose threshold
            # 0.05 at all levels. This will lead to errors since no
            # detections are found for this image. Hence, for lvl 7 which
            # has small spatial resolution, we take the threshold 0.0
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if (len(candidate_inds) == 0):
                continue

            # Keep the pre_nms_topn highest-scoring candidates (unordered).
            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N,
                               len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            # Map flat indices back to (anchor, class, y, x) for image i.
            inds_4d = np.array(np.unravel_index(
                inds, (cls_prob[i, :]).shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]

            # Anchor boxes: grid position times stride plus the cell anchor.
            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            # NOTE(review): the same im_scale and im[0].shape are applied to
            # every image; presumably all images in a batch share one size
            # and scale, to be confirmed.
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            # Class ids in the accumulator are shifted by one relative to
            # `classes`, leaving id 0 unused.
            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        # Combine predictions across all levels and retain the top scoring
        # by class
        timers['misc_bbox'].tic()
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4] - scores
        #   detections[:, 5] - classes
        if detections:
            detections = np.vstack(detections)
        else:
            # With per-image accumulators an image can end up with no
            # candidates at all; np.vstack([]) would raise, so use an empty
            # (0, 6) array and return empty per-class results instead.
            detections = np.zeros((0, 6))
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see
        # core/test_engine.py)
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)
        timers['misc_bbox'].toc()

    return cls_boxes_list