def __init__(self, onnx_file, class_names, device_id):
    """Create the ONNX Runtime session backing this detector.

    Args:
        onnx_file: Model handed to ``ort.InferenceSession``.
        class_names: Forwarded unchanged to the parent constructor.
        device_id: Forwarded to the parent constructor and used as the
            ``device_id`` option of the CUDA execution provider when
            ``ort.get_device()`` reports ``'GPU'``.
    """
    super(ONNXRuntimeDetector, self).__init__(class_names, device_id)
    import onnxruntime as ort

    # get the custom op path
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so one clause
        # covers both. The message uses adjacent literals: a backslash
        # continuation inside the string would embed the source
        # indentation in the warning text.
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime '
                      'from source.')

    session_options = ort.SessionOptions()
    # register custom op for onnxruntime
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)

    # CPU is always listed; CUDA is placed first when ORT reports 'GPU'.
    providers = ['CPUExecutionProvider']
    options = [{}]
    is_cuda_available = ort.get_device() == 'GPU'
    if is_cuda_available:
        providers.insert(0, 'CUDAExecutionProvider')
        options.insert(0, {'device_id': device_id})
    sess.set_providers(providers, options)

    self.sess = sess
    self.io_binding = sess.io_binding()
    self.output_names = [_.name for _ in sess.get_outputs()]
    self.is_cuda_available = is_cuda_available
def test_grid_sample(mode, padding_mode, align_corners):
    """Compare ``grid_sample`` exported to ONNX against PyTorch.

    The function is exported with opset 11 after registering mmcv's extra
    symbolics, run through ONNX Runtime with the mmcv custom-op library
    loaded, and the outputs are compared with ``atol=1e-3``. The test is
    skipped when the custom-op library file does not exist.

    Args:
        mode: Forwarded to ``nn.functional.grid_sample``.
        padding_mode: Forwarded to ``nn.functional.grid_sample``.
        align_corners: Forwarded to ``nn.functional.grid_sample``.
    """
    from mmcv.onnx.symbolic import register_extra_symbolics
    opset_version = 11
    register_extra_symbolics(opset_version)

    from mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    # Named `input_tensor` rather than `input` to avoid shadowing the
    # builtin in the test body.
    input_tensor = torch.rand(1, 1, 10, 10)
    grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    grid = nn.functional.affine_grid(
        grid, (1, 1, 15, 15)).type_as(input_tensor)

    def func(input, grid):
        return nn.functional.grid_sample(
            input,
            grid,
            mode=mode,
            padding_mode=padding_mode,
            align_corners=align_corners)

    wrapped_model = WrapFunction(func).eval()

    input_names = ['input', 'grid']
    output_names = ['output']

    # The exported file is temporary: remove it even when export or
    # inference raises, so a failing run does not leave it behind.
    try:
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (input_tensor, grid),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=opset_version)

        onnx_model = onnx.load(onnx_file)

        session_options = rt.SessionOptions()
        session_options.register_custom_ops_library(ort_custom_op_path)

        # get onnx output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert len(net_feed_input) == 2
        sess = rt.InferenceSession(onnx_file, session_options)
        ort_result = sess.run(
            None, {
                'input': input_tensor.detach().numpy(),
                'grid': grid.detach().numpy()
            })
    finally:
        if os.path.exists(onnx_file):
            os.remove(onnx_file)

    pytorch_results = wrapped_model(input_tensor.clone(), grid.clone())
    assert np.allclose(pytorch_results, ort_result, atol=1e-3)
def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
             pool_mode, aligned):
    """Emit the ONNX node(s) for RoI align.

    If ``mmcv.ops.get_onnxruntime_op_path`` can be imported and the path it
    returns exists, a single ``mmcv::MMCVRoiAlign`` node is emitted.
    Otherwise the op is expressed with ``Slice``/``Squeeze``/``Cast`` helper
    nodes followed by ``RoiAlign``.

    Args:
        g: Graph object providing ``op``.
        input: Feature input value.
        rois: RoI value; column 0 is sliced off as the batch index and
            columns 1..4 are passed on as the boxes.
        output_size: Indexable pair ``(height, width)``.
        spatial_scale: Passed as ``spatial_scale_f``.
        sampling_ratio: Clamped to be non-negative before being emitted.
        pool_mode: Passed as the pooling mode string attribute.
        aligned: When truthy, the fallback path subtracts
            ``0.5 / spatial_scale`` from the boxes.
    """
    try:
        import os.path as osp
        from mmcv.ops import get_onnxruntime_op_path
        use_custom_op = osp.exists(get_onnxruntime_op_path())
    except ImportError:
        use_custom_op = False

    out_h, out_w = output_size[0], output_size[1]
    ratio = max(0, sampling_ratio)

    if use_custom_op:
        return g.op(
            'mmcv::MMCVRoiAlign',
            input,
            rois,
            aligned_height_i=out_h,
            aligned_width_i=out_w,
            spatial_scale_f=spatial_scale,
            sampling_ratio_i=ratio,
            pool_mode_s=pool_mode,
            aligned_i=aligned)

    from torch.onnx.symbolic_opset9 import sub, squeeze
    from torch.onnx.symbolic_helper import _slice_helper
    from torch.onnx import TensorProtoDataType

    # batch_indices = rois[:, 0].long()
    first_col = _slice_helper(g, rois, axes=[1], starts=[0], ends=[1])
    batch_indices = g.op(
        'Cast', squeeze(g, first_col, 1), to_i=TensorProtoDataType.INT64)

    # rois = rois[:, 1:]
    boxes = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
    if aligned:
        # rois -= 0.5/spatial_scale
        half_pixel = g.op(
            'Constant',
            value_t=torch.tensor([0.5 / spatial_scale], dtype=torch.float32))
        boxes = sub(g, boxes, half_pixel)

    # roi align
    return g.op(
        'RoiAlign',
        input,
        boxes,
        batch_indices,
        output_height_i=out_h,
        output_width_i=out_w,
        spatial_scale_f=spatial_scale,
        sampling_ratio_i=ratio,
        mode_s=pool_mode)
def test_nms():
    """Compare scores of mmcv ``nms`` between PyTorch and ONNX Runtime.

    ``nms`` is exported with opset 11, executed with ONNX Runtime (loading
    the mmcv custom-op library when its file exists) and column 4 of the
    detections is compared with ``atol=1e-3``.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')
    from mmcv.ops import get_onnxruntime_op_path, nms
    np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
                         [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
                        dtype=np.float32)
    np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)

    boxes = torch.from_numpy(np_boxes)
    scores = torch.from_numpy(np_scores)

    # Use a distinct name instead of rebinding the imported `nms`.
    nms_func = partial(
        nms, iou_threshold=0.3, offset=0, score_threshold=0, max_num=0)
    pytorch_dets, _ = nms_func(boxes, scores)
    pytorch_score = pytorch_dets[:, 4]

    wrapped_model = WrapFunction(nms_func)
    wrapped_model.cpu().eval()

    # Remove the temporary ONNX file even when export or inference raises.
    try:
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (boxes, scores),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['boxes', 'scores'],
                opset_version=11)

        onnx_model = onnx.load(onnx_file)
        ort_custom_op_path = get_onnxruntime_op_path()
        session_options = rt.SessionOptions()
        if os.path.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)

        # get onnx output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert len(net_feed_input) == 2
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_dets, _ = sess.run(None, {
            'scores': scores.detach().numpy(),
            'boxes': boxes.detach().numpy()
        })
    finally:
        if os.path.exists(onnx_file):
            os.remove(onnx_file)

    onnx_score = onnx_dets[:, 4]
    assert np.allclose(pytorch_score, onnx_score, atol=1e-3)
def test_corner_pool(mode, opset=11):
    """Compare mmcv ``CornerPool`` between PyTorch and ONNX Runtime.

    Skipped under parrots and when the mmcv custom-op library for ONNX
    Runtime does not exist. Outputs are compared with ``atol=1e-5``.

    Args:
        mode: Passed to ``CornerPool``.
        opset: ONNX opset version used for the export.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')

    from mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    from mmcv.ops.corner_pool import CornerPool

    def corner_pool_func(input):
        corner_pool_module = CornerPool(mode)
        return corner_pool_module.corner_pool.apply(input)

    wrapped_model = WrapFunction(corner_pool_func).eval()

    input_tensor = torch.rand((2, 3, 9, 12))  # (n,c,h,w)

    # Remove the temporary ONNX file even when export or inference raises.
    try:
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model,
                input_tensor,
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['input'],
                output_names=['output'],
                opset_version=opset)

        onnx_model = onnx.load(onnx_file)
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert len(net_feed_input) == 1

        session_options = rt.SessionOptions()
        session_options.register_custom_ops_library(ort_custom_op_path)
        sess = rt.InferenceSession(onnx_file, session_options)
        ort_result = sess.run(None, {'input': input_tensor.detach().numpy()})
    finally:
        if os.path.exists(onnx_file):
            os.remove(onnx_file)

    pytorch_results = wrapped_model(input_tensor.clone())
    assert np.allclose(pytorch_results, ort_result, atol=1e-5)
def __init__(self,
             onnx_file: str,
             cfg: Any,
             device_id: int,
             show_score: bool = False):
    """Initialise the text detector and its ONNX Runtime session.

    Args:
        onnx_file: Model handed to ``ort.InferenceSession``.
        cfg: Config object; ``cfg.model.*`` fields are forwarded to
            ``SingleStageTextDetector.__init__`` and the object itself is
            stored on ``self.cfg``.
        device_id: Stored on ``self.device_id`` and used as the CUDA
            provider ``device_id`` option when ``ort.get_device()``
            reports ``'GPU'``.
        show_score: Forwarded to ``TextDetectorMixin.__init__``.
    """
    SingleStageTextDetector.__init__(self, cfg.model.backbone,
                                     cfg.model.neck, cfg.model.bbox_head,
                                     cfg.model.train_cfg,
                                     cfg.model.test_cfg,
                                     cfg.model.pretrained)
    TextDetectorMixin.__init__(self, show_score)
    import onnxruntime as ort

    # get the custom op path
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError. Adjacent
        # literals keep source indentation out of the warning text.
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime '
                      'from source.')

    session_options = ort.SessionOptions()
    # register custom op for onnxruntime
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)

    # CPU is always listed; CUDA is placed first when ORT reports 'GPU'.
    providers = ['CPUExecutionProvider']
    options = [{}]
    is_cuda_available = ort.get_device() == 'GPU'
    if is_cuda_available:
        providers.insert(0, 'CUDAExecutionProvider')
        options.insert(0, {'device_id': device_id})
    sess.set_providers(providers, options)

    self.sess = sess
    self.device_id = device_id
    self.io_binding = sess.io_binding()
    self.output_names = [_.name for _ in sess.get_outputs()]
    # Every model output is bound on the shared io_binding up front.
    for name in self.output_names:
        self.io_binding.bind_output(name)
    self.cfg = cfg
def test_grid_sample(mode, padding_mode, align_corners):
    """Run the shared grid-sample check for one parameter combination.

    Registers mmcv's extra symbolics for opset 11, skips when the ONNX
    Runtime custom-op library file is missing, builds a random input and an
    affine grid, and delegates to ``process_grid_sample``.
    """
    from mmcv.onnx.symbolic import register_extra_symbolics
    opset_version = 11
    register_extra_symbolics(opset_version)

    from mmcv.ops import get_onnxruntime_op_path
    custom_op_lib = get_onnxruntime_op_path()
    if not os.path.exists(custom_op_lib):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    feat = torch.rand(1, 1, 10, 10)
    theta = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    sampling_grid = nn.functional.affine_grid(theta, (1, 1, 15, 15))
    sampling_grid = sampling_grid.type_as(feat)

    def func(input, grid):
        sample_kwargs = dict(
            mode=mode,
            padding_mode=padding_mode,
            align_corners=align_corners)
        return nn.functional.grid_sample(input, grid, **sample_kwargs)

    return process_grid_sample(func, feat, sampling_grid, custom_op_lib)
def forward(
        model: onnx.ModelProto,
        inputs: Optional[Dict[str, np.ndarray]] = None,
        input_shapes: Optional[TensorShapes] = None
) -> Dict[str, np.ndarray]:
    """Run forward on a model.

    Args:
        model (onnx.ModelProto): Input ONNX model.
        inputs (Dict[str, np.ndarray], optional): Inputs of the model.
            When None, inputs are produced by ``generate_rand_input``.
        input_shapes (TensorShapes, optional): Input shapes of the model,
            forwarded to ``generate_rand_input``. None is treated as an
            empty mapping.

    Returns:
        Dict[str, np.ndarray]: Outputs of the model, as an ``OrderedDict``
        keyed by output name in session output order.
    """
    if input_shapes is None:
        input_shapes = {}

    sess_options = rt.SessionOptions()

    # load custom lib for onnxruntime in mmcv
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        # mmcv is optional here; without it no custom-op library is loaded.
        pass
    if os.path.exists(ort_custom_op_path):
        sess_options.register_custom_ops_library(ort_custom_op_path)

    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(
        model.SerializeToString(),
        sess_options=sess_options,
        providers=['CPUExecutionProvider'])

    if inputs is None:
        inputs = generate_rand_input(model, input_shapes=input_shapes)

    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
def __init__(self, onnx_file, cfg, device_id):
    """Create the ONNX Runtime session and the model-specific wrapper.

    Args:
        onnx_file: Model handed to ``ort.InferenceSession``.
        cfg: Config object; ``cfg.model`` and ``cfg.test_cfg`` are passed
            to ``build_model`` to decide which wrapper class to use.
        device_id: Stored on ``self.device_id`` and used as the CUDA
            provider ``device_id`` option when ``ort.get_device()``
            reports ``'GPU'``.

    Raises:
        TypeError: If the built model is neither a ``BaseMattor`` nor a
            ``BasicRestorer``.
    """
    super(ONNXRuntimeEditing, self).__init__()
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError. Adjacent
        # literals keep source indentation out of the warning text.
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime '
                      'from source.')

    session_options = ort.SessionOptions()
    # register custom op for onnxruntime
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)

    # CPU is always listed; CUDA is placed first when ORT reports 'GPU'.
    providers = ['CPUExecutionProvider']
    options = [{}]
    is_cuda_available = ort.get_device() == 'GPU'
    if is_cuda_available:
        providers.insert(0, 'CUDAExecutionProvider')
        options.insert(0, {'device_id': device_id})
    sess.set_providers(providers, options)

    self.sess = sess
    self.device_id = device_id
    self.io_binding = sess.io_binding()
    self.output_names = [_.name for _ in sess.get_outputs()]

    base_model = build_model(
        cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if isinstance(base_model, BaseMattor):
        WraperClass = ONNXRuntimeMattor
    elif isinstance(base_model, BasicRestorer):
        WraperClass = ONNXRuntimeRestorer
    else:
        # Previously this fell through and failed with an opaque
        # UnboundLocalError on `WraperClass` below.
        raise TypeError(
            'Unsupported model type for ONNX Runtime wrapping: '
            f'{type(base_model).__name__}')
    self.wraper = WraperClass(self.sess, self.io_binding,
                              self.output_names, base_model)
def onnx2tensorrt(onnx_file: str,
                  trt_file: str,
                  config: dict,
                  input_config: dict,
                  fp16: bool = False,
                  verify: bool = False,
                  show: bool = False,
                  dataset: str = 'CityscapesDataset',
                  workspace_size: int = 1,
                  verbose: bool = False):
    """Build a TensorRT engine from an ONNX file and optionally verify it.

    Args:
        onnx_file: Path of the ONNX model to convert.
        trt_file: Destination of the serialized engine; its parent
            directory is created when the path has one.
        config: Config object; ``config.data.test.pipeline`` is used to
            prepare the verification image.
        input_config: Mapping with ``min_shape``, ``max_shape`` and, for
            verification, ``input_path``.
        fp16: Passed to ``onnx2trt`` as ``fp16_mode``.
        verify: When true, run the model with ONNX Runtime (CPU provider)
            and TensorRT and assert the outputs are close.
        show: When true during verification, display both results.
        dataset: Name looked up in ``DATASETS`` to obtain the palette.
        workspace_size: Converted with ``get_GiB`` for ``onnx2trt``.
        verbose: Selects the VERBOSE TensorRT log level instead of ERROR.
    """
    import tensorrt as trt

    min_shape = input_config['min_shape']
    max_shape = input_config['max_shape']

    # create trt engine and wrapper
    shape_ranges = {'input': [min_shape, min_shape, max_shape]}
    workspace_bytes = get_GiB(workspace_size)
    if verbose:
        trt_log_level = trt.Logger.VERBOSE
    else:
        trt_log_level = trt.Logger.ERROR
    engine = onnx2trt(
        onnx_file,
        shape_ranges,
        log_level=trt_log_level,
        fp16_mode=fp16,
        max_workspace_size=workspace_bytes)

    out_dir, _ = osp.split(trt_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    save_trt_engine(engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    prepared = _prepare_input_img(
        input_config['input_path'],
        config.data.test.pipeline,
        shape=min_shape[2:])

    imgs = prepared['imgs']
    img_metas = prepared['img_metas']
    img_list = [img[None, :] for img in imgs]
    img_meta_list = [[img_meta] for img_meta in img_metas]
    # update img_meta
    img_list, img_meta_list = _update_input_img(img_list, img_meta_list)

    if max_shape[0] > 1:
        # concate flip image for batch test
        flipped = [_.flip(-1) for _ in img_list]
        img_list = [
            torch.cat((ori_img, flip_img), 0)
            for ori_img, flip_img in zip(img_list, flipped)
        ]

    # Get results from ONNXRuntime
    ort_custom_op_path = get_onnxruntime_op_path()
    session_options = ort.SessionOptions()
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)
    sess.set_providers(['CPUExecutionProvider'], [{}])  # use cpu mode
    ort_feed = {'input': img_list[0].detach().numpy()}
    onnx_output = sess.run(['output'], ort_feed)[0][0]

    # Get results from TensorRT
    trt_model = TRTWraper(trt_file, ['input'], ['output'])
    with torch.no_grad():
        trt_outputs = trt_model({'input': img_list[0].contiguous().cuda()})
    trt_output = trt_outputs['output'][0].cpu().detach().numpy()

    if show:
        dataset_cls = DATASETS.get(dataset)
        assert dataset_cls is not None
        palette = dataset_cls.PALETTE

        show_result_pyplot(
            input_config['input_path'],
            (onnx_output[0].astype(np.uint8), ),
            palette=palette,
            title='ONNXRuntime',
            block=False)
        show_result_pyplot(
            input_config['input_path'],
            (trt_output[0].astype(np.uint8), ),
            palette=palette,
            title='TensorRT')

    np.testing.assert_allclose(
        onnx_output, trt_output, rtol=1e-03, atol=1e-05)
    print('TensorRT and ONNXRuntime output all close.')
def is_in_onnx_export_without_custom_ops():
    """Tell whether an ONNX export is running without the ORT custom ops.

    Returns:
        True only when ``torch.onnx.is_in_onnx_export()`` is true and the
        path returned by ``get_onnxruntime_op_path()`` does not exist.
    """
    from mmcv.ops import get_onnxruntime_op_path
    custom_op_lib = get_onnxruntime_op_path()
    if not torch.onnx.is_in_onnx_export():
        return False
    return not osp.exists(custom_op_lib)
def pytorch2onnx(model,
                 input_img,
                 input_shape,
                 normalize_cfg,
                 opset_version=11,
                 show=False,
                 output_file='tmp.onnx',
                 verify=False,
                 test_img=None,
                 do_simplify=False,
                 dynamic_export=None,
                 skip_postprocess=False):
    """Export a detector to ONNX and optionally verify it.

    Args:
        model: Detector to export; its ``forward`` is temporarily replaced
            during export and restored afterwards (except in the
            ``skip_postprocess`` path, which leaves ``forward_dummy``
            installed).
        input_img: Image path used to build the example input.
        input_shape: Shape of the example input; indices 2 and 3 are used
            when scaling for the dynamic-shape check.
        normalize_cfg: Normalisation config passed to
            ``preprocess_example_input``.
        opset_version: ONNX opset version.
        show: Passed as ``verbose`` to ``torch.onnx.export``; during
            verification it also disables writing result images to disk.
        output_file: Destination ONNX file.
        verify: When true, compare ONNX Runtime results with PyTorch.
        test_img: Optional image path used for verification instead of
            ``input_img``.
        do_simplify: When true, run ``onnxsim.simplify`` on the output.
        dynamic_export: When truthy, export with dynamic axes and verify
            on a scaled-up, two-image batch.
        skip_postprocess: When true, export ``model.forward_dummy`` only
            and return immediately.
    """
    input_config = {
        'input_shape': input_shape,
        'input_path': input_img,
        'normalize_cfg': normalize_cfg
    }
    # prepare input
    one_img, one_meta = preprocess_example_input(input_config)
    img_list, img_meta_list = [one_img], [[one_meta]]

    if skip_postprocess:
        warnings.warn('Not all models support export onnx without post '
                      'process, especially two stage detectors!')
        model.forward = model.forward_dummy
        torch.onnx.export(
            model,
            one_img,
            output_file,
            input_names=['input'],
            export_params=True,
            keep_initializers_as_inputs=True,
            do_constant_folding=True,
            verbose=show,
            opset_version=opset_version)

        print(f'Successfully exported ONNX model without '
              f'post process: {output_file}')
        return

    # replace original forward function
    origin_forward = model.forward
    model.forward = partial(
        model.forward,
        img_metas=img_meta_list,
        return_loss=False,
        rescale=False)

    output_names = ['dets', 'labels']
    if model.with_mask:
        output_names.append('masks')
    input_name = 'input'
    dynamic_axes = None
    if dynamic_export:
        dynamic_axes = {
            input_name: {
                0: 'batch',
                2: 'width',
                3: 'height'
            },
            'dets': {
                0: 'batch',
                1: 'num_dets',
            },
            'labels': {
                0: 'batch',
                1: 'num_dets',
            },
        }
        if model.with_mask:
            dynamic_axes['masks'] = {0: 'batch', 1: 'num_dets'}

    torch.onnx.export(
        model,
        img_list,
        output_file,
        input_names=[input_name],
        output_names=output_names,
        export_params=True,
        keep_initializers_as_inputs=True,
        do_constant_folding=True,
        verbose=show,
        opset_version=opset_version,
        dynamic_axes=dynamic_axes)

    model.forward = origin_forward

    # get the custom op path
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError. Adjacent
        # literals keep source indentation out of the warning text.
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime '
                      'from source.')

    if do_simplify:
        import onnxsim

        from mmdet import digit_version

        min_required_version = '0.3.0'
        assert digit_version(onnxsim.__version__) >= digit_version(
            min_required_version
        ), f'Requires to install onnx-simplify>={min_required_version}'

        input_dic = {'input': img_list[0].detach().cpu().numpy()}
        onnxsim.simplify(
            output_file, input_data=input_dic, custom_lib=ort_custom_op_path)
    print(f'Successfully exported ONNX model: {output_file}')

    if verify:
        # check by onnx
        onnx_model = onnx.load(output_file)
        onnx.checker.check_model(onnx_model)

        # wrap onnx model
        onnx_model = ONNXRuntimeDetector(output_file, model.CLASSES, 0)
        if dynamic_export:
            # scale up to test dynamic shape
            h, w = [int((_ * 1.5) // 32 * 32) for _ in input_shape[2:]]
            h, w = min(1344, h), min(1344, w)
            input_config['input_shape'] = (1, 3, h, w)

        # Use the dedicated verification image when one was supplied.
        # The previous check (`if test_img is None: ... = input_img`) was
        # a no-op, so `test_img` was silently ignored.
        if test_img is not None:
            input_config['input_path'] = test_img

        # prepare input once again
        one_img, one_meta = preprocess_example_input(input_config)
        img_list, img_meta_list = [one_img], [[one_meta]]

        # get pytorch output
        with torch.no_grad():
            pytorch_results = model(
                img_list,
                img_metas=img_meta_list,
                return_loss=False,
                rescale=True)[0]

        img_list = [_.cuda().contiguous() for _ in img_list]
        if dynamic_export:
            img_list = img_list + [_.flip(-1).contiguous() for _ in img_list]
            img_meta_list = img_meta_list * 2
        # get onnx output
        onnx_results = onnx_model(
            img_list, img_metas=img_meta_list, return_loss=False)[0]
        # visualize predictions
        score_thr = 0.3
        if show:
            out_file_ort, out_file_pt = None, None
        else:
            out_file_ort, out_file_pt = 'show-ort.png', 'show-pt.png'

        show_img = one_meta['show_img']
        model.show_result(
            show_img,
            pytorch_results,
            score_thr=score_thr,
            show=True,
            win_name='PyTorch',
            out_file=out_file_pt)
        onnx_model.show_result(
            show_img,
            onnx_results,
            score_thr=score_thr,
            show=True,
            win_name='ONNXRuntime',
            out_file=out_file_ort)

        # compare a part of result
        if model.with_mask:
            compare_pairs = list(zip(onnx_results, pytorch_results))
        else:
            compare_pairs = [(onnx_results, pytorch_results)]
        err_msg = 'The numerical values are different between Pytorch' + \
                  ' and ONNX, but it does not necessarily mean the' + \
                  ' exported ONNX model is problematic.'
        # check the numerical value
        for onnx_res, pytorch_res in compare_pairs:
            for o_res, p_res in zip(onnx_res, pytorch_res):
                np.testing.assert_allclose(
                    o_res, p_res, rtol=1e-03, atol=1e-05, err_msg=err_msg)
        print('The numerical values are the same between Pytorch and ONNX')
def pytorch2onnx(config_path,
                 checkpoint_path,
                 input_img,
                 input_shape,
                 opset_version=11,
                 show=False,
                 output_file='tmp.onnx',
                 verify=False,
                 normalize_cfg=None,
                 dataset='coco',
                 test_img=None,
                 do_simplify=False,
                 cfg_options=None,
                 dynamic_export=None):
    """Export a detector built from config/checkpoint to ONNX.

    Args:
        config_path: Config path passed to the model builders.
        checkpoint_path: Checkpoint path passed to the model builders.
        input_img: Image path used to build the example input.
        input_shape: Shape of the example input; indices 2 and 3 are used
            when scaling for the dynamic-shape check.
        opset_version: ONNX opset version.
        show: Passed as ``verbose`` to ``torch.onnx.export``; also enables
            visualisation during verification.
        output_file: Destination ONNX file.
        verify: When true, compare ONNX Runtime results with PyTorch.
        normalize_cfg: Normalisation config for the example input.
        dataset: Name passed to ``get_classes`` during verification.
        test_img: Optional image path used for verification instead of
            ``input_img``.
        do_simplify: When true, run ``onnxsim.simplify`` on the output.
        cfg_options: Config overrides passed to the model builders.
        dynamic_export: When truthy, export with dynamic axes and verify
            on a scaled-up, two-image batch.
    """
    input_config = {
        'input_shape': input_shape,
        'input_path': input_img,
        'normalize_cfg': normalize_cfg
    }

    # prepare original model and meta for verifying the onnx model
    orig_model = build_model_from_cfg(
        config_path, checkpoint_path, cfg_options=cfg_options)
    one_img, one_meta = preprocess_example_input(input_config)
    model, tensor_data = generate_inputs_and_wrap_model(
        config_path, checkpoint_path, input_config, cfg_options=cfg_options)
    output_names = ['dets', 'labels']
    if model.with_mask:
        output_names.append('masks')
    dynamic_axes = None
    if dynamic_export:
        dynamic_axes = {
            'input': {
                0: 'batch',
                2: 'width',
                3: 'height'
            },
            'dets': {
                0: 'batch',
                1: 'num_dets',
            },
            'labels': {
                0: 'batch',
                1: 'num_dets',
            },
        }
        if model.with_mask:
            dynamic_axes['masks'] = {0: 'batch', 1: 'num_dets'}

    torch.onnx.export(
        model,
        tensor_data,
        output_file,
        input_names=['input'],
        output_names=output_names,
        export_params=True,
        keep_initializers_as_inputs=True,
        do_constant_folding=True,
        verbose=show,
        opset_version=opset_version,
        dynamic_axes=dynamic_axes)

    model.forward = orig_model.forward

    # get the custom op path
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError. Adjacent
        # literals keep source indentation out of the warning text.
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime '
                      'from source.')

    if do_simplify:
        from mmdet import digit_version
        import onnxsim

        min_required_version = '0.3.0'
        assert digit_version(onnxsim.__version__) >= digit_version(
            min_required_version
        ), f'Requires to install onnx-simplify>={min_required_version}'

        input_dic = {'input': one_img.detach().cpu().numpy()}
        onnxsim.simplify(
            output_file, input_data=input_dic, custom_lib=ort_custom_op_path)
    print(f'Successfully exported ONNX model: {output_file}')

    if verify:
        from mmdet.core import get_classes, bbox2result
        from mmdet.apis import show_result_pyplot

        model.CLASSES = get_classes(dataset)
        num_classes = len(model.CLASSES)
        # check by onnx
        onnx_model = onnx.load(output_file)
        onnx.checker.check_model(onnx_model)
        if dynamic_export:
            # scale up to test dynamic shape
            h, w = [int((_ * 1.5) // 32 * 32) for _ in input_shape[2:]]
            input_config['input_shape'] = (1, 3, h, w)
        if test_img is not None:
            input_config['input_path'] = test_img
        one_img, one_meta = preprocess_example_input(input_config)
        tensor_data = [one_img]

        # get pytorch output
        pytorch_results = model(tensor_data, [[one_meta]], return_loss=False)
        pytorch_results = pytorch_results[0]
        # get onnx output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert len(net_feed_input) == 1
        session_options = rt.SessionOptions()
        # register custom op for ONNX Runtime
        if osp.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)
        feed_input_img = one_img.detach().numpy()
        if dynamic_export:
            # test batch with two input images
            feed_input_img = np.vstack([feed_input_img, feed_input_img])
        sess = rt.InferenceSession(output_file, session_options)
        onnx_outputs = sess.run(None, {net_feed_input[0]: feed_input_img})
        output_names = [_.name for _ in sess.get_outputs()]
        output_shapes = [_.shape for _ in onnx_outputs]
        # Adjacent f-strings keep source indentation out of the output.
        print(f'ONNX Runtime output names: {output_names}, '
              f'output shapes: {output_shapes}')
        # get last image's outputs
        onnx_outputs = [_[-1] for _ in onnx_outputs]
        ort_dets, ort_labels = onnx_outputs[:2]
        onnx_results = bbox2result(ort_dets, ort_labels, num_classes)
        if model.with_mask:
            # Group per-detection masks by predicted label.
            segm_results = onnx_outputs[2]
            cls_segms = [[] for _ in range(num_classes)]
            for i in range(ort_dets.shape[0]):
                cls_segms[ort_labels[i]].append(segm_results[i])
            onnx_results = (onnx_results, cls_segms)
        # visualize predictions
        if show:
            show_result_pyplot(
                model, one_meta['show_img'], pytorch_results, title='Pytorch')
            show_result_pyplot(
                model, one_meta['show_img'], onnx_results,
                title='ONNXRuntime')

        # compare a part of result
        if model.with_mask:
            compare_pairs = list(zip(onnx_results, pytorch_results))
        else:
            compare_pairs = [(onnx_results, pytorch_results)]
        err_msg = 'The numerical values are different between Pytorch' + \
                  ' and ONNX, but it does not necessarily mean the' + \
                  ' exported ONNX model is problematic.'
        # check the numerical value
        for onnx_res, pytorch_res in compare_pairs:
            for o_res, p_res in zip(onnx_res, pytorch_res):
                np.testing.assert_allclose(
                    o_res, p_res, rtol=1e-03, atol=1e-05, err_msg=err_msg)
        print('The numerical values are the same between Pytorch and ONNX')
import os import os.path as osp import warnings import numpy as np import onnx import onnxruntime as ort import torch import torch.nn as nn ort_custom_op_path = '' try: from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() except (ImportError, ModuleNotFoundError): warnings.warn('If input model has custom op from mmcv, \ you may have to build mmcv with ONNXRuntime from source.') class WrapFunction(nn.Module): """Wrap the function to be tested for torch.onnx.export tracking.""" def __init__(self, wrapped_function): super(WrapFunction, self).__init__() self.wrapped_function = wrapped_function def forward(self, *args, **kwargs): return self.wrapped_function(*args, **kwargs) def ort_validate(model, feats, onnx_io='tmp.onnx'):
def onnx2tensorrt(onnx_file: str,
                  trt_file: str,
                  config: dict,
                  input_config: dict,
                  model_type: str,
                  img_path: str,
                  fp16: bool = False,
                  verify: bool = False,
                  show: bool = False,
                  workspace_size: int = 1,
                  verbose: bool = False):
    """Convert an ONNX model to a TensorRT engine and optionally verify it.

    Args:
        onnx_file (str): Path of the input ONNX file.
        trt_file (str): Path of the TensorRT engine to write; its parent
            directory is created when the path has one.
        config (dict): Configuration passed to ``_prepare_input_img``.
        input_config (dict): Contains ``min_shape`` and ``max_shape``.
        model_type (str): Passed to ``_prepare_input_img``.
        img_path (str): Image used for verification.
        fp16 (bool): Whether to enable fp16 mode.
        verify (bool): Whether to check that TensorRT and ONNX Runtime
            outputs are close.
        show (bool): Whether to display both outputs with OpenCV.
        workspace_size (int): Converted with ``get_GiB`` for ``onnx2trt``.
        verbose (bool): Whether to use the VERBOSE TensorRT log level
            (ERROR otherwise).
    """
    import tensorrt as trt

    min_shape = input_config['min_shape']
    max_shape = input_config['max_shape']

    # create trt engine and wraper
    shape_ranges = {'input': [min_shape, min_shape, max_shape]}
    workspace_bytes = get_GiB(workspace_size)
    if verbose:
        trt_log_level = trt.Logger.VERBOSE
    else:
        trt_log_level = trt.Logger.ERROR
    engine = onnx2trt(
        onnx_file,
        shape_ranges,
        log_level=trt_log_level,
        fp16_mode=fp16,
        max_workspace_size=workspace_bytes)

    out_dir, _ = osp.split(trt_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    save_trt_engine(engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    prepared = _prepare_input_img(
        model_type=model_type, img_path=img_path, config=config)

    imgs = prepared['imgs']
    img_list = [imgs.unsqueeze(0)]

    if max_shape[0] > 1:
        # concate flip image for batch test
        flipped = [_.flip(-1) for _ in img_list]
        img_list = [
            torch.cat((ori_img, flip_img), 0)
            for ori_img, flip_img in zip(img_list, flipped)
        ]

    # Get results from ONNXRuntime
    ort_custom_op_path = get_onnxruntime_op_path()
    session_options = ort.SessionOptions()
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)
    sess.set_providers(['CPUExecutionProvider'], [{}])  # use cpu mode
    ort_feed = {'input': img_list[0].detach().numpy()}
    onnx_output = sess.run(['output'], ort_feed)[0][0]

    # Get results from TensorRT
    trt_model = TRTWraper(trt_file, ['input'], ['output'])
    with torch.no_grad():
        trt_outputs = trt_model({'input': img_list[0].contiguous().cuda()})
    trt_output = trt_outputs['output'][0].cpu().detach().numpy()

    if show:
        # Move channels last, clamp to [0, 1], then reverse channel order.
        onnx_visualize = np.clip(
            onnx_output.transpose(1, 2, 0), 0, 1)[:, :, ::-1]
        trt_visualize = np.clip(
            trt_output.transpose(1, 2, 0), 0, 1)[:, :, ::-1]

        cv2.imshow('ONNXRuntime', onnx_visualize)
        cv2.imshow('TensorRT', trt_visualize)
        cv2.waitKey()

    np.testing.assert_allclose(
        onnx_output, trt_output, rtol=1e-03, atol=1e-05)
    print('TensorRT and ONNXRuntime output all close.')
def test_cummax_cummin(key, opset=11):
    """Compare ``torch.cummax``/``torch.cummin`` with ONNX Runtime.

    For several input shapes (including empty tensors) and every valid
    ``dim``, the function is exported, executed with ONNX Runtime using the
    mmcv custom-op library, and both values and indices are compared.

    Args:
        key: ``'cummax'`` or ``'cummin'``; selects the torch function.
        opset: ONNX opset version used for symbolic registration and export.
    """
    if torch.__version__ == 'parrots':
        pytest.skip('onnx is not supported in parrots directly')

    # Note generally `cummax` or `cummin` is exportable to ONNX
    # as long as the pytorch version >= 1.5.0, since `torch.cummax`
    # is only supported with torch >= 1.5.0.
    # But when `cummax` or `cummin` serves as an intermediate component
    # whose outputs is used as inputs for another modules, it's expected
    # that pytorch version must be >= 1.7.0. Otherwise error appears like:
    # `RuntimeError: tuple appears in op that does not forward tuples,
    # unsupported 'kind: prim::PythonOp`.
    if version.parse(torch.__version__) < version.parse('1.7.0'):
        pytest.skip('test_cummax_cummin should be ran with pytorch >= 1.7.0')

    # register custom op `mmcv::cummax` and `mmcv::cummin`
    from mmcv.onnx.symbolic import register_extra_symbolics
    register_extra_symbolics(opset)

    from mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    input_list = [
        # arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
        torch.rand((2, 3, 4, 1, 5)),
        torch.rand((1)),
        torch.rand((2, 0, 1)),  # tensor.numel() is 0
        torch.FloatTensor(),  # empty tensor
    ]

    cummax_cummin_funcs = {'cummax': torch.cummax, 'cummin': torch.cummin}

    for input_tensor in input_list:
        ndims = input_tensor.dim()
        # valid dim range is [-ndims, ndims-1]
        # test for all `dim` value which is valid
        for dim in range(-ndims, ndims):
            cummax_func = partial(cummax_cummin_funcs[key], dim=dim)
            wrapped_model = WrapFunction(cummax_func).eval()

            # Remove the temporary ONNX file for this iteration even when
            # export, inference or a comparison fails.
            try:
                with torch.no_grad():
                    torch.onnx.export(
                        wrapped_model,
                        input_tensor,
                        onnx_file,
                        export_params=True,
                        keep_initializers_as_inputs=True,
                        input_names=['input'],
                        output_names=['output', 'indices'],
                        opset_version=opset)

                onnx_model = onnx.load(onnx_file)
                input_all = [node.name for node in onnx_model.graph.input]
                input_initializer = [
                    node.name for node in onnx_model.graph.initializer
                ]
                net_feed_input = list(
                    set(input_all) - set(input_initializer))
                assert len(net_feed_input) == 1

                session_options = rt.SessionOptions()
                session_options.register_custom_ops_library(
                    ort_custom_op_path)
                sess = rt.InferenceSession(onnx_file, session_options)
                ort_output, ort_inds = sess.run(
                    None, {'input': input_tensor.detach().numpy()})
                pytorch_output, pytorch_inds = wrapped_model(
                    input_tensor.clone())
                pytorch_output = pytorch_output.detach().numpy()
                pytorch_inds = pytorch_inds.detach().numpy()
                assert np.allclose(pytorch_output, ort_output, atol=1e-5)
                assert np.all(pytorch_inds == ort_inds)
            finally:
                if os.path.exists(onnx_file):
                    os.remove(onnx_file)
def onnx2tensorrt(onnx_file,
                  trt_file,
                  input_config,
                  verify=False,
                  show=False,
                  dataset='coco',
                  workspace_size=1):
    """Build a fixed-shape TensorRT engine and optionally verify it.

    Args:
        onnx_file: Path of the ONNX model; loaded with ``onnx.load``.
        trt_file: Destination of the serialized engine; its parent
            directory is created when the path has one.
        input_config: Mapping with ``input_shape``; also passed to
            ``preprocess_example_input`` during verification.
        verify: When true, compare TensorRT and ONNX Runtime outputs.
        show: When true during verification, draw both detections.
        dataset: Name passed to ``get_classes`` for visualisation.
        workspace_size: Converted with ``get_GiB`` for ``onnx2trt``.
    """
    model_proto = onnx.load(onnx_file)
    input_shape = input_config['input_shape']

    # create trt engine and wraper
    # min, opt and max shapes are all the same fixed shape.
    shape_ranges = {'input': [input_shape] * 3}
    workspace_bytes = get_GiB(workspace_size)
    engine = onnx2trt(
        model_proto,
        shape_ranges,
        fp16_mode=False,
        max_workspace_size=workspace_bytes)

    out_dir, _ = osp.split(trt_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    save_trt_engine(engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    one_img, one_meta = preprocess_example_input(input_config)
    input_img_cpu = one_img.detach().cpu().numpy()
    input_img_cuda = one_img.cuda()
    img = one_meta['show_img']

    # Get results from TensorRT
    trt_model = TRTWraper(trt_file, ['input'], ['boxes', 'labels'])
    with torch.no_grad():
        trt_outputs = trt_model({'input': input_img_cuda})
    trt_boxes = trt_outputs['boxes'].detach().cpu().numpy()
    trt_labels = trt_outputs['labels'].detach().cpu().numpy()

    # Get results from ONNXRuntime
    ort_custom_op_path = get_onnxruntime_op_path()
    session_options = ort.SessionOptions()
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)
    ort_boxes, ort_labels = sess.run(None, {'input': input_img_cpu})

    # Show detection outputs
    if show:
        CLASSES = get_classes(dataset)
        score_thr = 0.35
        detections = (
            (trt_boxes, trt_labels, 'TensorRT'),
            (ort_boxes, ort_labels, 'ONNXRuntime'),
        )
        for det_boxes, det_labels, window in detections:
            imshow_det_bboxes(
                img.copy(),
                det_boxes,
                det_labels,
                CLASSES,
                score_thr=score_thr,
                win_name=window)

    # Compare results
    np.testing.assert_allclose(ort_boxes, trt_boxes, rtol=1e-03, atol=1e-05)
    np.testing.assert_allclose(ort_labels, trt_labels)
    print('The numerical values are the same ' +
          'between ONNXRuntime and TensorRT')
def test_modulated_deform_conv2d():
    """Compare ``ModulatedDeformConv2d`` between PyTorch and ONNX Runtime.

    Both a biased and an unbiased module are exported with opset 11 from
    CUDA tensors, executed with ONNX Runtime on the CPU copies of the same
    inputs, and compared with ``atol=1e-3``.
    """
    try:
        from mmcv.ops import ModulatedDeformConv2d, get_onnxruntime_op_path
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError.
        pytest.skip('modulated_deform_conv op is not successfully compiled')

    # Everything below moves tensors and modules to CUDA, so skip cleanly
    # instead of erroring on machines without a GPU.
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU')

    ort_custom_op_path = get_onnxruntime_op_path()
    # modulated deform conv config
    in_channels = 3
    out_channels = 64
    stride = 1
    padding = 0
    dilation = 1
    groups = 1
    deform_groups = 1
    kernel_size = 3

    input = torch.rand(1, in_channels, 28, 28).cuda()  # (n, c, h, w)
    # Produces 3 * k * k channels per deform group: two offset chunks and
    # one mask chunk, split below.
    conv_offset = nn.Conv2d(
        in_channels=in_channels,
        out_channels=deform_groups * 3 * kernel_size * kernel_size,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True).cuda()
    out = conv_offset(input)
    o1, o2, mask = torch.chunk(out, 3, dim=1)
    offset = torch.cat((o1, o2), dim=1)
    mask = torch.sigmoid(mask)

    model_with_bias = ModulatedDeformConv2d(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        dilation,
        groups,
        deform_groups,
        bias=True)
    model_without_bias = ModulatedDeformConv2d(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        dilation,
        groups,
        deform_groups,
        bias=False)
    models = [model_with_bias.cuda(), model_without_bias.cuda()]

    for model in models:
        # export and load onnx model
        with torch.no_grad():
            torch.onnx.export(
                model, (input, offset, mask),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['input', 'offset', 'mask'],
                opset_version=11)

        session_options = rt.SessionOptions()
        if os.path.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)

        # compute onnx_output
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_output = sess.run(
            None, {
                'input': input.cpu().detach().numpy(),
                'offset': offset.cpu().detach().numpy(),
                'mask': mask.cpu().detach().numpy()
            })[0]

        # compute pytorch_output
        with torch.no_grad():
            pytorch_output = model(input, offset, mask).cpu()
        # allclose
        assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
def test_deform_conv2d(threshold=1e-3):
    """Compare DeformConv2d in PyTorch with its ONNX Runtime export.

    Fixed input, offset-convolution weights and deform weights are used.
    The layer is exported to ``onnx_file``, executed with ONNX Runtime and
    compared against the PyTorch output.

    Args:
        threshold: Absolute tolerance used for the final comparison.

    The test is skipped when the op cannot be imported or when the ONNX
    Runtime custom-op library does not exist.
    """
    try:
        from mmcv.ops import DeformConv2d, get_onnxruntime_op_path
    except (ImportError, ModuleNotFoundError):
        pytest.skip('deform_conv op is not successfully compiled')

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('custom ops for onnxruntime are not compiled.')

    # deform conv config
    in_channels = 1
    out_channels = 64
    stride = 1
    padding = 0
    dilation = 1
    groups = 1
    deform_groups = 1
    kernel_size = 2

    input_data = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
    offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                     [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                     [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                     [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
    offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
    deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]

    x = torch.tensor(input_data)

    # Convolution that generates the sampling offsets, with fixed parameters.
    conv_offset = nn.Conv2d(
        in_channels=in_channels,
        out_channels=deform_groups * 2 * kernel_size * kernel_size,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True)
    conv_offset.weight.data = torch.Tensor(offset_weight).reshape(8, 1, 2, 2)
    conv_offset.bias.data = torch.Tensor(offset_bias).reshape(8)
    offset = conv_offset(x)

    model = DeformConv2d(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, deform_groups)
    # NOTE(review): the weight is overwritten with a (1, 1, 2, 2) tensor even
    # though the layer was built with out_channels=64 - kept as in the
    # original test.
    model.weight.data = torch.Tensor(deform_weight).reshape(1, 1, 2, 2)

    try:
        with torch.no_grad():
            torch.onnx.export(
                model, (x, offset),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['input', 'offset'],
                opset_version=11)

        # The library is known to exist here (see the skip above).
        session_options = rt.SessionOptions()
        session_options.register_custom_ops_library(ort_custom_op_path)

        # compute onnx_output
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_output = sess.run(
            None, {
                'input': x.cpu().detach().numpy(),
                'offset': offset.cpu().detach().numpy(),
            })[0]
    finally:
        # Do not leave the exported model behind, even on failure.
        if os.path.exists(onnx_file):
            os.remove(onnx_file)

    # compute pytorch_output
    with torch.no_grad():
        pytorch_output = model(x, offset).cpu()

    # allclose, honouring the caller-supplied tolerance
    assert np.allclose(pytorch_output, onnx_output, atol=threshold)
def pytorch2onnx(config_path,
                 checkpoint_path,
                 input_img,
                 input_shape,
                 opset_version=11,
                 show=False,
                 output_file='tmp.onnx',
                 verify=False,
                 normalize_cfg=None,
                 dataset='coco',
                 test_img=None,
                 do_simplify=False,
                 cfg_options=None):
    """Export a detector to ONNX and optionally verify it with ONNX Runtime.

    Args:
        config_path: Config passed to ``build_model_from_cfg`` and
            ``generate_inputs_and_wrap_model``.
        checkpoint_path: Checkpoint passed to the same two helpers.
        input_img: Stored as ``'input_path'`` of the example-input config.
        input_shape: Stored as ``'input_shape'`` of the example-input config.
        opset_version: ONNX opset passed to ``torch.onnx.export``.
        show: Enables verbose export and, when verifying, displays the
            PyTorch and ONNX predictions.
        output_file: Path the ONNX model is written to (and simplified in
            place when ``do_simplify`` is set).
        verify: When True, run the exported model with ONNX Runtime and
            assert its results match the PyTorch results.
        normalize_cfg: Stored as ``'normalize_cfg'`` of the input config.
        dataset: Name handed to ``get_classes`` to set ``model.CLASSES``.
        test_img: Optional alternative image used only for verification.
        do_simplify: When True, simplify the exported model with
            ``mmcv.onnx.simplify`` (requires mmcv >= 1.2.5).
        cfg_options: Forwarded to the model-building helpers.

    Raises:
        AssertionError: If mmcv is too old for simplification, if the
            exported graph does not have exactly one feed input, if the
            session yields no output, or if ONNX and PyTorch results differ
            beyond the tolerances used below.
    """
    input_config = {
        'input_shape': input_shape,
        'input_path': input_img,
        'normalize_cfg': normalize_cfg
    }

    # prepare original model and meta for verifying the onnx model
    orig_model = build_model_from_cfg(
        config_path, checkpoint_path, cfg_options=cfg_options)
    one_img, one_meta = preprocess_example_input(input_config)
    model, tensor_data = generate_inputs_and_wrap_model(
        config_path, checkpoint_path, input_config, cfg_options=cfg_options)

    output_names = ['boxes']
    if model.with_bbox:
        output_names.append('labels')
    if model.with_mask:
        output_names.append('masks')

    torch.onnx.export(
        model,
        tensor_data,
        output_file,
        input_names=['input'],
        output_names=output_names,
        export_params=True,
        keep_initializers_as_inputs=True,
        do_constant_folding=True,
        verbose=show,
        opset_version=opset_version)

    # Put the unwrapped forward back so the model can be called normally
    # below (presumably the wrapper replaced it for export - confirm).
    model.forward = orig_model.forward

    # simplify onnx model
    if do_simplify:
        from mmdet import digit_version
        import mmcv

        min_required_version = '1.2.5'
        assert digit_version(mmcv.__version__) >= digit_version(
            min_required_version
        ), f'Requires to install mmcv>={min_required_version}'
        from mmcv.onnx.simplify import simplify

        input_dic = {'input': one_img.detach().cpu().numpy()}
        _ = simplify(output_file, [input_dic], output_file)
    print(f'Successfully exported ONNX model: {output_file}')

    if not verify:
        return

    from mmdet.core import get_classes, bbox2result
    from mmdet.apis import show_result_pyplot

    # get the custom op path; an empty path simply skips registration below
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except (ImportError, ModuleNotFoundError):
        # Adjacent literals instead of a backslash continuation, so no
        # source indentation leaks into the message.
        warnings.warn(
            'If input model has custom op from mmcv, '
            'you may have to build mmcv with ONNXRuntime from source.')

    model.CLASSES = get_classes(dataset)
    num_classes = len(model.CLASSES)

    # check by onnx
    onnx_model = onnx.load(output_file)
    onnx.checker.check_model(onnx_model)

    if test_img is not None:
        input_config['input_path'] = test_img
        one_img, one_meta = preprocess_example_input(input_config)
        tensor_data = [one_img]

    # check the numerical value
    # get pytorch output
    pytorch_results = model(tensor_data, [[one_meta]], return_loss=False)
    pytorch_results = pytorch_results[0]

    # get onnx output: graph inputs that are not initializers are the
    # tensors the caller has to feed
    input_all = [node.name for node in onnx_model.graph.input]
    input_initializer = [node.name for node in onnx_model.graph.initializer]
    net_feed_input = list(set(input_all) - set(input_initializer))
    assert (len(net_feed_input) == 1)

    session_options = rt.SessionOptions()
    # register custom op for onnxruntime
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = rt.InferenceSession(output_file, session_options)
    onnx_outputs = sess.run(None,
                            {net_feed_input[0]: one_img.detach().numpy()})
    output_names = [_.name for _ in sess.get_outputs()]
    output_shapes = [_.shape for _ in onnx_outputs]
    print(f'onnxruntime output names: {output_names}, '
          f'output shapes: {output_shapes}')

    nrof_out = len(onnx_outputs)
    assert nrof_out > 0, 'Must have output'
    with_mask = nrof_out == 3
    if nrof_out == 1:
        onnx_results = onnx_outputs[0]
    else:
        det_bboxes, det_labels = onnx_outputs[:2]
        onnx_results = bbox2result(det_bboxes, det_labels, num_classes)
        if with_mask:
            # Group the masks by predicted label, one list per class.
            segm_results = onnx_outputs[2].squeeze(1)
            cls_segms = [[] for _ in range(num_classes)]
            for i in range(det_bboxes.shape[0]):
                cls_segms[det_labels[i]].append(segm_results[i])
            onnx_results = (onnx_results, cls_segms)

    # visualize predictions
    if show:
        show_result_pyplot(
            model, one_meta['show_img'], pytorch_results, title='Pytorch')
        show_result_pyplot(
            model, one_meta['show_img'], onnx_results, title='ONNX')

    # compare a part of result
    if with_mask:
        compare_pairs = list(zip(onnx_results, pytorch_results))
    else:
        compare_pairs = [(onnx_results, pytorch_results)]
    for onnx_res, pytorch_res in compare_pairs:
        for o_res, p_res in zip(onnx_res, pytorch_res):
            np.testing.assert_allclose(
                o_res,
                p_res,
                rtol=1e-03,
                atol=1e-05,
            )
    print('The numerical values are the same between Pytorch and ONNX')
def test_roialign():
    """Compare ``roi_align`` in PyTorch with its ONNX Runtime export.

    Each case is a (feature map, rois) pair. The op is wrapped with
    ``WrapFunction``, exported to ``onnx_file``, executed with ONNX Runtime
    and compared against the PyTorch output with ``atol=1e-3``. The test is
    skipped when the op cannot be imported.
    """
    try:
        from mmcv.ops import get_onnxruntime_op_path, roi_align
    except (ImportError, ModuleNotFoundError):
        pytest.skip('roi_align op is not successfully compiled')

    custom_op_lib = get_onnxruntime_op_path()

    # roi align config
    pooled_h, pooled_w = 2, 2
    scale = 1.0
    samples = 2

    cases = [
        ([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
        ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]],
         [[0., 0., 0., 1., 1.]]),
        ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
            [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]]),
    ]

    def run_roi_align(feats, boxes):
        # Single definition used for both the reference result and the
        # exported module.
        return roi_align(feats, boxes, (pooled_w, pooled_h), scale, samples,
                         'avg', True)

    for feat_values, roi_values in cases:
        feats = torch.from_numpy(np.array(feat_values, dtype=np.float32))
        boxes = torch.from_numpy(np.array(roi_values, dtype=np.float32))

        # compute pytorch_output
        with torch.no_grad():
            expected = run_roi_align(feats, boxes)

        # export and load onnx model
        exportable = WrapFunction(run_roi_align)
        with torch.no_grad():
            torch.onnx.export(
                exportable, (feats, boxes),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['input', 'rois'],
                opset_version=11)
        exported = onnx.load(onnx_file)

        options = rt.SessionOptions()
        if os.path.exists(custom_op_lib):
            options.register_custom_ops_library(custom_op_lib)

        # compute onnx_output: only the two real inputs may remain once
        # initializers are excluded
        graph_inputs = {node.name for node in exported.graph.input}
        initializers = {node.name for node in exported.graph.initializer}
        feed_names = graph_inputs - initializers
        assert len(feed_names) == 2

        session = rt.InferenceSession(onnx_file, options)
        actual = session.run(None, {
            'input': feats.detach().numpy(),
            'rois': boxes.detach().numpy()
        })[0]

        # allclose
        os.remove(onnx_file)
        assert np.allclose(expected, actual, atol=1e-3)
def test_softnms():
    """Compare ``soft_nms`` in PyTorch with its ONNX Runtime export.

    For each configuration (linear, gaussian and naive) the op is wrapped
    with ``WrapFunction``, exported to ``onnx_file`` and executed with ONNX
    Runtime. Both the detections and the kept indices are compared with the
    PyTorch results.

    The test returns early with a warning on PyTorch < 1.7.0, asserts
    onnxruntime >= 1.5.1 and is skipped when the ONNX Runtime custom-op
    library does not exist.
    """
    from mmcv.ops import get_onnxruntime_op_path, soft_nms

    # only support pytorch >= 1.7.0
    if version.parse(torch.__version__) < version.parse('1.7.0'):
        warnings.warn('test_softnms should be ran with pytorch >= 1.7.0')
        return

    # only support onnxruntime >= 1.5.1
    assert version.parse(rt.__version__) >= version.parse(
        '1.5.1'), 'test_softnms should be ran with onnxruntime >= 1.5.1'

    ort_custom_op_path = get_onnxruntime_op_path()
    if not os.path.exists(ort_custom_op_path):
        pytest.skip('softnms for onnxruntime is not compiled.')

    np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
                         [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
                        dtype=np.float32)
    np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)

    boxes = torch.from_numpy(np_boxes)
    scores = torch.from_numpy(np_scores)

    # (iou_threshold, sigma, min_score, method)
    configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'],
               [0.3, 0.5, 0.01, 'naive']]

    session_options = rt.SessionOptions()
    session_options.register_custom_ops_library(ort_custom_op_path)

    for _iou_threshold, _sigma, _min_score, _method in configs:
        # One kwargs dict keeps the reference call and the exported wrapper
        # on exactly the same settings.
        nms_kwargs = dict(
            iou_threshold=_iou_threshold,
            sigma=_sigma,
            min_score=_min_score,
            method=_method)
        pytorch_dets, pytorch_inds = soft_nms(boxes, scores, **nms_kwargs)

        nms = partial(soft_nms, **nms_kwargs)
        wrapped_model = WrapFunction(nms)
        wrapped_model.cpu().eval()
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (boxes, scores),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['boxes', 'scores'],
                opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # get onnx output
        input_all = [node.name for node in onnx_model.graph.input]
        input_initializer = [
            node.name for node in onnx_model.graph.initializer
        ]
        net_feed_input = list(set(input_all) - set(input_initializer))
        assert (len(net_feed_input) == 2)
        sess = rt.InferenceSession(onnx_file, session_options)
        onnx_dets, onnx_inds = sess.run(None, {
            'scores': scores.detach().numpy(),
            'boxes': boxes.detach().numpy()
        })
        os.remove(onnx_file)

        assert np.allclose(pytorch_dets, onnx_dets, atol=1e-3)
        # Compare against the PyTorch indices; the previous check compared
        # onnx_inds with itself and could never fail.
        assert np.allclose(pytorch_inds, onnx_inds, atol=1e-3)
def onnx2tensorrt(onnx_file,
                  trt_file,
                  input_config,
                  verify=False,
                  show=False,
                  dataset='coco',
                  workspace_size=1,
                  verbose=False):
    """Build a TensorRT engine from an ONNX file and optionally verify it.

    Args:
        onnx_file: Path of the ONNX model to load and convert.
        trt_file: Path the engine is saved to. Its parent directory is
            created when it is non-empty and missing.
        input_config: Dict providing ``'input_shape'``; it is also forwarded
            to ``preprocess_example_input`` when verifying.
        verify: When True, run ONNX Runtime and the saved engine on one
            example input and assert that the outputs agree.
        show: When True (and ``verify`` is set), display both detections.
        dataset: Name handed to ``get_classes`` for display labels.
        workspace_size: Converted by ``get_GiB`` into the engine's
            ``max_workspace_size`` (presumably a size in GiB - confirm).
        verbose: Selects ``trt.Logger.VERBOSE`` instead of
            ``trt.Logger.ERROR`` as the build log level.

    Raises:
        AssertionError: If the ONNX Runtime and TensorRT outputs differ
            beyond the tolerances used below.
    """
    import tensorrt as trt

    onnx_model = onnx.load(onnx_file)
    input_shape = input_config['input_shape']

    # create trt engine and wrapper; the same shape is supplied three times
    # (presumably min / opt / max of the optimisation profile - confirm)
    opt_shape_dict = {'input': [input_shape, input_shape, input_shape]}
    max_workspace_size = get_GiB(workspace_size)
    trt_engine = onnx2trt(
        onnx_model,
        opt_shape_dict,
        log_level=trt.Logger.VERBOSE if verbose else trt.Logger.ERROR,
        fp16_mode=False,
        max_workspace_size=max_workspace_size)

    save_dir, _ = osp.split(trt_file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    save_trt_engine(trt_engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    one_img, one_meta = preprocess_example_input(input_config)
    input_img_cpu = one_img.detach().cpu().numpy()
    input_img_cuda = one_img.cuda()
    img = one_meta['show_img']

    # Get results from ONNXRuntime
    ort_custom_op_path = get_onnxruntime_op_path()
    session_options = ort.SessionOptions()
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)
    output_names = [_.name for _ in sess.get_outputs()]
    ort_outputs = sess.run(None, {
        'input': input_img_cpu,
    })
    # A third output is treated as the masks.
    with_mask = len(output_names) == 3
    # Drop the leading axis of every output before comparing.
    ort_outputs = [_.squeeze(0) for _ in ort_outputs]
    ort_dets, ort_labels = ort_outputs[:2]
    ort_masks = ort_outputs[2] if with_mask else None
    ort_shapes = [_.shape for _ in ort_outputs]
    # Adjacent f-strings instead of a backslash continuation, so no source
    # indentation leaks into the printed message.
    print(f'ONNX Runtime output names: {output_names}, '
          f'output shapes: {ort_shapes}')

    # Get results from TensorRT, reusing the output names reported by ORT
    trt_model = TRTWraper(trt_file, ['input'], output_names)
    with torch.no_grad():
        trt_outputs = trt_model({'input': input_img_cuda})
    trt_outputs = [
        trt_outputs[_].detach().cpu().numpy().squeeze(0)
        for _ in output_names
    ]
    trt_dets, trt_labels = trt_outputs[:2]
    trt_shapes = [_.shape for _ in trt_outputs]
    print(f'TensorRT output names: {output_names}, '
          f'output shapes: {trt_shapes}')
    trt_masks = trt_outputs[2] if with_mask else None

    # Show detection outputs
    if show:
        CLASSES = get_classes(dataset)
        score_thr = 0.35
        imshow_det_bboxes(
            img.copy(),
            trt_dets,
            trt_labels,
            segms=trt_masks,
            class_names=CLASSES,
            score_thr=score_thr,
            win_name='TensorRT')
        imshow_det_bboxes(
            img.copy(),
            ort_dets,
            ort_labels,
            segms=ort_masks,
            class_names=CLASSES,
            score_thr=score_thr,
            win_name='ONNXRuntime')

    # Compare results
    np.testing.assert_allclose(ort_dets, trt_dets, rtol=1e-03, atol=1e-05)
    np.testing.assert_allclose(ort_labels, trt_labels)
    if with_mask:
        np.testing.assert_allclose(
            ort_masks, trt_masks, rtol=1e-03, atol=1e-05)
    print('The numerical values are the same ' +
          'between ONNXRuntime and TensorRT')