def onnx2trt(
    onnx_model: onnx.ModelProto,
    *,
    shapes: Dict[str, ShapeSpec],
    max_workspace_size: int,
    max_batch_size: int,
    model_precision: str,
) -> "trt.ICudaEngine":
    """
    Converts an ONNX model to a TensorRT ICudaEngine.

    Args:
        onnx_model: onnx.ModelProto to convert
        shapes: dictionary containing min, opt, and max shape for each input name
        max_workspace_size: the maximum GPU temporary memory the ICudaEngine
            may use at execution time
        max_batch_size: the maximum batch size usable at execution time, and
            also the batch size for which the ICudaEngine will be optimized
        model_precision: precision of kernels (possible values: fp16, fp32)

    Returns:
        TensorRT ICudaEngine
    """
    # Whether or not 16-bit kernels are permitted.
    # When this mode is enabled, fp16 kernels are also tried during the
    # :class:`ICudaEngine` build.
    fp16_mode = "16" in model_precision

    builder = trt.Builder(TRT_LOGGER)
    builder.fp16_mode = fp16_mode
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = max_workspace_size

    # In TensorRT 7.0, the ONNX parser only supports full-dimensions mode,
    # meaning that the network definition must be created with the
    # EXPLICIT_BATCH flag set. For more information, see
    # https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work_dynamic_shapes
    flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(flags)

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        # ONNX model parsing
        if not parser.parse(onnx_model.SerializeToString()):
            for i in range(parser.num_errors):
                LOGGER.error(
                    f"OnnxParser error {i}/{parser.num_errors}: {parser.get_error(i)}"
                )
            raise RuntimeError("Error during parsing ONNX model (see logs for details)")

        # optimization
        config = builder.create_builder_config()
        config.flags |= bool(fp16_mode) << int(trt.BuilderFlag.FP16)
        config.max_workspace_size = max_workspace_size

        profile = builder.create_optimization_profile()
        for name, spec in shapes.items():
            profile.set_shape(name, **spec._asdict())

        config.add_optimization_profile(profile)
        engine = builder.build_engine(network, config=config)

    return engine
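# A minimal usage sketch for onnx2trt. `ShapeSpec` is not defined in this
# snippet; it is assumed here to be a NamedTuple whose fields match the
# keyword arguments of IOptimizationProfile.set_shape (min, opt, max),
# since the function unpacks it with spec._asdict(). The model path and
# shapes below are illustrative.
from typing import NamedTuple, Tuple

import onnx


class ShapeSpec(NamedTuple):  # hypothetical definition for this example
    min: Tuple[int, ...]
    opt: Tuple[int, ...]
    max: Tuple[int, ...]


onnx_model = onnx.load("model.onnx")
engine = onnx2trt(
    onnx_model,
    shapes={
        "input": ShapeSpec(min=(1, 3, 224, 224),
                           opt=(8, 3, 224, 224),
                           max=(16, 3, 224, 224))
    },
    max_workspace_size=1 << 30,  # 1 GiB of builder scratch memory
    max_batch_size=16,
    model_precision="fp16",
)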
def import_onnx_model(model: onnx.ModelProto) -> Model:
    onnx.checker.check_model(model)
    model_byte_string = model.SerializeToString()
    ie = Core()
    func = ie.read_model(bytes(model_byte_string), Tensor(type=np.uint8, shape=[]))
    return func
def __generate_onnx_model(onnx_model: ModelProto, out_file: Path) -> None:
    """Write an ONNX model to a file.

    Args:
        onnx_model: onnx model.
        out_file: output model file.
    """
    with out_file.open('wb') as o_file:
        o_file.write(onnx_model.SerializeToString())
def save_model(model: onnx.ModelProto, f=None):
    fb = model.SerializeToString()
    if f is not None:
        if hasattr(f, 'write'):
            f.write(fb)
        else:
            with open(f, "wb") as out_file:
                out_file.write(fb)
    return fb
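# A short usage sketch for save_model, covering its three call modes
# (assumes `model` is any valid onnx.ModelProto; the path is illustrative):
import io

import onnx

model = onnx.ModelProto()

raw = save_model(model)          # no target given: just returns the bytes
buf = io.BytesIO()
save_model(model, buf)           # file-like object: written via .write()
save_model(model, "model.onnx")  # path string: file is opened and written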
def import_onnx_model(model: onnx.ModelProto) -> Function:
    onnx.checker.check_model(model)
    model_byte_string = model.SerializeToString()
    ie = IECore()
    ie_network = ie.read_network(model=model_byte_string, weights=b"", init_from_buffer=True)
    ng_function = ng.function_from_cnn(ie_network)
    return ng_function
def import_onnx_model(model: onnx.ModelProto) -> Function:
    onnx.checker.check_model(model)
    model_byte_string = model.SerializeToString()
    ie = Core()
    ie_network = ie.read_network(bytes(model_byte_string), Blob(TensorDesc("U8", [], "C")))
    ng_function = ie_network.get_function()
    return ng_function
def get_cpu_output(onnx_model: ModelProto, data: Dict[AnyStr, numpy.ndarray]) -> List[numpy.ndarray]:
    """Run inference with CPUExecutionProvider."""
    # pylint: disable=no-member
    sess = onnxruntime.InferenceSession(
        onnx_model.SerializeToString(),
        providers=["CPUExecutionProvider"],
    )
    output = sess.run(None, data)
    return output
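# An illustrative call to get_cpu_output with random data shaped to match
# the model's first graph input; assumes a single static-shaped float32
# input and a model file at a hypothetical path.
import numpy
import onnx

model = onnx.load("model.onnx")
inp = model.graph.input[0]
shape = [d.dim_value for d in inp.type.tensor_type.shape.dim]
data = {inp.name: numpy.random.rand(*shape).astype(numpy.float32)}
outputs = get_cpu_output(model, data)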
def test_version_exists(self):
    model = ModelProto()
    # When we create it, the model should not have an ir_version yet.
    self.assertFalse(model.HasField('ir_version'))
    # We should touch the version so it is annotated with the current
    # IR version of the running ONNX.
    model.ir_version = IR_VERSION
    model_string = model.SerializeToString()
    model.ParseFromString(model_string)
    self.assertTrue(model.HasField('ir_version'))
    # Check if the version is correct.
    self.assertEqual(model.ir_version, IR_VERSION)
def convert_version(model: ModelProto, target_version: int) -> ModelProto:
    if not isinstance(model, ModelProto):
        raise ValueError(
            'VersionConverter only accepts ModelProto as model, incorrect type: {}'
            .format(type(model)))
    if not isinstance(target_version, int):
        raise ValueError(
            'VersionConverter only accepts int as target_version, incorrect type: {}'
            .format(type(target_version)))
    model_str = model.SerializeToString()
    converted_model_str = C.convert_version(model_str, target_version)
    return onnx.load_from_string(converted_model_str)
def onnx_inference(image: np.ndarray, onnx_model: onnx.ModelProto):
    """Test an ONNX model with ONNX Runtime.

    :param image: input image, a numpy array
    :param onnx_model: onnx model
    :return: the model output for the given image
    """
    sess = ort.InferenceSession(onnx_model.SerializeToString())
    assert len(sess.get_outputs()) == 1 and len(sess.get_inputs()) <= 1
    ipt_dict = OrderedDict()
    for ipt in sess.get_inputs():
        ipt_dict[ipt.name] = image
    # An empty fetch list makes onnxruntime return all outputs;
    # the model is asserted above to have exactly one.
    onnx_res = sess.run([], ipt_dict)[0]
    return onnx_res
def main():
    args = parse_arguments()
    setup_logging(args.verbose)

    exclude_names = set() if args.exclude is None else set(args.exclude.split(';'))

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())

    convert_initializers(model, exclude_names, args.sparsity_threshold, args.tolerance)

    with open(args.output, "wb") as output_file:
        s = model.SerializeToString()
        output_file.write(s)
def _forward(
    model: onnx.ModelProto,
    extra_output_nodes: Optional[List[onnx.NodeProto]] = None
) -> OrderedDict[str, np.ndarray]:
    # Add the outputs of the given nodes as model outputs.
    if extra_output_nodes is not None:
        model = deepcopy(model)
        for node in extra_output_nodes:
            for output in node.output:
                value_info = onnx.ValueInfoProto(name=output)
                model.graph.output.append(value_info)

    # Create an ONNX Runtime session.
    sess_options = onnxrt.SessionOptions()
    sess_options.graph_optimization_level = onnxrt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = onnxrt.InferenceSession(
        model.SerializeToString(),
        sess_options=sess_options,
        providers=["CPUExecutionProvider"],
    )

    # Get names of input nodes that are not initializers.
    input_names = set(v.name for v in model.graph.input)
    init_names = set(v.name for v in model.graph.initializer)
    input_names = input_names - init_names

    # Generate random inputs.
    inputs = {}
    for v in model.graph.input:
        name = v.name
        shape = (d.dim_value for d in v.type.tensor_type.shape.dim)
        dtype = _numpy_dtype(v.type.tensor_type.elem_type)
        if name in input_names:
            inputs[name] = np.random.rand(*shape).astype(dtype)

    output_names = [x.name for x in sess.get_outputs()]
    run_options = onnxrt.RunOptions()
    run_options.log_severity_level = 3
    outputs = sess.run(output_names, inputs, run_options=run_options)
    return OrderedDict(zip(output_names, outputs))
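# An illustrative call to _forward, assuming a model file at a hypothetical
# path: exposing the outputs of the first graph node as extra model outputs
# lets you inspect intermediate activations alongside the regular outputs.
import onnx

model = onnx.load("model.onnx")
first_node = model.graph.node[0]
activations = _forward(model, extra_output_nodes=[first_node])
for name, value in activations.items():
    print(name, value.shape)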
def get_tvm_output(onnx_model: ModelProto,
                   data: Dict[AnyStr, numpy.ndarray],
                   provider_options: Dict[AnyStr, Any]) -> List[numpy.ndarray]:
    """Run inference with TvmExecutionProvider."""
    session_options = onnxruntime.SessionOptions()  # pylint: disable=no-member
    session_options.log_severity_level = 0
    session_options.log_verbosity_level = 0
    # pylint: disable=no-member
    session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
    sess = onnxruntime.InferenceSession(
        onnx_model.SerializeToString(),
        session_options,
        providers=["TvmExecutionProvider"],
        provider_options=[provider_options],
    )
    output = sess.run(None, data)
    return output
def forward(
        model: onnx.ModelProto,
        inputs: Dict[str, np.ndarray] = None,
        input_shapes: Optional[TensorShapes] = None) -> Dict[str, np.ndarray]:
    """Run forward on a model.

    Args:
        model (onnx.ModelProto): Input ONNX model.
        inputs (Dict[str, np.ndarray], optional): Inputs of the model.
        input_shapes (TensorShapes, optional): Input shapes of the model.

    Returns:
        Dict[str, np.ndarray]: Outputs of the model.
    """
    if input_shapes is None:
        input_shapes = {}
    sess_options = rt.SessionOptions()

    # Load the custom op library for onnxruntime shipped with mmcv, if available.
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        pass
    if os.path.exists(ort_custom_op_path):
        sess_options.register_custom_ops_library(ort_custom_op_path)

    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(model.SerializeToString(),
                               sess_options=sess_options,
                               providers=['CPUExecutionProvider'])
    if inputs is None:
        inputs = generate_rand_input(model, input_shapes=input_shapes)
    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
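# A minimal smoke test using forward. With inputs=None the helper falls
# back to generate_rand_input (defined elsewhere in the same module), so a
# bare call is enough; the model path below is illustrative.
import onnx

model = onnx.load("model.onnx")
outputs = forward(model)     # random inputs are generated internally
print(list(outputs.keys()))  # output tensor names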
def test_load(self):
    # Create a model proto.
    model = ModelProto()
    model.ir_version = IR_VERSION
    model_string = model.SerializeToString()

    # Test if input is a string.
    loaded_model = onnx.load_from_string(model_string)
    self.assertTrue(model == loaded_model)

    # Test if input has a read function.
    f = io.BytesIO(model_string)
    loaded_model = onnx.load(f)
    self.assertTrue(model == loaded_model)

    # Test if input is a file name.
    f = tempfile.NamedTemporaryFile(delete=False)
    f.write(model_string)
    f.close()
    loaded_model = onnx.load(f.name)
    self.assertTrue(model == loaded_model)
    os.remove(f.name)
def check_model(model: onnx.ModelProto, check_runnable: bool = True) -> None:
    """Check that the model is well-defined and executable on onnxruntime."""
    # TODO: After collecting possible errors, pass through only if all error
    # messages are "No opset import for domain 'com.microsoft'".
    # The code below only inspects the first error encountered.
    acceptable_error_msg = [
        "No opset import for domain 'com.microsoft'",
        'No Op registered for LayerNormalization with domain_version of 12'
    ]
    try:
        checker.check_model(model)
    except checker.ValidationError as e:
        if str(e).split("==>")[0].rstrip() in acceptable_error_msg:
            pass
        else:
            raise  # re-raise the original validation error
    if check_runnable:
        ort.set_default_logger_severity(3)
        ort.InferenceSession(model.SerializeToString())
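# A hedged usage sketch for check_model: with check_runnable=False only the
# structural checker runs, which is useful when onnxruntime lacks the custom
# ops the model needs. The model path is illustrative.
import onnx

model = onnx.load("model.onnx")
check_model(model, check_runnable=False)  # structural validation only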
def forward(
        model: onnx.ModelProto,
        inputs: Tensors,
        custom_lib: Optional[str] = None
) -> Dict[str, np.ndarray]:
    sess_options = rt.SessionOptions()
    if custom_lib is not None:
        if os.path.exists(custom_lib):
            sess_options.register_custom_ops_library(custom_lib)
        else:
            raise ValueError("No such file '{}'".format(custom_lib))
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(
        model.SerializeToString(),
        sess_options=sess_options,
        providers=["CPUExecutionProvider"],
    )
    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options))
    )
    return res
def convert_version(model: ModelProto, target_version: int) -> ModelProto:
    """Apply the version conversion on the serialized ModelProto.

    Arguments:
        model (ModelProto): model to convert
        target_version (int): target opset version

    Returns:
        ModelProto: converted model

    Raises:
        RuntimeError: when some necessary conversion is not supported

    Supported adapters:
        - Add from Opset 7 to Opset 6
        - Add from Opset 6 to Opset 5
        - Add from Opset 6 to Opset 7
        - Add from Opset 5 to Opset 6
        - Mul from Opset 6 to Opset 7
        - Mul from Opset 7 to Opset 6
        - Mul from Opset 6 to Opset 5
        - Mul from Opset 5 to Opset 6
        - Gemm from Opset 7 to Opset 6
        - Gemm from Opset 6 to Opset 5
        - Gemm from Opset 6 to Opset 7
        - Gemm from Opset 5 to Opset 6
        - Relu from Opset 6 to Opset 5
        - Relu from Opset 5 to Opset 6
        - BatchNorm from Opset 7 to Opset 6
        - BatchNorm from Opset 6 to Opset 7
        - BatchNorm from Opset 6 to Opset 5
        - BatchNorm from Opset 5 to Opset 6
        - Concat from Opset 4 to Opset 3
        - Concat from Opset 3 to Opset 4
        - Reshape from Opset 5 to Opset 4
        - Reshape from Opset 4 to Opset 5
        - Sum from Opset 7 to Opset 8
        - Sum from Opset 8 to Opset 7
        - Sum from Opset 6 to Opset 5
        - Sum from Opset 5 to Opset 6
        - MaxPool from Opset 8 to Opset 7
        - MaxPool from Opset 7 to Opset 8
        - AveragePool from Opset 7 to Opset 6
        - AveragePool from Opset 6 to Opset 7
        - Dropout from Opset 7 to Opset 6
        - Dropout from Opset 6 to Opset 5
        - Dropout from Opset 6 to Opset 7
        - Dropout from Opset 5 to Opset 6
        - RNN from Opset 13 to Opset 14
        - RNN from Opset 14 to Opset 13
        - GRU from Opset 13 to Opset 14
        - GRU from Opset 14 to Opset 13
        - LSTM from Opset 13 to Opset 14
        - LSTM from Opset 14 to Opset 13

    Unsupported adapters:
        - Min from Opset 8 to Opset 7
        - Min from Opset 7 to Opset 8
        - Min from Opset 6 to Opset 5
        - Min from Opset 5 to Opset 6
        - Mean from Opset 8 to Opset 7
        - Mean from Opset 7 to Opset 8
        - Mean from Opset 6 to Opset 5
        - Mean from Opset 5 to Opset 6
        - Max from Opset 8 to Opset 7
        - Max from Opset 7 to Opset 8
        - Max from Opset 6 to Opset 5
        - Max from Opset 5 to Opset 6
        - Xor from Opset 6 to Opset 7
        - Xor from Opset 7 to Opset 6
        - Upsample from Opset 6 to Opset 7
        - Upsample from Opset 7 to Opset 6
        - Sub from Opset 6 to Opset 7
        - Sub from Opset 7 to Opset 6
        - Sub from Opset 6 to Opset 5
        - Sub from Opset 5 to Opset 6
        - RNN from Opset 6 to Opset 7
        - RNN from Opset 7 to Opset 6
        - Pow from Opset 6 to Opset 7
        - Pow from Opset 7 to Opset 6
        - PRelu from Opset 6 to Opset 7
        - PRelu from Opset 7 to Opset 6
        - PRelu from Opset 6 to Opset 5
        - PRelu from Opset 5 to Opset 6
        - Or from Opset 6 to Opset 7
        - Or from Opset 7 to Opset 6
        - Less from Opset 6 to Opset 7
        - Less from Opset 7 to Opset 6
        - LSTM from Opset 6 to Opset 7
        - LSTM from Opset 7 to Opset 6
        - Greater from Opset 6 to Opset 7
        - Greater from Opset 7 to Opset 6
        - GRU from Opset 6 to Opset 7
        - GRU from Opset 7 to Opset 6
        - GRU from Opset 3 to Opset 2
        - GRU from Opset 2 to Opset 3
        - Equal from Opset 6 to Opset 7
        - Equal from Opset 7 to Opset 6
        - Div from Opset 6 to Opset 7
        - Div from Opset 7 to Opset 6
        - Div from Opset 6 to Opset 5
        - Div from Opset 5 to Opset 6
        - And from Opset 6 to Opset 7
        - And from Opset 7 to Opset 6
        - And from Opset 6 to Opset 5
        - And from Opset 5 to Opset 6
        - Tile from Opset 6 to Opset 5
        - Tile from Opset 5 to Opset 6
        - Sqrt from Opset 6 to Opset 5
        - Sqrt from Opset 5 to Opset 6
        - Sigmoid from Opset 6 to Opset 5
        - Sigmoid from Opset 5 to Opset 6
        - Selu from Opset 6 to Opset 5
        - Selu from Opset 5 to Opset 6
        - Reciprocal from Opset 6 to Opset 5
        - Reciprocal from Opset 5 to Opset 6
        - Neg from Opset 6 to Opset 5
        - Neg from Opset 5 to Opset 6
        - Log from Opset 6 to Opset 5
        - Log from Opset 5 to Opset 6
        - LeakyRelu from Opset 6 to Opset 5
        - LeakyRelu from Opset 5 to Opset 6
        - InstanceNormalization from Opset 6 to Opset 5
        - InstanceNormalization from Opset 5 to Opset 6
        - HardSigmoid from Opset 6 to Opset 5
        - HardSigmoid from Opset 5 to Opset 6
        - Floor from Opset 6 to Opset 5
        - Floor from Opset 5 to Opset 6
        - Exp from Opset 6 to Opset 5
        - Exp from Opset 5 to Opset 6
        - Elu from Opset 6 to Opset 5
        - Elu from Opset 5 to Opset 6
        - Clip from Opset 6 to Opset 5
        - Clip from Opset 5 to Opset 6
        - Ceil from Opset 6 to Opset 5
        - Ceil from Opset 5 to Opset 6
        - Cast from Opset 6 to Opset 5
        - Cast from Opset 5 to Opset 6
        - Abs from Opset 6 to Opset 5
        - Abs from Opset 5 to Opset 6
        - Split from Opset 2 to Opset 1
        - Split from Opset 1 to Opset 2
        - Pad from Opset 2 to Opset 1
        - Pad from Opset 1 to Opset 2
        - LpPool from Opset 2 to Opset 1
        - LpPool from Opset 1 to Opset 2
        - GlobalLpPool from Opset 2 to Opset 1
        - GlobalLpPool from Opset 1 to Opset 2
    """
    if not isinstance(model, ModelProto):
        raise ValueError(f'VersionConverter only accepts ModelProto as model, incorrect type: {type(model)}')
    if not isinstance(target_version, int):
        raise ValueError(f'VersionConverter only accepts int as target_version, incorrect type: {type(target_version)}')
    model_str = model.SerializeToString()
    converted_model_str = C.convert_version(model_str, target_version)
    return load_from_string(converted_model_str)
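# An example round trip with convert_version, assuming an opset-7 model at
# an illustrative path: convert down to opset 6 and inspect the recorded
# opset import on the converted model.
import onnx

model = onnx.load("model_opset7.onnx")
converted = convert_version(model, target_version=6)
print(converted.opset_import[0].version)  # expected to print 6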
def get_input_tensors(model: onnx.ModelProto) -> List[str]:
    ort.set_default_logger_severity(3)
    sess = ort.InferenceSession(model.SerializeToString())
    input_tensors = [inp.name for inp in sess.get_inputs()]
    return input_tensors
def collect_scales_of_features(model: onnx.ModelProto,
                               image_dir: str,
                               features: List[str] = None,
                               batch_size=56,
                               num_workers=1,
                               show_cls=False) -> None:
    """Collect statistics of features by running the model in onnxruntime.

    :param model: the model
    :param image_dir: the directory of images
    :param features: names of features to collect; None for all features
    :param batch_size: batch size for the forward pass
    :param num_workers: number of threads fetching and preprocessing images
    :param show_cls: if True, print a histogram of predicted classes
    """
    from queue import Queue
    import threading
    import glob

    q = Queue()

    def worker(paths):
        def read_img(path, norm=True):
            import cv2
            a = cv2.imread(path)
            a = cv2.resize(a, (224, 224))
            a = a.astype(np.float32)
            a = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
            if norm:
                a /= 255
                # mean and std for RGB images
                a -= [0.485, 0.456, 0.406]
                a /= [0.229, 0.224, 0.225]
            a = np.moveaxis(a, -1, 0)
            return a

        for i in range(0, len(paths), batch_size):
            xs = np.stack(
                list(map(lambda x: read_img(x, True), paths[i:i + batch_size])))
            q.put(xs)
        q.put(None)

    image_exts = ['JPEG', 'jpg', 'jpeg', 'png']
    filenames = list(
        itertools.chain(*[
            glob.glob(os.path.join(image_dir, '**/*.' + ext), recursive=True)
            for ext in image_exts
        ]))
    import random
    random.shuffle(filenames)
    filenames = filenames[:10000]
    file_num = len(filenames)
    num_workers = min((file_num + batch_size - 1) // batch_size, num_workers)
    chunk_num = file_num // num_workers
    threads = []
    for i in range(num_workers):
        t = threading.Thread(
            target=worker,
            args=(filenames[chunk_num * i:chunk_num * (i + 1)], ))
        t.start()
        threads.append(t)

    import onnxruntime as rt
    sess = rt.InferenceSession(model.SerializeToString())
    all_outputs = [x.name for x in sess.get_outputs()]
    features = all_outputs if features is None else list(
        OrderedSet(features) & OrderedSet(all_outputs))
    i = 0
    done_workers = 0
    if show_cls:
        from collections import defaultdict, Counter
        d = defaultdict(int)
    while True:
        xs = q.get()
        if xs is None:
            done_workers += 1
            if done_workers == num_workers:
                break
            continue
        i += xs.shape[0]
        update_scale_and_zp('data', xs)
        from collections import OrderedDict
        res = OrderedDict(zip(features, sess.run(features, {'data': xs})))
        if show_cls:
            cls = Counter(res['mobilenetv20_output_pred_fwd'].squeeze(
                axis=(2, 3)).argmax(axis=1))
            for key in cls:
                d[key] += cls[key]
        for key in res:
            update_scale_and_zp(key, res[key])
        q.task_done()
        print("{}/{}".format(i, file_num))
    if show_cls:
        print(d)
    for t in threads:
        t.join()
#!/usr/bin/env python
import io
import onnx
import os
import tempfile

from onnx import AttributeProto, NodeProto, GraphProto, ModelProto, IR_VERSION

# Create a model proto.
model = ModelProto()
model.ir_version = IR_VERSION
model_string = model.SerializeToString()

# Test if input is a string.
loaded_model = onnx.load_from_string(model_string)
assert model == loaded_model

# Test if input has a read function.
f = io.BytesIO(model_string)
loaded_model = onnx.load(f)
assert model == loaded_model

# Test if input is a file name.
f = tempfile.NamedTemporaryFile(delete=False)
f.write(model_string)
f.close()
loaded_model = onnx.load(f.name)
assert model == loaded_model
os.remove(f.name)