def test_save_and_load_tensor(self):  # type: () -> None
    proto = self._simple_tensor()
    cls = TensorProto
    proto_string = onnx._serialize(proto)

    # Test if input is string
    loaded_proto = onnx.load_tensor_from_string(proto_string)
    self.assertTrue(proto == loaded_proto)

    # Test if input has a read function
    f = io.BytesIO()
    onnx.save_tensor(loaded_proto, f)
    f = io.BytesIO(f.getvalue())
    loaded_proto = onnx.load_tensor(f, cls)
    self.assertTrue(proto == loaded_proto)

    # Test if input is a file name
    try:
        tfile = tempfile.NamedTemporaryFile(delete=False)
        onnx.save_tensor(proto, tfile)
        tfile.close()

        loaded_proto = onnx.load_tensor(tfile.name, cls)
        self.assertTrue(proto == loaded_proto)
    finally:
        os.remove(tfile.name)
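The `_simple_tensor` fixture referenced above is not shown here. A minimal sketch of what such a helper might look like, assuming it builds a small constant tensor with `onnx.helper.make_tensor` (the tensor name, shape, and values are illustrative assumptions, not the test suite's actual fixture):

import onnx
from onnx import TensorProto

def _simple_tensor(self):  # type: () -> TensorProto
    # Hypothetical fixture: a 2x3 float tensor; any valid TensorProto
    # would exercise the save/load round trip the same way.
    return onnx.helper.make_tensor(
        name="test-tensor",
        data_type=TensorProto.FLOAT,
        dims=(2, 3),
        vals=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    )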
def __init__(
    self,
    model: onnx.ModelProto,
    execution_providers: Optional[List[str]] = None,
    context: Optional[mlrun.MLClientCtx] = None,
):
    # Set the context:
    self._context = (
        context
        if context is not None
        else mlrun.get_or_create_ctx(self.DEFAULT_CONTEXT_NAME)
    )

    # Store the model:
    self._model = model

    # Set the execution providers (the default will prefer the CUDA
    # Execution Provider over the CPU Execution Provider):
    self._execution_providers = (
        ["CUDAExecutionProvider", "CPUExecutionProvider"]
        if execution_providers is None
        else execution_providers
    )

    # Initialize the ONNX Runtime session:
    self._inference_session = onnxruntime.InferenceSession(
        onnx._serialize(model),
        providers=self._execution_providers,
    )

    # Get the input layers' names:
    self._input_layers = [
        input_layer.name
        for input_layer in self._inference_session.get_inputs()
    ]

    # Get the output layers' names:
    self._output_layers = [
        output_layer.name
        for output_layer in self._inference_session.get_outputs()
    ]
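A standalone usage sketch of the same session setup, assuming a model file on disk (the path and the dummy input are illustrative); it shows feeding a NumPy array to the session by the cached layer names:

import numpy as np
import onnx
import onnxruntime

model = onnx.load("model.onnx")  # illustrative path
session = onnxruntime.InferenceSession(
    model.SerializeToString(),
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
input_name = session.get_inputs()[0].name
output_names = [output.name for output in session.get_outputs()]

# Run inference on a dummy batch; dynamic dims are replaced with 1.
shape = [d if isinstance(d, int) else 1
         for d in session.get_inputs()[0].shape]
x = np.zeros(shape, dtype=np.float32)
outputs = session.run(output_names, {input_name: x})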
def test_save_and_load_model(self):
    proto = self._simple_model()
    cls = ModelProto
    proto_string = onnx._serialize(proto)

    # Test if input is string
    loaded_proto = onnx.load_model_from_string(proto_string)
    self.assertTrue(proto == loaded_proto)

    # Test if input has a read function
    # (save_model also accepts serialized bytes, which exercises the
    # bytes branch of save_model shown further below)
    f = io.BytesIO()
    onnx.save_model(proto_string, f)
    f = io.BytesIO(f.getvalue())
    loaded_proto = onnx.load_model(f, cls)
    self.assertTrue(proto == loaded_proto)

    # Test if input is a file name
    try:
        f = tempfile.NamedTemporaryFile(delete=False)
        onnx.save_model(proto, f)
        f.close()

        loaded_proto = onnx.load_model(f.name, cls)
        self.assertTrue(proto == loaded_proto)
    finally:
        os.remove(f.name)
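As with `_simple_tensor`, the `_simple_model` fixture is not shown. A plausible minimal version, assuming it assembles a one-node graph via `onnx.helper` (the Identity op, tensor names, and shapes are assumptions for illustration):

import onnx
from onnx import helper, TensorProto

def _simple_model(self):  # type: () -> onnx.ModelProto
    # Hypothetical fixture: an Identity model with one float input/output.
    inp = helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 2])
    out = helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, 2])
    node = helper.make_node("Identity", ["x"], ["y"])
    graph = helper.make_graph([node], "simple-graph", [inp], [out])
    return helper.make_model(graph, producer_name="test")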
def load(self):
    """
    Use the model handler to get the model file path and initialize an
    ONNX Runtime inference session.
    """
    # Load the model:
    if self._model_handler.model is None:
        self._model_handler.load()
    self.model = self._model_handler.model

    # Initialize the ONNX Runtime session:
    self._inference_session = onnxruntime.InferenceSession(
        onnx._serialize(self._model_handler.model),
        providers=self._execution_providers,
    )

    # Get the input layers' names:
    self._input_layers = [
        input_layer.name
        for input_layer in self._inference_session.get_inputs()
    ]

    # Get the output layers' names:
    self._output_layers = [
        output_layer.name
        for output_layer in self._inference_session.get_outputs()
    ]
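The cached layer names are typically consumed by a prediction step. A minimal sketch of such a method, assuming inputs arrive as a list of NumPy arrays in input-layer order (the `predict` name and signature are assumptions; the source does not show this method):

def predict(self, inputs):
    # Hypothetical predict step for the same class: map each array to
    # its input layer by position and request every output layer.
    input_feed = dict(zip(self._input_layers, inputs))
    return self._inference_session.run(self._output_layers, input_feed)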
def save_model(proto, f, format=None, save_as_external_data=False,
               all_tensors_to_one_file=True, location=None,
               size_threshold=1024, convert_attribute=False):
    # An already-serialized model (bytes) is accepted as well as a
    # ModelProto.
    if isinstance(proto, bytes):
        proto = onnx._deserialize(proto, onnx.ModelProto())

    if save_as_external_data:
        convert_model_to_external_data(proto, all_tensors_to_one_file,
                                       location, size_threshold,
                                       convert_attribute)

    s = onnx._serialize(proto)
    onnx._save_bytes(s, f)
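A short usage sketch of the external-data path this function exposes, assuming a large model already on disk (file names are illustrative): tensors above `size_threshold` bytes are moved out of the protobuf into a side file referenced by the model.

import onnx

model = onnx.load("big_model.onnx")  # illustrative path
onnx.save_model(
    model,
    "big_model_external.onnx",
    save_as_external_data=True,
    all_tensors_to_one_file=True,
    location="big_model.weights",  # stored next to the .onnx file
    size_threshold=1024,           # offload tensors larger than 1 KiB
)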
def initialize(self):
    """
    Parse the processed model to create the network.
    """
    # Create network.
    self.network = self.builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

    channel_idx = 1

    # Input shape
    input_tensor_dim = [-1] + self.input_volume_dim
    input_tensor_dim.insert(channel_idx, self.num_input_channel)

    # Parse from onnx file.
    parser = trt.OnnxParser(self.network, self.logger)
    model = self.preprocess_onnx(onnx.load(self.model_path))
    success = parser.parse(onnx._serialize(model))
    if not success:
        raise RuntimeError(
            "3D-Unet onnx model parsing failed! Error: {:}".format(
                parser.get_error(0).desc()))

    # Set input/output tensor dtype and formats.
    input_tensor = self.network.get_input(0)
    output_tensor = self.network.get_output(0)
    input_tensor.shape = input_tensor_dim

    if self.input_dtype == "int8":
        input_tensor.dtype = trt.int8
    elif self.input_dtype == "fp16":
        input_tensor.dtype = trt.float16
    elif self.input_dtype == "fp32":
        input_tensor.dtype = trt.float32

    if self.input_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "dhwc8":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.DHWC8)
    elif self.input_format == "cdhw32":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CDHW32)

    # Always use FP16 output unless calibrating: a workaround for
    # calibration not working properly with the identity layer.
    force_calibration = dict_get(self.args, "force_calibration",
                                 default=False)
    output_tensor.dtype = trt.float32 if force_calibration else trt.float16
    output_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)

    self.initialized = True
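After `initialize()`, a builder step typically turns the parsed network into an engine. A minimal sketch using the TensorRT builder-config API (the method name, workspace size, and profile bounds are assumptions; the harness's real build step is not shown here):

import tensorrt as trt

def build_engine(self, min_batch=1, opt_batch=8, max_batch=8):
    # Hypothetical build step for the dynamic-batch network above.
    config = self.builder.create_builder_config()
    config.max_workspace_size = 8 << 30  # 8 GiB, an assumption

    # The network's batch dim is -1, so an optimization profile is needed.
    profile = self.builder.create_optimization_profile()
    inp = self.network.get_input(0)
    dims = list(inp.shape)
    profile.set_shape(inp.name,
                      min=[min_batch] + dims[1:],
                      opt=[opt_batch] + dims[1:],
                      max=[max_batch] + dims[1:])
    config.add_optimization_profile(profile)

    return self.builder.build_engine(self.network, config)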
def initialize(self):
    """
    Parse input ONNX file to a TRT network. Apply layer optimizations and
    fusion plugins on network.
    """
    # Query system id for architecture.
    self.system = get_system()
    self.gpu_arch = self.system.arch

    # Create network.
    self.network = self.builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

    # Parse from onnx file.
    parser = trt.OnnxParser(self.network, self.logger)
    rn50_gs = RN50GraphSurgeon(self.model_path,
                               self.gpu_arch,
                               self.device_type,
                               self.precision,
                               self.cache_file,
                               self.need_calibration)
    model = rn50_gs.process_onnx()
    success = parser.parse(onnx._serialize(model))
    if not success:
        raise RuntimeError(
            "ResNet50 onnx model processing failed! Error: {:}".format(
                parser.get_error(0).desc()))

    # Unmark topk_layer_output_value, leaving only topk_layer_output_index.
    assert self.network.num_outputs == 2, "Two outputs expected"
    assert self.network.get_output(0).name == "topk_layer_output_value", \
        "unexpected tensor: {}".format(self.network.get_output(0).name)
    assert self.network.get_output(1).name == "topk_layer_output_index", \
        "unexpected tensor: {}".format(self.network.get_output(1).name)
    logging.info("Unmarking output: {:}".format(
        self.network.get_output(0).name))
    self.network.unmark_output(self.network.get_output(0))

    # Set input dtype and format.
    input_tensor = self.network.get_input(0)
    if self.input_dtype == "int8":
        input_tensor.dtype = trt.int8
        input_tensor.dynamic_range = (-128, 127)
    if self.input_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "chw4":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    self.initialized = True
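Both harnesses above surface only the first parser error. A small sketch of a fuller error report, assuming the standard `trt.OnnxParser` error API (`num_errors` / `get_error`); the helper name is hypothetical:

def report_parser_errors(parser):
    # Collect every recorded parser error instead of only index 0.
    messages = []
    for i in range(parser.num_errors):
        messages.append("[{}] {}".format(i, parser.get_error(i).desc()))
    return "\n".join(messages)

# Usage (illustrative):
#   if not parser.parse(onnx._serialize(model)):
#       raise RuntimeError("parsing failed:\n" + report_parser_errors(parser))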