def build_engine(onnx_file_path, engine_file_path):
    """Build a TensorRT engine from an ONNX file and write it to disk.

    Args:
        onnx_file_path: Path of the ONNX model to parse.
        engine_file_path: Path the serialized engine is written to.

    Raises:
        RuntimeError: If the ONNX model cannot be parsed or the engine
            cannot be built.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 20  # 1 MiB
        builder.max_batch_size = 1

        with open(onnx_file_path, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            # parse() only reports failure through its return value.
            details = '; '.join(
                str(parser.get_error(i)) for i in range(parser.num_errors))
            raise RuntimeError('Failed to parse ONNX file {}: {}'.format(
                onnx_file_path, details))

        engine = builder.build_cuda_engine(network)
        if engine is None:
            # Fail with a clear message instead of an AttributeError on
            # engine.serialize() below.
            raise RuntimeError('Failed to build a TensorRT engine from '
                               '{}'.format(onnx_file_path))

        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
def build_engine_onnx(model_file):
    """Parse an ONNX file and build an FP16-enabled TensorRT engine.

    Args:
        model_file: Path of the ONNX model to parse.

    Returns:
        The engine returned by ``builder.build_cuda_engine``, or ``None`` if
        the ONNX file could not be parsed.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.fp16_mode = True
        builder.max_workspace_size = common.GiB(1)

        # Load the ONNX model and parse it to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            # parser.parse returns a bool; report the errors instead of
            # building from a network that was never populated.
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None

        return builder.build_cuda_engine(network)
def build_engine(onnx_path, shape=[1,3,512,512]):
    """Create a TensorRT engine from an ONNX file.

    Args:
        onnx_path: Path to the ONNX file.
        shape: Shape assigned to the first network input. The default list
            is only read, never mutated.

    Returns:
        The engine returned by ``builder.build_cuda_engine``, or ``None`` if
        the ONNX file could not be parsed.
    """
    # NOTE(review): the literal flag 1 is presumably the EXPLICIT_BATCH bit;
    # confirm against the TensorRT version in use.
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(1) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = (256 << 20)  # 256 MiB

        with open(onnx_path, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            # Without this check the shape assignment below would run on a
            # network that may have no inputs at all.
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None

        network.get_input(0).shape = shape
        engine = builder.build_cuda_engine(network)
        return engine
def build_engine(model_path):
    """Build a TensorRT engine from the ONNX model at ``model_path``.

    Args:
        model_path: Path of the ONNX model to parse.

    Returns:
        The engine returned by ``builder.build_cuda_engine``, or ``None`` if
        the ONNX file could not be parsed.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        # A 1 GiB workspace is needed, otherwise the build fails with:
        #   [TensorRT] ERROR: Internal error: could not find any
        #   implementation for node (Unnamed Layer* 34) [Matrix Multiply],
        #   try increasing the workspace size with
        #   IBuilder::setMaxWorkspaceSize()
        #   [TensorRT] ERROR: ../builder/tacticOptimizer.cpp (1461) -
        #   OutOfMemory Error in computeCosts: 0
        builder.max_workspace_size = 1 << 30
        builder.max_batch_size = 1

        with open(model_path, "rb") as f:
            parsed = parser.parse(f.read())
        if not parsed:
            print("ERROR: Failed to parse the ONNX file.")
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None

        engine = builder.build_cuda_engine(network)
        return engine
def build_engine_onnx(model_file):
    """Populate a TensorRT network from an ONNX file and build an engine.

    The builder is given a 1 GiB workspace. FP16 mode and strict type
    constraints are switched on once the model has parsed successfully.

    Args:
        model_file: Path of the ONNX model.

    Returns:
        The result of ``builder.build_cuda_engine``, or ``None`` when the
        parser rejects the model (the parser errors are printed).
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network(common.EXPLICIT_BATCH) as network:
            with trt.OnnxParser(network, TRT_LOGGER) as parser:
                builder.max_workspace_size = common.GiB(1)

                # Read the serialized model and hand it to the parser.
                with open(model_file, 'rb') as model:
                    onnx_bytes = model.read()
                    parsed_ok = parser.parse(onnx_bytes)
                    if not parsed_ok:
                        print('ERROR: Failed to parse the ONNX file.')
                        error_count = parser.num_errors
                        for idx in range(error_count):
                            print(parser.get_error(idx))
                        return None

                builder.fp16_mode = True
                builder.strict_type_constraints = True
                engine = builder.build_cuda_engine(network)
                return engine
def build_engine_onnx(model_file):
    """Build a TensorRT engine from an ONNX file using a builder config.

    Args:
        model_file: Path of the ONNX model.

    Returns:
        The result of ``builder.build_engine(network, config)``, or ``None``
        when the parser rejects the model (the parser errors are printed).
    """
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(EXPLICIT_BATCH)
    config = builder.create_builder_config()
    parser = trt.OnnxParser(network, TRT_LOGGER)

    config.max_workspace_size = GiB(1)

    # Populate the TensorRT network from the serialized ONNX model.
    with open(model_file, "rb") as model:
        serialized_model = model.read()
        parse_succeeded = parser.parse(serialized_model)
        if not parse_succeeded:
            print("ERROR: Failed to parse the ONNX file.")
            for error_index in range(parser.num_errors):
                print(parser.get_error(error_index))
            return None

    engine = builder.build_engine(network, config)
    return engine
def build_engine():
    """Takes an ONNX file and creates a TensorRT engine to run inference with.

    Reads the model from ``cfg.onnx_file_path``, configures the builder from
    ``args`` (``model_memory``, ``max_batch_size``, ``precision``), marks the
    output of the last layer as the network output and writes the serialized
    engine to ``cfg.engine_file_path``.

    Returns:
        The built engine, or ``None`` if the ONNX file could not be parsed
        or the engine could not be built. Exits the process when the ONNX
        file does not exist.
    """
    with trt.Builder(cfg.TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, cfg.TRT_LOGGER) as parser:
        builder.max_workspace_size = GiB(args.model_memory)
        builder.max_batch_size = args.max_batch_size

        if args.precision == 'fp16':
            print('force to fp16')
            builder.fp16_mode = True
            builder.strict_type_constraints = True
        elif args.precision == 'int8':
            # NOTE(review): no int8 calibrator is attached in this function;
            # confirm whether one is expected to be set elsewhere.
            builder.int8_mode = True

        # Parse model file
        if not os.path.exists(cfg.onnx_file_path):
            print(
                'ONNX file {} not found, please run pytorch2ONNX.py first to generate it.'
                .format(cfg.onnx_file_path))
            exit(0)

        print('Loading ONNX file from path {}...'.format(cfg.onnx_file_path))
        with open(cfg.onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed = parser.parse(model.read())
        if not parsed:
            # Stop here: on an empty network the mark_output call below
            # would index layer -1.
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None
        print('Completed parsing of ONNX file')

        print('Building an engine from file {}; this may take a while...'.
              format(cfg.onnx_file_path))
        print(network.num_layers)
        # The output of the final layer is explicitly marked as the network
        # output.
        network.mark_output(
            network.get_layer(network.num_layers - 1).get_output(0))

        engine = builder.build_cuda_engine(network)
        if engine is None:
            print('ERROR: Failed to build the engine.')
            return None
        print("Completed creating Engine")

        with open(cfg.engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
def build_engine():
    """Takes an ONNX file and creates a TensorRT engine to run inference with.

    Reads the model from the module-level ``onnx_file_path``, configures the
    builder from ``FLAGS`` (``vram``, ``max_batch_size``, ``precision``) and
    writes the serialized engine to ``engine_file_path``.

    Returns:
        The built engine, or ``None`` if the ONNX file could not be parsed
        or the engine could not be built. Exits the process when the ONNX
        file does not exist.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        # FLAGS.vram is a size in GiB; same value as the original
        # ``(FLAGS.vram) * 1 << 30``.
        builder.max_workspace_size = FLAGS.vram << 30
        builder.max_batch_size = FLAGS.max_batch_size

        if FLAGS.precision == 'fp16':
            print('force to fp16')
            builder.fp16_mode = True
            builder.strict_type_constraints = True
        # 'int8' is accepted but intentionally does nothing here: int8 mode
        # and its calibrator are not wired up in this function.

        if not os.path.exists(onnx_file_path):
            print(
                'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                .format(onnx_file_path))
            exit(0)

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed = parser.parse(model.read())
        if not parsed:
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None
        print('Completed parsing of ONNX file')

        print('Building an engine from file {}; this may take a while...'.
              format(onnx_file_path))
        engine = builder.build_cuda_engine(network)
        if engine is None:
            # Avoid an AttributeError on engine.serialize() below.
            print('ERROR: Failed to build the engine.')
            return None
        print("Completed creating Engine")

        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
def _parser_model_onnx(self, model_info):
    """Parse the ONNX model at ``self.model_path`` and hand it to ``_parser_model``.

    Args:
        model_info: Object whose ``inputs`` each expose a ``data_format``;
            it is also forwarded to ``self._parser_model``.

    Raises:
        Exception: If any input uses the ``channels_last`` data format, or
            if the ONNX parser fails to parse the model.
    """
    # Validate the declared inputs first so an unsupported layout is
    # rejected before any TensorRT objects are created.
    for model_input in model_info.inputs:
        if model_input.data_format == 'channels_last':
            raise Exception('The data format: {} is not support'.format(
                model_input.data_format))

    g_logger = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(g_logger) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, g_logger) as parser:
        _LOGGER.info('model_to_plan:: Begin to parse network!')
        with open(self.model_path, 'rb') as model:
            result = parser.parse(model.read())
        if not result:
            # This path parses ONNX, not UFF; the message now says so.
            raise Exception(
                'model_to_plan:: Parse network from onnx file failure!')
        self._parser_model(builder=builder, network=network,
                           model_info=model_info)
def __init__(self, model_path):
    """Build a TensorRT engine from an ONNX file and allocate its buffers.

    Sets ``self.engine``, ``self.context``, ``self.inputs``,
    ``self.outputs``, ``self.bindings`` and ``self.stream``.

    Args:
        model_path: Path of the ONNX model to parse.

    Raises:
        RuntimeError: If the ONNX model cannot be parsed or the engine
            cannot be built.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(common.EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)

        # Load the ONNX model and parse it to populate the TensorRT network.
        with open(model_path, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            # Do not continue: the object would be left half-initialised.
            raise RuntimeError(
                'Failed to parse the ONNX file: {}'.format(model_path))

        self.engine = builder.build_cuda_engine(network)

    if self.engine is None:
        raise RuntimeError(
            'Failed to build a TensorRT engine from {}'.format(model_path))

    self.context = self.engine.create_execution_context()
    self.inputs, self.outputs, self.bindings, self.stream = \
        common.allocate_buffers(self.engine)
def build_engine(model_path):
    """Build an FP16-enabled TensorRT engine from an ONNX file.

    Args:
        model_path: Path of the ONNX model to parse.

    Returns:
        The engine returned by ``builder.build_cuda_engine``, or ``None`` if
        the ONNX file could not be parsed.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(flags=1) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 32  # 4 GiB
        builder.max_batch_size = 1
        builder.fp16_mode = True

        with open(model_path, 'rb') as f:
            value = parser.parse(f.read())
        print("Parser: ", value)
        if not value:
            # The parse result used to be printed but never acted on.
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None

        engine = builder.build_cuda_engine(network)
        print(engine)
        return engine
def build_trt_engine(onnx_module, args):
    """Build a TensorRT engine from an in-memory ONNX model and save it.

    Args:
        onnx_module: ONNX model object; it is serialized with ``onnx.save``.
        args: Options object providing ``batch_dim``, ``precision``
            ('fp32', 'int8' or anything else for FP16) and ``trt_path``.
            It is also passed to ``Int8Calibrator`` for int8 builds.

    Returns:
        The built engine, or ``None`` if parsing or building fails. The
        serialized engine is written to ``args.trt_path`` on success.
    """
    logger = trt.Logger()
    network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(logger) as builder, \
            builder.create_network(network_flags) as network, \
            trt.OnnxParser(network, logger) as parser:
        builder.max_workspace_size = 2**31  # 2 GiB
        builder.max_batch_size = args.batch_dim
        # FP16 stays enabled for int8 builds as well.
        builder.fp16_mode = args.precision != 'fp32'
        if args.precision == 'int8':
            builder.int8_mode = True
            builder.int8_calibrator = Int8Calibrator(args)

        print('parsing ONNX...')
        onnx_buf = io.BytesIO()
        onnx.save(onnx_module, onnx_buf)
        if not parser.parse(onnx_buf.getvalue()):
            print(parser.num_errors, 'parser errors:')
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            # Do not build from a network that failed to parse.
            return None

        print('inputs:')
        input_tensors = [
            network.get_input(i) for i in range(network.num_inputs)
        ]
        pprint({t.name: t.shape for t in input_tensors})

        print('outputs:')
        output_tensors = [
            network.get_output(i) for i in range(network.num_outputs)
        ]
        pprint({t.name: t.shape for t in output_tensors})

        print('building CUDA engine...')
        engine = builder.build_cuda_engine(network)
        if engine:
            print('saving CUDA engine to', args.trt_path)
            with open(args.trt_path, 'wb') as mf:
                mf.write(engine.serialize())
        return engine
def onnx_to_trt(folder, model_name, fp=16):
    """Convert ``./<folder>/<model_name>.onnx`` into a TensorRT engine file.

    The engine is written to ``./<folder>/<model_name>.fp<fp>.TEST.engine``.

    Args:
        folder: Directory (relative to the working directory) holding the
            ONNX file; the engine is written there too.
        model_name: File name of the model without extension.
        fp: Precision selector; 16 enables FP16 with strict types.

    Returns:
        The built engine, or ``None`` if parsing or building fails.
    """
    print('--- fp_{} ---'.format(fp))
    EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    NUM_IMAGES_PER_BATCH = 1

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser, \
            builder.create_builder_config() as config:
        # The settings are applied to both the builder attributes and the
        # builder config, as in the original code.
        builder.max_batch_size = NUM_IMAGES_PER_BATCH
        builder.max_workspace_size = 1 << 30
        config.max_workspace_size = 1 << 30
        if fp == 16:
            builder.fp16_mode = True
            builder.strict_type_constraints = True
            config.flags |= 1 << int(trt.BuilderFlag.FP16)
            config.flags |= 1 << int(trt.BuilderFlag.STRICT_TYPES)

        with open("./{}/{}.onnx".format(folder, model_name), 'rb') as model:
            PARSED = parser.parse(model.read())
        if not PARSED:
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            # Previously the build continued with an unparsed network.
            return None

        for layer in network:
            print(layer.name)

        inputs = [network.get_input(i) for i in range(network.num_inputs)]
        opt_profiles = create_optimization_profiles(builder, inputs)
        add_profiles(config, inputs, opt_profiles)

        engine = builder.build_engine(network, config)
        if engine is None:
            # Avoid an AttributeError on engine.serialize() below.
            print('ERROR: Failed to build the engine.')
            return None

        engine_path = './{}/{}.fp{}.TEST.engine'.format(folder, model_name, fp)
        with open(engine_path, "wb") as engine_file:
            engine_file.write(engine.serialize())
        return engine
def build_engine(trt_file_path):
    """Load a serialized TensorRT engine and create an execution context.

    Args:
        trt_file_path: Path of the serialized engine file. When falsy, the
            module-level ``TRT_FILE_PATH`` is used instead, which is the
            only path the previous implementation ever read.

    Returns:
        A ``(engine, context)`` tuple.

    Raises:
        RuntimeError: If the engine file cannot be deserialized.
    """
    # The argument used to be ignored in favour of the global. No builder,
    # network or ONNX parser is needed to deserialize an engine, so those
    # unused objects are no longer created.
    engine_path = trt_file_path or TRT_FILE_PATH
    with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        print('Beginning TRT file parsing')
        engine = runtime.deserialize_cuda_engine(f.read())
        if engine is None:
            raise RuntimeError(
                'Failed to deserialize TensorRT engine from {}'.format(
                    engine_path))
        print('Completed parsing of TRT file')
        context = engine.create_execution_context()
    print("Completed creating Engine")
    return engine, context
def initialize(self):
    """Build ``self.network`` from the graph-surgeon-processed ResNet50 ONNX model.

    Steps: query the system architecture, create an explicit-batch network,
    parse the model produced by ``RN50GraphSurgeon.process_onnx``, unmark
    the ``topk_layer_output_value`` output so only the index output remains
    marked, then apply the configured input dtype and format. Sets
    ``self.initialized`` to True at the end.

    Raises:
        RuntimeError: If the ONNX parser rejects the processed model.
        AssertionError: If the parsed network does not expose exactly the
            two expected top-k outputs, in the expected order.
    """
    # Query system id for architecture
    self.system = get_system()
    self.gpu_arch = self.system.arch

    # Create network.
    explicit_batch_flag = 1 << int(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    self.network = self.builder.create_network(explicit_batch_flag)

    # Parse from onnx file.
    parser = trt.OnnxParser(self.network, self.logger)
    surgeon = RN50GraphSurgeon(self.model_path, self.gpu_arch,
                               self.device_type, self.precision,
                               self.cache_file, self.need_calibration)
    processed_model = surgeon.process_onnx()
    serialized_model = onnx._serialize(processed_model)
    if not parser.parse(serialized_model):
        first_error = parser.get_error(0).desc()
        raise RuntimeError(
            "ResNet50 onnx model processing failed! Error: {:}".format(
                first_error))

    # Unmark topk_layer_output_value, leaving only topk_layer_output_index.
    assert self.network.num_outputs == 2, "Two outputs expected"
    value_output = self.network.get_output(0)
    assert value_output.name == "topk_layer_output_value",\
        "unexpected tensor: {}".format(value_output.name)
    index_output = self.network.get_output(1)
    assert index_output.name == "topk_layer_output_index",\
        "unexpected tensor: {}".format(index_output.name)
    logging.info("Unmarking output: {:}".format(value_output.name))
    self.network.unmark_output(value_output)

    # Set input dtype and format
    input_tensor = self.network.get_input(0)
    wants_int8 = self.input_dtype == "int8"
    if wants_int8:
        input_tensor.dtype = trt.int8
        input_tensor.dynamic_range = (-128, 127)

    requested_format = self.input_format
    if requested_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif requested_format == "chw4":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    self.initialized = True
def build_engine(onnx_path):
    """Build a TensorRT engine from an ONNX file.

    The output of the last layer in the parsed network is marked as the
    network output before building.

    Args:
        onnx_path: Path of the ONNX model to parse.

    Returns:
        The engine returned by ``builder.build_cuda_engine``, or ``None`` if
        the ONNX file could not be parsed.
    """
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    parser = trt.OnnxParser(network, TRT_LOGGER)

    # NOTE(review): 1 << 20 bytes is 1 MiB, not the "1024MB" the original
    # comment claimed. If 1 GiB was intended this should be 1 << 30; the
    # value is left unchanged here.
    builder.max_workspace_size = 1 << 20
    builder.max_batch_size = 1

    print(onnx_path, file=sys.stderr)
    with open(onnx_path, 'rb') as model:
        parsed = parser.parse(model.read())
    if not parsed:
        # Stop here: on an empty network get_layer(num_layers - 1) below
        # would index layer -1.
        print('ERROR: Failed to parse the ONNX file.', file=sys.stderr)
        for index in range(parser.num_errors):
            print(parser.get_error(index), file=sys.stderr)
        return None

    last_layer = network.get_layer(network.num_layers - 1)
    network.mark_output(last_layer.get_output(0))
    print(network, file=sys.stderr)

    engine = builder.build_cuda_engine(network)
    return engine
def build_engine(max_batch_size, save_engine):
    """Takes an ONNX file and creates a TensorRT engine to run inference with.

    The model path, the precision switches (``int8_mode``, ``fp16_mode``,
    ``fp32_mode``), the calibration inputs and the engine output path are
    read from module-level names.

    Args:
        max_batch_size: Value assigned to ``builder.max_batch_size``.
        save_engine: When truthy, the serialized engine is written to
            ``engine_file_path``.

    Returns:
        The built engine, or ``None`` if the model cannot be parsed, no
        precision mode is enabled, or the build fails.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(1) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        # parse onnx model file
        if not os.path.exists(onnx_file_path):
            quit('ONNX file {} not found'.format(onnx_file_path))

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed = parser.parse(model.read())
        if not parsed or network.num_layers == 0:
            # Explicit check instead of an assert, which is stripped
            # under ``python -O``.
            print('Failed to parse ONNX model. '
                  'Please check if the ONNX model is compatible')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None
        print('Completed parsing of ONNX file')

        print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))

        if not (int8_mode or fp16_mode or fp32_mode):
            # Previously this fell through to an unbound ``engine``.
            print('Failed to create the engine: no precision mode enabled')
            return None

        # build trt engine
        # Every enabled mode is configured first and the engine is built
        # once. The old code rebuilt after each enabled mode and kept only
        # the last result, which had the same accumulated flags set.
        builder.max_batch_size = max_batch_size
        builder.max_workspace_size = 1 << 30  # 1GB
        if int8_mode:
            assert calibration_stream, 'Error: a calibration_stream should be provided for int8 mode'
            builder.int8_mode = int8_mode
            builder.int8_calibrator = Calibrator(calibration_stream,
                                                 calibration_table_path)
            print('Int8 mode enabled')
        if fp16_mode:
            builder.fp16_mode = fp16_mode
            print('fp16 mode enabled')
        if fp32_mode:
            print('fp32 mode enabled')

        engine = builder.build_cuda_engine(network)
        if engine is None:
            print('Failed to create the engine')
            return None
        print("Completed creating the engine")

        if save_engine:
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
        return engine
def build_engine_onnx(model_file):
    """Parse an ONNX file and build a TensorRT engine through a builder config.

    Args:
        model_file: Path of the ONNX model.

    Returns:
        The result of ``builder.build_engine(network, config)``, or ``None``
        when the parser rejects the model (the parser errors are printed).
    """
    # Create the builder, network, config and parser instances.
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network(common.EXPLICIT_BATCH) as network:
            with builder.create_builder_config() as config:
                with trt.OnnxParser(network, TRT_LOGGER) as parser:
                    # Set the build parameters.
                    config.max_workspace_size = common.GiB(1)

                    # Read the model file and parse it to populate the
                    # TensorRT network.
                    with open(model_file, 'rb') as model:
                        model_bytes = model.read()
                        parse_ok = parser.parse(model_bytes)
                        if not parse_ok:
                            print('ERROR: Failed to parse the ONNX file.')
                            for idx in range(parser.num_errors):
                                print(parser.get_error(idx))
                            return None

                    # Build the engine.
                    built_engine = builder.build_engine(network, config)
                    return built_engine
def run(nProfile):
    """Build a plan file with one or two optimization profiles and test it.

    The ONNX model is read from the module-level ``onnxFile``. The first
    network input is given the dynamic shape ``[-1, 1]``. With
    ``nProfile == 1`` a single profile covers batch 1..512; otherwise two
    profiles cover 1..4 and 510..512. The serialized engine is written next
    to the ONNX file and then exercised with ``test`` at batch sizes 1, 4,
    510 and 512.

    Args:
        nProfile: Number of optimization profiles; 1 selects the single
            profile, any other value selects two.

    Returns:
        None. Returns early, after printing a message, if parsing or
        building fails.
    """
    logger = trt.Logger(trt.Logger.ERROR)
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()
    config.max_workspace_size = 7 << 30

    parser = trt.OnnxParser(network, logger)
    with open(onnxFile, 'rb') as model:
        parsed = parser.parse(model.read())
    if not parsed:
        print("Failed parsing %s!" % (onnxFile))
        for index in range(parser.num_errors):
            print(parser.get_error(index))
        return

    inputT0 = network.get_input(0)
    inputT0.shape = [-1, 1]

    # (min, opt, max) shapes, one tuple per optimization profile.
    if nProfile == 1:
        shapeRanges = [((1, 1), (510, 1), (512, 1))]
    else:
        shapeRanges = [
            ((1, 1), (4, 1), (4, 1)),
            ((510, 1), (510, 1), (512, 1)),
        ]
    for minShape, optShape, maxShape in shapeRanges:
        profile = builder.create_optimization_profile()
        profile.set_shape(inputT0.name, minShape, optShape, maxShape)
        config.add_optimization_profile(profile)

    engineString = builder.build_serialized_network(network, config)
    if engineString is None:
        # Previously a failed build left an empty plan file behind and then
        # crashed in f.write(None).
        print("Failed building engine!")
        return

    # NOTE(review): split('.')[0] truncates at the first dot, so a path such
    # as "./model.onnx" yields an empty stem; confirm onnxFile never
    # contains an earlier dot.
    planFile = onnxFile.split('.')[0] + "-%d.plan" % nProfile
    with open(planFile, 'wb') as f:
        f.write(engineString)
    print("Succeeded building %s!" % (planFile))

    engine = trt.Runtime(logger).deserialize_cuda_engine(engineString)
    context = engine.create_execution_context()

    # When this tutorial was written trtexec did not yet support multiple
    # optimization profiles, so the test is driven from this script.
    test(engine, context, 1)
    test(engine, context, 4)
    test(engine, context, 510)
    test(engine, context, 512)
def convert_onnx_into_tensorrt_engine(onnx_model_file_path,
                                      trt_engine_output_file):
    """Convert an ONNX model into a serialized TensorRT engine file.

    Nothing is done when ``trt_engine_output_file`` already exists.

    :param onnx_model_file_path: path of the ONNX model to parse
    :param trt_engine_output_file: path the serialized engine is written to
    :return: the built engine, or None when the output file already exists
        or when parsing/building fails (failures are printed, not raised)
    """
    if ops.exists(trt_engine_output_file):
        print('Trt engine file: {:s} has been generated'.format(
            trt_engine_output_file))
        return None

    try:
        explicit_batch = 1 << int(
            trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(explicit_batch) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:
            # Parse the model to create a network.
            with open(onnx_model_file_path, 'rb') as model:
                parsed = parser.parse(model.read())
            for error_index in range(parser.num_errors):
                parser_error = parser.get_error(error_index)
                print(parser_error.desc())
                print(parser_error.code())
                print(parser_error.file())
            if not parsed:
                # Do not spend time building from a network that failed to
                # parse.
                print('Failed construct trt engine')
                return None

            # Configure the builder here.
            builder.max_batch_size = 8
            builder.max_workspace_size = 1 << 32  # 4 GiB

            # Build and return the engine. Note that the builder, network
            # and parser are destroyed when this function returns.
            engine = builder.build_cuda_engine(network)
            if engine is None:
                print('Failed construct trt engine')
                return None

            with open(trt_engine_output_file, "wb") as f:
                f.write(engine.serialize())
            print('Successfully construct trt engine')
            return engine
    except Exception as err:
        # Deliberate best-effort boundary: report and signal failure with
        # None rather than propagating.
        print(err)
        print('Failed to construct trt engine')
        return None
def process(self, cast_output_file=None):
    """Export the model to ONNX and convert it into a TensorRT engine file.

    Args:
        cast_output_file: Optional name; when given, the engine is written
            to ``<deepvac_core_config.output_dir>/trt__<name>.trt`` and
            ``self.config.model_dir`` is updated to that path. Otherwise
            the engine is written to ``self.config.model_dir``.

    Returns:
        None. Problems are reported through ``LOG.logE(..., exit=True)``.
    """
    try:
        import tensorrt as trt
    except ImportError:
        # Narrowed from a bare ``except:`` so unrelated errors (e.g.
        # KeyboardInterrupt) are not misreported as a missing package.
        LOG.logE(
            "You must install tensorrt package if you want to convert pytorch to onnx. "
            "1. Download Tensorrt7.2.3(for CUDA11.0) from https://developer.nvidia.com/tensorrt "
            "2. unpack Tensorrt*.tar.gz "
            "3. pip install tensorrt-x-cpx-none-linux_x86_64.whl in Tensorrt*(your_tensorrt_path)/python",
            exit=True)
        return

    output_trt_file = self.config.model_dir
    if cast_output_file:
        output_trt_file = '{}/trt__{}.trt'.format(
            self.deepvac_core_config.output_dir, cast_output_file)
        self.config.model_dir = output_trt_file
    LOG.logI(
        "config.trt_model_dir found, save tensorrt model to {}...".format(
            self.config.model_dir))

    # to onnx, also set self.config.onnx_model_dir,
    # self.config.onnx_input_names and self.config.onnx_output_names
    self.exportOnnx()

    trt_logger = trt.Logger(trt.Logger.WARNING)
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(trt_logger) as builder, \
            builder.create_network(explicit_batch) as network, \
            trt.OnnxParser(network, trt_logger) as parser:
        builder.max_batch_size = 1

        with open(self.config.onnx_model_dir, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            errors = '; '.join(
                str(parser.get_error(i)) for i in range(parser.num_errors))
            LOG.logE(
                "Failed to parse ONNX model {}: {}".format(
                    self.config.onnx_model_dir, errors),
                exit=True)
            return

        config = builder.create_builder_config()
        # The engine is built with build_engine(network, config), so the
        # 4 GiB workspace limit is set on the config; the original set it
        # on the builder.
        config.max_workspace_size = 4 << 30
        if self.config.enable_dynamic_input:
            profile = builder.create_optimization_profile()
            profile.set_shape(self.config.onnx_input_names[0],
                              self.config.input_min_dims,
                              self.config.input_opt_dims,
                              self.config.input_max_dims)
            config.add_optimization_profile(profile)

        engine = builder.build_engine(network, config)
        if engine is None:
            # Avoid an AttributeError on engine.serialize() below.
            LOG.logE(
                "Failed to build TensorRT engine from {}".format(
                    self.config.onnx_model_dir),
                exit=True)
            return

        with open(output_trt_file, "wb") as f:
            f.write(engine.serialize())
    LOG.logI(
        "Pytorch model convert to TensorRT model succeed, save model in {}"
        .format(output_trt_file))
def build_engine(onnx_file_path, engine_file_path, mode='fp32', verbose=False):
    """Takes an ONNX file and creates a TensorRT engine.

    Args:
        onnx_file_path: Path of the ONNX model to parse.
        engine_file_path: Path the serialized engine is written to.
        mode: 'fp16' or 'int8' enables the matching builder mode; any other
            value leaves the builder at its default precision.
        verbose: Use a VERBOSE TensorRT logger when true.

    Returns:
        The built engine, or ``None`` if parsing or building fails. Exits
        the process when the ONNX file does not exist.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(*EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28  # 256 MiB
        builder.max_batch_size = 1

        # Switch mode here
        if mode == 'fp16':
            builder.fp16_mode = True
        elif mode == 'int8':
            # NOTE(review): no int8 calibrator is attached in this function.
            builder.int8_mode = True

        # Parse model file
        if not os.path.exists(onnx_file_path):
            print(
                'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                .format(onnx_file_path))
            exit(0)

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        # Compare the major version numerically: the previous
        # ``trt.__version__[0] >= '7'`` string comparison is False for
        # versions 10 and above ('1' < '7').
        if int(trt.__version__.split('.')[0]) >= 7:
            # The actual yolov3.onnx is generated with batch size 64.
            # Reshape input to batch size 1
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        print('Completed parsing of ONNX file')

        print('Building an engine; this may take a while...')
        engine = builder.build_cuda_engine(network)
        if engine is None:
            # Avoid an AttributeError on engine.serialize() below.
            print('ERROR: Failed to build the engine.')
            return None
        print('Completed creating engine')

        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
        return engine
def build_engine(onnx_path, shape=[1, 224, 224, 3], precision='FP32'):
    """Create a TensorRT engine from an ONNX file.

    Args:
        onnx_path: Path to the ONNX file.
        shape: Shape assigned to the first network input. The default list
            is only read, never mutated.
        precision: 'FP16' enables FP16 mode with strict type constraints;
            any other value leaves the builder at its default precision.

    Returns:
        The engine returned by ``builder.build_cuda_engine``, or ``None`` if
        the ONNX file could not be parsed.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(1) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        if precision == 'FP16':
            builder.fp16_mode = True
            builder.strict_type_constraints = True
        builder.max_workspace_size = (256 << 20)  # 256 MiB

        with open(onnx_path, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            # Without this check the shape assignment below would run on a
            # network that may have no inputs at all.
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None

        network.get_input(0).shape = shape
        engine = builder.build_cuda_engine(network)
        return engine
def build_engine():
    """Takes an ONNX file and creates a TensorRT engine to run inference with.

    Reads the model from the module-level ``onnx_file_path``, builds an
    FP16 engine with strict type constraints for a fixed ``[1, 3, 416, 416]``
    input and writes it to ``engine_file_path``.

    Returns:
        The built engine, or ``None`` if parsing or building fails. Exits
        the process when the ONNX file does not exist.
    """
    EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        if builder.platform_has_fast_fp16:
            print('support fp16')
        if builder.platform_has_fast_int8:
            print('support int8')
        # Reports the builder's state before fp16_mode is assigned below.
        if builder.fp16_mode:
            print('fp16 kernels are permitted')

        builder.fp16_mode = True
        builder.strict_type_constraints = True
        builder.max_workspace_size = 1 << 29  # 512MB
        builder.max_batch_size = 1

        # Parse model file
        if not os.path.exists(onnx_file_path):
            print(
                'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                .format(onnx_file_path))
            exit(0)

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        # The actual yolov3.onnx is generated with batch size 64. Reshape
        # input to batch size 1
        network.get_input(0).shape = [1, 3, 416, 416]
        print('Completed parsing of ONNX file')

        print('Building an engine from file {}; this may take a while...'.
              format(onnx_file_path))
        engine = builder.build_cuda_engine(network)
        if engine is None:
            # Avoid an AttributeError on engine.serialize() below.
            print('ERROR: Failed to build the engine.')
            return None
        print("Completed creating Engine")

        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
def initialize(self):
    """Build ``self.network`` from the preprocessed 3D-UNet ONNX model.

    Creates an explicit-batch network, parses the model returned by
    ``self.preprocess_onnx``, sets the first input's shape to
    ``[-1, num_input_channel, *input_volume_dim]``, applies the configured
    input dtype/format and forces the first output to linear FP16. Sets
    ``self.initialized`` to True at the end.

    Raises:
        RuntimeError: If the ONNX parser rejects the model.
    """
    # Create network.
    explicit_batch_flag = 1 << int(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    self.network = self.builder.create_network(explicit_batch_flag)

    # Input shape: dynamic batch, then channels, then the volume dims.
    channel_idx = 1
    input_dims = [-1] + self.input_volume_dim
    input_dims.insert(channel_idx, self.num_input_channel)

    # Parse from onnx file.
    parser = trt.OnnxParser(self.network, self.logger)
    loaded_model = onnx.load(self.model_path)
    model = self.preprocess_onnx(loaded_model)
    serialized = onnx._serialize(model)
    if not parser.parse(serialized):
        first_error = parser.get_error(0).desc()
        raise RuntimeError(
            "3D-Unet onnx model parsing failed! Error: {:}".format(
                first_error))

    # Set input/output tensor dtype and formats
    input_tensor = self.network.get_input(0)
    output_tensor = self.network.get_output(0)
    input_tensor.shape = input_dims

    # The first matching dtype name wins; unknown names leave the dtype
    # untouched.
    dtype_choices = (
        ("int8", trt.int8),
        ("fp16", trt.float16),
        ("fp32", trt.float32),
    )
    for dtype_name, trt_dtype in dtype_choices:
        if self.input_dtype == dtype_name:
            input_tensor.dtype = trt_dtype
            break

    requested_format = self.input_format
    if requested_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif requested_format == "dhwc8":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.DHWC8)
    elif requested_format == "cdhw32":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CDHW32)

    # Always use FP16 output
    output_tensor.dtype = trt.float16
    output_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)

    self.initialized = True
def _build_engine_onnx(onnx_path: str,
                       force_fp16: bool = False,
                       max_batch_size: int = 1,
                       im_size: Tuple[int] = None):
    '''
    Builds TensorRT engine from provided ONNX file

    :param onnx_path: Path to ONNX file on disk
    :param force_fp16: Force use of FP16 precision, even if device doesn't support it. Be careful.
    :param max_batch_size: Define maximum batch size supported by engine. If >1 creates optimization profile.
    :param im_size: Required if max_batch_size > 1. Used for creation of optimization profile.
    :return: TensorRT engine (exits the process if the ONNX file cannot be parsed)
    '''
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            builder.create_builder_config() as config, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        # The engine is built with build_engine(network, config), so the
        # precision flags and workspace limit are set on the config. The
        # original set them on the builder, where this build path does not
        # read them.
        if builder.platform_has_fast_fp16 or force_fp16 is True:
            config.set_flag(trt.BuilderFlag.FP16)
            config.set_flag(trt.BuilderFlag.STRICT_TYPES)
        # NOTE(review): 1 << 20 bytes is 1 MiB; confirm this limit is large
        # enough for the target models.
        config.max_workspace_size = 1 << 20

        if max_batch_size != 1 and im_size is not None:
            logging.warning(
                'Batch size !=1 is used. Ensure your inference code supports it.'
            )
            profile = builder.create_optimization_profile()
            # Profile range for the input tensor named 'data': batch
            # 1..max_batch_size, optimised for max_batch_size.
            profile.set_shape('data', (1, 3) + im_size,
                              (max_batch_size, 3) + im_size,
                              (max_batch_size, 3) + im_size)
            config.add_optimization_profile(profile)

        with open(onnx_path, "rb") as f:
            if not parser.parse(f.read()):
                print('ERROR: Failed to parse the ONNX file: {}'.format(
                    onnx_path))
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                sys.exit(1)

        return builder.build_engine(network, config=config)
def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
    """Export the model to ONNX, then build and save a TensorRT engine.

    The ONNX file is produced with ``export_onnx`` next to ``file`` (suffix
    ``.onnx``) and the engine is written with suffix ``.engine``. Any
    exception is caught and logged as an export failure.

    Args:
        model, im, train, simplify: Forwarded to ``export_onnx``.
        file: Path object; ``with_suffix`` is used to derive output names.
        half: Request FP16; only honoured when the builder reports fast
            FP16 support.
        workspace: Builder workspace size in GiB.
        verbose: Raise the TensorRT logger to VERBOSE severity.
        prefix: Prefix used in log messages.
    """
    try:
        check_requirements(('tensorrt',))
        import tensorrt as trt

        # Opset 13 on TensorRT 8.x, opset 12 otherwise
        # (test on TensorRT 7.x and 8.x).
        is_trt8 = trt.__version__[0] == '8'
        opset = 13 if is_trt8 else 12
        export_onnx(model, im, file, opset, train, False, simplify)
        onnx = file.with_suffix('.onnx')
        assert onnx.exists(), f'failed to export ONNX file: {onnx}'

        LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
        f = file.with_suffix('.engine')  # TensorRT engine file
        logger = trt.Logger(trt.Logger.INFO)
        if verbose:
            logger.min_severity = trt.Logger.Severity.VERBOSE

        builder = trt.Builder(logger)
        config = builder.create_builder_config()
        config.max_workspace_size = workspace * 1 << 30

        explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        network = builder.create_network(explicit_batch)
        parser = trt.OnnxParser(network, logger)
        loaded = parser.parse_from_file(str(onnx))
        if not loaded:
            raise RuntimeError(f'failed to load ONNX file: {onnx}')

        # Collect the network I/O tensors, then describe them in the log.
        inputs = []
        for index in range(network.num_inputs):
            inputs.append(network.get_input(index))
        outputs = []
        for index in range(network.num_outputs):
            outputs.append(network.get_output(index))

        LOGGER.info(f'{prefix} Network Description:')
        for inp in inputs:
            LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
        for out in outputs:
            LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

        # FP16 is used only when requested and supported by the platform.
        half = half & builder.platform_has_fast_fp16
        LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
        if half:
            config.set_flag(trt.BuilderFlag.FP16)

        with builder.build_engine(network, config) as engine:
            with open(f, 'wb') as engine_file:
                engine_file.write(engine.serialize())
        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')

    except Exception as e:
        LOGGER.info(f'\n{prefix} export failure: {e}')
def build_engine(model_path, shape):
    """Build a TensorRT engine from an ONNX file with a fixed input shape.

    Args:
        model_path: Path of the ONNX model.
        shape: Shape assigned to the first network input before building.

    Returns:
        The result of ``builder.build_engine(network, config)``, or ``None``
        when the parser rejects the model (the parser errors are printed).
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network(flags=network_flags) as network:
            with trt.OnnxParser(network, TRT_LOGGER) as parser:
                builder.max_batch_size = 1

                with open(model_path, "rb") as f:
                    onnx_bytes = f.read()
                    parsed_ok = parser.parse(onnx_bytes)
                    if not parsed_ok:
                        print("ERROR: Failed to parse the ONNX file")
                        for idx in range(parser.num_errors):
                            print(parser.get_error(idx))
                        return None

                # The workspace limit (1 GiB) lives on the builder config.
                config = builder.create_builder_config()
                config.max_workspace_size = 1 << 30

                first_input = network.get_input(0)
                first_input.shape = shape

                engine = builder.build_engine(network, config)
                return engine
def build_engine(TRT_LOGGER, onnx_path, shape = [1,224,224,3]):
    """Create a TensorRT engine from an ONNX file.

    Args:
        TRT_LOGGER: TensorRT logger passed to the builder and the parser.
        onnx_path: Path to the ONNX file.
        shape: Shape assigned to the first network input. The default list
            is only read, never mutated.

    Returns:
        The engine returned by ``builder.build_cuda_engine``, or ``None`` if
        the ONNX file could not be parsed.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(1) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = (256 << 20)  # 256 MiB

        with open(onnx_path, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            # Without this check the shape assignment below would run on a
            # network that may have no inputs at all.
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None

        network.get_input(0).shape = shape

        # use FP16 mode if possible
        if builder.platform_has_fast_fp16:
            builder.fp16_mode = True

        # generate TensorRT engine optimized for the target platform
        engine = builder.build_cuda_engine(network)
        return engine
def ONNX2TRT(args, calib=None):
    '''
    convert onnx to tensorrt engine, use mode of ['fp32', 'fp16', 'int8']

    :param args: options object providing ``mode``, ``batch_size``,
        ``onnx_file_path`` and ``engine_file_path``
    :param calib: int8 calibrator, assigned to the builder in int8 mode
    :return: trt engine, or None if parsing or building fails
    '''
    mode = args.mode.lower()
    assert mode in [
        'fp32', 'fp16', 'int8'
    ], "mode should be in ['fp32', 'fp16', 'int8']"

    G_LOGGER = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(G_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, G_LOGGER) as parser:
        builder.max_batch_size = args.batch_size
        builder.max_workspace_size = 1 << 30  # 1 GiB

        if mode == 'int8':
            assert (builder.platform_has_fast_int8 == True), "not support int8"
            builder.int8_mode = True
            builder.int8_calibrator = calib
        elif mode == 'fp16':
            assert (builder.platform_has_fast_fp16 == True), "not support fp16"
            builder.fp16_mode = True

        print('Loading ONNX file from path {}...'.format(args.onnx_file_path))
        with open(args.onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            parsed = parser.parse(model.read())
        if not parsed:
            print('ERROR: Failed to parse the ONNX file.')
            for index in range(parser.num_errors):
                print(parser.get_error(index))
            return None
        print('Completed parsing of ONNX file')

        print(
            'Building an engine from file {}; this may take a while...'.format(
                args.onnx_file_path))
        engine = builder.build_cuda_engine(network)
        if engine is None:
            # Do not report success or call serialize() on a failed build.
            print('ERROR: Failed to build the engine.')
            return None
        print("Created engine success! ")

        # Save the plan file
        print('Saving TRT engine file to path {}...'.format(
            args.engine_file_path))
        with open(args.engine_file_path, "wb") as f:
            f.write(engine.serialize())
        print('Engine file has already saved to {}!'.format(
            args.engine_file_path))
        return engine