def get_engine2(engine_file_path=""):
    """Load a serialized TensorRT engine, or build one from the FCN8s Caffe model.

    When ``engine_file_path`` exists, its bytes are deserialized with a
    ``trt.Runtime``. Otherwise an engine is built from
    ``MODEL_DIR + 'fcn8s.prototxt'`` and ``MODEL_DIR + 'fcn8s.caffemodel'``,
    with output 0 of the last network layer marked as the network output.

    Args:
        engine_file_path: Path of a serialized engine file. The default
            empty string never exists, so an engine is built.

    Returns:
        The deserialized or freshly built engine, or ``None`` when the Caffe
        model cannot be parsed.

    Raises:
        SystemExit: With status 1 when the prototxt or caffemodel is missing.
    """
    # Function-scope import: the file's import block is not visible from here.
    import sys

    if os.path.exists(engine_file_path):
        with open(engine_file_path, 'rb') as f, trt.Runtime(logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    print("building engine...")
    prototxt_path = MODEL_DIR + 'fcn8s.prototxt'
    caffemodel_path = MODEL_DIR + 'fcn8s.caffemodel'

    # Validate the inputs before allocating any TensorRT objects, and report
    # the failure with a non-zero exit status.
    if not os.path.exists(prototxt_path):
        print("There is no prototxt at: %s" % prototxt_path)
        sys.exit(1)
    if not os.path.exists(caffemodel_path):
        print("There is no caffemodel at: %s" % caffemodel_path)
        sys.exit(1)

    with trt.Builder(logger) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        builder.max_batch_size = 1
        builder.max_workspace_size = (256 << 20)
        builder.fp16_mode = False
        builder.strict_type_constraints = True

        model_tensors = parser.parse(deploy=prototxt_path,
                                     model=caffemodel_path,
                                     network=network,
                                     dtype=trt.float32)
        if model_tensors is None:
            # Without this guard the code below would index into a network
            # that was never (fully) populated.
            print("Failed to parse the Caffe model: %s" % caffemodel_path)
            return None

        last_layer = network.get_layer(network.num_layers - 1)
        network.mark_output(last_layer.get_output(0))
        engine = builder.build_cuda_engine(network)
        return engine
def build_engine(deploy_file, model_file, verbose=False):
    """Parse a Caffe model and build a TensorRT engine from it.

    The tensors named in the module-level ``OUTPUT_LAYERS`` are marked as
    network outputs, in the order given by ``OUTPUT_LAYERS``.

    Args:
        deploy_file: Path of the Caffe deploy (prototxt) file.
        model_file: Path of the Caffe weights file.
        verbose: Use a VERBOSE TensorRT logger when true, otherwise the
            default logger.

    Returns:
        The result of ``builder.build_cuda_engine(network)``, or ``None``
        when the parser reports a failure.

    Raises:
        AssertionError: When the layer outputs whose names appear in
            ``OUTPUT_LAYERS`` do not match ``OUTPUT_LAYERS`` one-to-one.
    """
    trt_logger = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(trt_logger) as builder, builder.create_network(
            *EXPLICIT_BATCH) as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True
        datatype = trt.float32

        if not parser.parse(deploy=deploy_file,
                            model=model_file,
                            network=network,
                            dtype=datatype):
            print('ERROR: Failed to parse the Caffe model.')
            # num_errors/get_error is the OnnxParser error API; guard the
            # lookup so the failure path cannot itself raise AttributeError.
            for error in range(getattr(parser, 'num_errors', 0)):
                print(parser.get_error(error))
            return None

        # Compare the major version numerically: the string comparison
        # '1' >= '7' is false, so it would misclassify TensorRT 10 and later.
        if int(trt.__version__.split('.')[0]) >= 7:
            # Force the first dimension of network input 0 to 1.
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        print('Completed parsing of Caffe file')

        # Collect output 0 of every layer whose tensor name is requested.
        net_out_tmp = []
        for ln in range(network.num_layers):
            tensor = network.get_layer(ln).get_output(0)
            if tensor.name in OUTPUT_LAYERS:
                net_out_tmp.append(tensor)
        assert len(net_out_tmp) == len(OUTPUT_LAYERS)

        # Reorder the tensors to follow OUTPUT_LAYERS before marking them.
        net_out = [None] * len(OUTPUT_LAYERS)
        for nn in net_out_tmp:
            net_out[OUTPUT_LAYERS.index(nn.name)] = nn
        assert None not in net_out
        for nn in net_out:
            network.mark_output(nn)

        print('Building an engine; this may take a while...')
        engine = builder.build_cuda_engine(network)
        print('Completed creating engine')
        return engine
def build_engine(deploy_file, model_file):
    """Build a TensorRT engine from a Caffe deploy/model file pair.

    Args:
        deploy_file: Path of the Caffe deploy file.
        model_file: Path of the Caffe model (weights) file.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network:
            with trt.CaffeParser() as parser:
                builder.max_workspace_size = common.GiB(1)
                # Populate the network from the Caffe files.
                blob_map = parser.parse(deploy=deploy_file,
                                        model=model_file,
                                        network=network,
                                        dtype=ModelData.DTYPE)
                # Caffe networks need their output tensor marked explicitly.
                output_tensor = blob_map.find(ModelData.OUTPUT_NAME)
                network.mark_output(output_tensor)
                engine = builder.build_cuda_engine(network)
                return engine
def build_engine():
    """Build an engine holding one "UpsamplePlugin" layer on a fixed input.

    The network has one float32 input named "input_layer" with shape
    (1, 13, 3, 3); the plugin's output 0 is renamed "outputs" and marked as
    the network output.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network:
            # NOTE(review): the Caffe parser is created but never used here.
            with trt.CaffeParser() as parser:
                builder.max_batch_size = 1
                builder.max_workspace_size = 2**20

                net_input = network.add_input(name="input_layer",
                                              dtype=trt.float32,
                                              shape=(1, 13, 3, 3))
                plugin_layer = network.add_plugin_v2(
                    inputs=[net_input],
                    plugin=get_trt_plugin("UpsamplePlugin"))

                plugin_output = plugin_layer.get_output(0)
                plugin_output.name = "outputs"
                network.mark_output(plugin_layer.get_output(0))

                return builder.build_cuda_engine(network)
def build_engine(trt_deploy_path, trt_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
    """Build a TensorRT engine from a Caffe model with two marked outputs.

    Args:
        trt_deploy_path: Path of the Caffe deploy file.
        trt_model_path: Path of the Caffe model (weights) file.
        trt_logger: Logger handed to ``trt.Builder``.
        trt_engine_datatype: Data type passed to the parser; ``HALF`` also
            switches the builder to FP16 mode.
        batch_size: Value assigned to ``builder.max_batch_size``.
        silent: Suppress the progress message when true.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    with trt.Builder(trt_logger) as builder:
        with builder.create_network() as network, trt.CaffeParser() as parser:
            builder.max_workspace_size = 1 << 30
            if trt_engine_datatype == trt.DataType.HALF:
                builder.fp16_mode = True
            builder.max_batch_size = batch_size

            blob_map = parser.parse(trt_deploy_path, trt_model_path,
                                    network, trt_engine_datatype)
            # Mark the main output first, then the 'keep_count' tensor.
            for tensor_name in (ModelData.OUTPUT_NAME, 'keep_count'):
                network.mark_output(blob_map.find(tensor_name))

            if not silent:
                print("Building TensorRT engine. This may take few minutes.")

            return builder.build_cuda_engine(network)
def __call__(self):
    """Create the builder, network and Caffe parser and parse the model.

    The tensors listed in ``self.outputs`` are marked as network outputs
    unless ``self.outputs`` is empty or equals ``constants.MARK_ALL``.

    Returns:
        The tuple ``(builder, network, parser, self.batch_size)``.
    """
    trt_builder = trt.Builder(TRT_LOGGER)
    trt_network = trt_builder.create_network()
    caffe_parser = trt.CaffeParser()
    blob_map = caffe_parser.parse(deploy=self.deploy,
                                  model=self.model,
                                  network=trt_network,
                                  dtype=self.dtype)

    # Only explicit output lists are marked here.
    if self.outputs and self.outputs != constants.MARK_ALL:
        for tensor_name in self.outputs:
            trt_network.mark_output(blob_map.find(tensor_name))

    return trt_builder, trt_network, caffe_parser, self.batch_size
def build_engine(deploy_file, model_file):
    """Build a TensorRT engine from a Caffe model using a plugin factory.

    Args:
        deploy_file: Path of the Caffe deploy file.
        model_file: Path of the Caffe model (weights) file.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network:
            with trt.CaffeParser() as parser:
                builder.max_workspace_size = common.GiB(1)
                # parser.plugin_factory_ext is a write-only attribute, so the
                # factory is kept in the outer-scope name fc_factory, which
                # lets it be destroyed later.
                parser.plugin_factory_ext = fc_factory
                # Populate the network, mark its output, then build.
                blob_map = parser.parse(deploy=deploy_file,
                                        model=model_file,
                                        network=network,
                                        dtype=ModelData.DTYPE)
                output_tensor = blob_map.find(ModelData.OUTPUT_NAME)
                network.mark_output(output_tensor)
                return builder.build_cuda_engine(network)
def build_engine_caffe(model_file, deploy_file):
    """Build a TensorRT engine from a Caffe model.

    Args:
        model_file: Path of the Caffe model (weights) file.
        deploy_file: Path of the Caffe deploy file.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    # The severity of TRT_LOGGER controls how many messages are displayed.
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network, trt.CaffeParser() as parser:
            # Upper bound on the memory the builder may use while building.
            builder.max_workspace_size = common.GiB(1)

            # Parsing fills the network and returns a name-to-tensor lookup.
            blob_map = parser.parse(deploy=deploy_file,
                                    model=model_file,
                                    network=network,
                                    dtype=ModelData.DTYPE)

            # Caffe models carry no output marking, so the known output
            # tensor is looked up by name and marked manually.
            output_tensor = blob_map.find(ModelData.OUTPUT_NAME)
            network.mark_output(output_tensor)

            engine = builder.build_cuda_engine(network)
            return engine
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    """Build an INT8 TensorRT engine from a Caffe model.

    Args:
        deploy_file: Path of the Caffe deploy file.
        model_file: Path of the Caffe model (weights) file.
        calib: Calibrator assigned to ``builder.int8_calibrator``.
        batch_size: Value assigned to ``builder.max_batch_size``.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network:
            with trt.CaffeParser() as parser:
                # The builder batch size mirrors the calibrator's because the
                # same batches are used for inference. This is not required
                # in general; the two batch sizes are independent.
                builder.max_batch_size = batch_size
                builder.max_workspace_size = common.GiB(1)
                builder.int8_mode = True
                builder.int8_calibrator = calib

                # Parse the Caffe model and mark its output.
                blob_map = parser.parse(deploy=deploy_file,
                                        model=model_file,
                                        network=network,
                                        dtype=ModelData.DTYPE)
                output_tensor = blob_map.find(ModelData.OUTPUT_NAME)
                network.mark_output(output_tensor)

                # Building the engine also runs INT8 calibration.
                return builder.build_cuda_engine(network)
def build_engine():
    """Build an FP16 TensorRT engine from a Caffe model and save it to disk.

    Reads ``deploy_file``, ``model_file`` and ``engine_file_path`` from the
    enclosing scope. The serialized engine is written to ``engine_file_path``
    only when the build succeeds, so a failed build never leaves an empty
    engine file behind.

    Returns:
        The built engine, or ``None`` when ``build_cuda_engine`` returns
        ``None``.
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.fp16_mode = True
        builder.strict_type_constraints = True
        builder.max_batch_size = 16
        # Upper bound on the memory the builder may use while building.
        builder.max_workspace_size = common.GiB(1)

        # Parsing populates the network and returns a name-to-tensor lookup.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # Caffe models need their output tensor marked manually.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))

        engine = builder.build_cuda_engine(network)
        if engine is None:
            # Check before opening the file: opening in "wb" mode truncates
            # it, and serialize() on None would then raise.
            print("ERROR: Failed to build the TensorRT engine; nothing was written to {}".format(engine_file_path))
            return None

        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
def convert_caffe_model_to_trt(caffe_weights_file, caffe_deploy_file, trt_model_filename, output_tensor_name, output_data_type, max_workspace_size, max_batch_size):
    """Convert a (caffe_weights_file, caffe_deploy_file) pair into a TensorRT engine file.

    Args:
        caffe_weights_file: Path of the Caffe weights file.
        caffe_deploy_file: Path of the Caffe deploy file.
        trt_model_filename: Path the serialized engine is written to.
        output_tensor_name: Name of the tensor to mark as network output.
        output_data_type: ``'fp16'`` enables FP16 mode; any other value
            builds in fp32.
        max_workspace_size: Value assigned to ``builder.max_workspace_size``.
        max_batch_size: Value assigned to ``builder.max_batch_size``.

    Returns:
        None. Serialization and write failures are reported on stdout.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        if output_data_type == 'fp16':
            if not builder.platform_has_fast_fp16:
                print(
                    'Warning: This platform is not optimized for fast fp16 mode'
                )
            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(
                max_batch_size))
        else:
            print('Converting into fp32 (default), max_batch_size={}'.format(
                max_batch_size))

        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        # Weights are always parsed as float32, including in fp16 mode.
        model_tensors = parser.parse(deploy=caffe_deploy_file,
                                     model=caffe_weights_file,
                                     network=network,
                                     dtype=trt.float32)
        network.mark_output(model_tensors.find(output_tensor_name))

        trt_model_object = builder.build_cuda_engine(network)
        if trt_model_object is None:
            # A failed build used to surface as an AttributeError swallowed by
            # a bare except; report it explicitly with the same message.
            print(
                'Error: cannot serialize or write TensorRT engine to file {}.'.
                format(trt_model_filename))
            return

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception as err:
            # Still best-effort, but KeyboardInterrupt and SystemExit now
            # propagate and the cause is reported.
            print(
                'Error: cannot serialize or write TensorRT engine to file {}.'.
                format(trt_model_filename))
            print('Reason: {}'.format(err))
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    """Build an INT8 engine from a Caffe model via a serialized plan.

    Args:
        deploy_file: Path of the Caffe deploy file.
        model_file: Path of the Caffe model (weights) file.
        calib: Calibrator assigned to ``config.int8_calibrator``.
        batch_size: Value assigned to ``builder.max_batch_size``.

    Returns:
        The engine obtained by deserializing the built plan.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            builder.create_builder_config() as config:
        with trt.CaffeParser() as parser, trt.Runtime(TRT_LOGGER) as runtime:
            # The builder batch size mirrors the calibrator's because the same
            # batches are used for inference. This is not required in
            # general; the two batch sizes are independent.
            builder.max_batch_size = batch_size
            config.max_workspace_size = common.GiB(1)
            config.set_flag(trt.BuilderFlag.INT8)
            config.int8_calibrator = calib

            # Parse the Caffe model and mark its output.
            blob_map = parser.parse(deploy=deploy_file,
                                    model=model_file,
                                    network=network,
                                    dtype=ModelData.DTYPE)
            output_tensor = blob_map.find(ModelData.OUTPUT_NAME)
            network.mark_output(output_tensor)

            # Building the serialized network also runs INT8 calibration.
            serialized_plan = builder.build_serialized_network(network, config)
            return runtime.deserialize_cuda_engine(serialized_plan)
def build_int8_engine(deploy_file, model_file, batch_size=32, trt_engine_datatype=trt.DataType.FLOAT):
    """Build a TensorRT engine from a Caffe model in FP32, FP16 or INT8.

    Args:
        deploy_file: Path of the Caffe deploy file.
        model_file: Path of the Caffe model (weights) file.
        batch_size: Value assigned to ``builder.max_batch_size``.
        trt_engine_datatype: ``HALF`` enables FP16 mode, ``INT8`` enables
            INT8 mode with an ``MNISTEntropyCalibrator``; any other value
            leaves the builder precision flags untouched.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network, trt.CaffeParser() as parser:
            # The builder batch size mirrors the calibrator's because the same
            # batches are used for inference. This is not required in
            # general; the two batch sizes are independent.
            builder.max_batch_size = batch_size
            builder.max_workspace_size = common.GiB(1)

            if trt_engine_datatype == trt.DataType.HALF:
                builder.fp16_mode = True
            elif trt_engine_datatype == trt.DataType.INT8:
                # Locate the calibration data; the calibrator may cache its
                # results in the given file for faster rebuilds.
                _, [calib_data] = common.find_sample_data(
                    description="Runs a Caffe MNIST network in Int8 mode",
                    subfolder="mnist",
                    find_files=["t10k-images-idx3-ubyte"])
                calibration_cache = "mnist_calibration.cache"
                builder.int8_mode = True
                builder.int8_calibrator = MNISTEntropyCalibrator(
                    calib_data, cache_file=calibration_cache)

            # Parse the Caffe model and mark its output.
            blob_map = parser.parse(deploy=deploy_file,
                                    model=model_file,
                                    network=network,
                                    dtype=ModelData.DTYPE)
            output_tensor = blob_map.find(ModelData.OUTPUT_NAME)
            network.mark_output(output_tensor)

            # Building the engine also runs INT8 calibration when enabled.
            return builder.build_cuda_engine(network)
def parse_caffe(self, caffe_model_file, caffe_deploy_file, output_name="prob1"):
    """Parse a Caffe model into ``self.network`` and mark its output.

    The parser is stored on ``self.parser`` after parsing.

    :param caffe_model_file: path to caffe model file
    :param caffe_deploy_file: path to caffe deploy file
    :param output_name: name of the tensor to mark as network output
    """
    caffe_parser = trt.CaffeParser()
    blob_map = caffe_parser.parse(
        deploy=caffe_deploy_file,
        model=caffe_model_file,
        network=self.network,
        dtype=CudaEngineManager.CONSTANTS["dtype"],
    )
    output_tensor = blob_map.find(output_name)
    self.network.mark_output(output_tensor)
    self.parser = caffe_parser
def build_engine_caffe(model_file, deploy_file, precision):
    """Build a TensorRT engine from a Caffe model at the requested precision.

    Args:
        model_file: Path of the Caffe model (weights) file.
        deploy_file: Path of the Caffe deploy file.
        precision: ``"half"`` enables FP16, ``"int8"`` enables INT8 with
            manually assigned dynamic ranges and no calibrator; any other
            value builds with the default precision.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    # The severity of TRT_LOGGER controls how many messages are displayed.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # Upper bound on the memory the builder may use while building.
        builder.max_workspace_size = common.GiB(1)
        # Parsing populates the network and returns a name-to-tensor lookup.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # Caffe models need their output tensor marked manually.
        print(model_tensors)
        print(ModelData.OUTPUT_NAME)
        print(model_tensors.find(ModelData.OUTPUT_NAME))
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))

        if precision == "half":
            builder.fp16_mode = True
            builder.strict_type_constraints = True
            print("pricision: half")
        elif precision == "int8":
            # Incomplete INT8 setup (placeholder ranges); see the TensorRT
            # sampleINT8API sample for the full procedure.
            builder.int8_mode = True
            builder.int8_calibrator = None
            builder.strict_type_constraints = True
            print(network.num_layers)
            for i in range(network.num_layers):
                layer = network[i]
                # Cover every output and input of the layer, and skip missing
                # tensors: a layer without inputs has no get_input(0), so the
                # unguarded call would raise on None.
                tensors = [layer.get_output(j) for j in range(layer.num_outputs)]
                tensors += [layer.get_input(j) for j in range(layer.num_inputs)]
                for tensor in tensors:
                    if tensor is not None:
                        tensor.set_dynamic_range(-1.0, 1.0)
            print("pricision: int8")
        else:
            print("pricision: float")

        return builder.build_cuda_engine(network)
def build_engine():
    """Build a TensorRT engine from a Caffe model and save it to disk.

    Reads ``deploy_file``, ``model_file`` and ``engine_file_path`` from the
    enclosing scope. Precision follows ``ModelData.DTYPE``: ``trt.float16``
    enables FP16 mode, ``trt.int8`` enables INT8 mode with a
    ``common.MNISTEntropyCalibrator``. The serialized engine is written to
    ``engine_file_path`` only when the build succeeds.

    Returns:
        The built engine, or ``None`` when ``build_cuda_engine`` returns
        ``None``.
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            common.EXPLICIT_BATCH) as network, trt.CaffeParser() as parser:
        builder.max_batch_size = ModelData.BATCH_SIZE
        builder.max_workspace_size = common.GiB(ModelData.MEM_SIZE)

        if ModelData.DTYPE == trt.float16:
            builder.fp16_mode = True
        elif ModelData.DTYPE == trt.int8:
            # Original author's note: this has problems with ONNX; the
            # official Caffe example works.
            builder.int8_mode = True
            # The calibrator is given the calibration data location and may
            # cache its results for faster rebuilds.
            calibration_cache = "calibration.cache"
            calib = common.MNISTEntropyCalibrator(
                ModelData.data_dir,
                ModelData.INPUT_SHAPE[-2:],
                cache_file=calibration_cache,
                batch_size=ModelData.BATCH_SIZE)
            builder.int8_calibrator = calib

        # Parsing populates the network and returns a name-to-tensor lookup.
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=ModelData.DTYPE)
        # Caffe models need their output tensor marked manually.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))

        print('Building an engine from file {}; this may take a while...'.
              format(model_file))
        engine = builder.build_cuda_engine(network)
        if engine is None:
            # Check before opening the file: opening in "wb" mode truncates
            # it, and serialize() on None would then raise.
            print("ERROR: Failed to build the TensorRT engine; nothing was written to {}".format(engine_file_path))
            return None
        print("Completed creating Engine")

        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
def build_engine(model_file, deploy_file, trt_logger, batch_size=1, precison_mode='FP32'):
    """Build a TensorRT engine from a Caffe model in FP32 or FP16.

    Args:
        model_file: Path of the Caffe model (weights) file.
        deploy_file: Path of the Caffe deploy file.
        trt_logger: Logger handed to ``trt.Builder``.
        batch_size: Value assigned to ``builder.max_batch_size``.
        precison_mode: ``'FP16'`` parses the weights as float16 and enables
            the builder's FP16 mode; any other value uses float32. The
            parameter name is misspelled but kept for existing callers.

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    use_fp16 = precison_mode == 'FP16'
    dtype = trt.float16 if use_fp16 else trt.float32

    with trt.Builder(trt_logger) as builder, builder.create_network(
    ) as network, trt.CaffeParser() as parser:
        # Workspace size for building an engine.
        builder.max_workspace_size = 1 << 30
        # Max batch size.
        builder.max_batch_size = batch_size
        if use_fp16:
            # Changing the parser dtype alone does not turn on FP16 mode; the
            # builder flag must be set too, as the other FP16 builders do.
            builder.fp16_mode = True

        print("Building TensorRT engine. This may take few minutes.")
        model_tensors = parser.parse(deploy=deploy_file,
                                     model=model_file,
                                     network=network,
                                     dtype=dtype)
        # Look up the output tensor by name and mark it.
        network.mark_output(model_tensors.find(OUTPUT_NAME))
        return builder.build_cuda_engine(network)
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    """Build an INT8 TensorRT engine from a Caffe model with a builder config.

    Args:
        deploy_file: Path of the Caffe deploy file.
        model_file: Path of the Caffe model (weights) file.
        calib: Calibrator assigned to ``config.int8_calibrator``.
        batch_size: Value assigned to ``builder.max_batch_size``.

    Returns:
        The result of ``builder.build_engine(network, config)``.
    """
    # Create the builder, network, builder config and parser instances.
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network:
        with builder.create_builder_config() as config, \
                trt.CaffeParser() as parser:
            # Set the build parameters. The builder batch size mirrors the
            # calibrator's because the same batches are used for inference.
            # This is not required in general; the two are independent.
            builder.max_batch_size = batch_size
            config.max_workspace_size = common.GiB(1)
            config.set_flag(trt.BuilderFlag.INT8)
            config.int8_calibrator = calib

            # Parse the deploy file and the Caffe model to obtain the network
            # definition and the weights associated with it.
            blob_map = parser.parse(deploy=deploy_file,
                                    model=model_file,
                                    network=network,
                                    dtype=ModelData.DTYPE)

            # Mark the network output.
            output_tensor = blob_map.find(ModelData.OUTPUT_NAME)
            network.mark_output(output_tensor)

            # Building the engine also runs INT8 calibration.
            return builder.build_engine(network, config)
# 3.2.2. Importing A Model Using A Parser In Python
'''
To import a model using a parser, you will need to perform the following high-level steps:
    Create the TensorRT builder and network.
    Create the TensorRT parser for the specific format.
    Use the parser to parse the imported model and populate the network.
The builder must be created before the network because it serves as a factory for the network.
Different parsers have different mechanisms for marking network outputs.
'''

# 3.2.3. Importing From Caffe Using Python
import tensorrt as trt

datatype = trt.float32  # Define the data type. In this example, we will use float32.
deploy_file = 'data/mnist/mnist.prototxt'
model_file = 'data/mnist/mnist.caffemodel'
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
    model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=datatype)
# The parser returns the model_tensors, which is a table containing the mapping from tensor names to ITensor objects.

# 3.2.4. Importing From TensorFlow Using Python
'''
Create a frozen TensorFlow model for the tensorflow model.
The instructions on freezing a TensorFlow model into a stream can be found in Freezing A TensorFlow Graph.
Use the UFF converter to convert a frozen tensorflow model to a UFF file. Typically, this is as simple as:
    convert-to-uff frozen_inference_graph.pb
'''
model_file = '/data/mnist/mnist.uff'
# "with builder = trt.Builder(...) as builder" is not valid Python; the
# context manager is bound with "as" only.
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
    parser.register_input("Placeholder", (1, 28, 28))
    parser.register_output("fc2/Relu")
    parser.parse(model_file, network)

# 3.2.5. Importing From ONNX Using Python
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
    # NOTE(review): the original notes stop after the "with" header; the step
    # that feeds the ONNX model to the parser still has to be filled in.
    pass
# Load the Caffe model in TensorRT and create the engine ----------------------
logger = trt.Logger(trt.Logger.VERBOSE)
if os.path.isfile(trtFile):
    # A serialized engine already exists: deserialize it instead of rebuilding.
    with open(trtFile, 'rb') as f:
        engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
    if engine is None:
        print("Failed loading engine!")
        exit()
    print("Succeeded loading engine!")
else:
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()
    config.max_workspace_size = 3 << 30
    parser = trt.CaffeParser()
    with open(caffePrototxtFile, 'rb') as f0, open(caffeModelFile, 'rb') as f1:
        net = parser.parse_buffer(f0.read(), f1.read(), network, trt.float32)
    if net is None:
        print("Failed parsing caffe file!")
        # Stop here like the other failure branches; continuing would call
        # net.find() on None.
        exit()
    print("Succeeded parsing cafe file!")

    outTensor = net.find('y')  # find the network's output tensor
    # Remove the extra dimensions that were added manually earlier.
    squeezeLayer = network.add_reduce(outTensor, trt.ReduceOperation.SUM, (1 << 2) + (1 << 3), False)
    # Add the Argmax layer, which Caffe does not support.
    argmaxLayer = network.add_topk(squeezeLayer.get_output(0), trt.TopKOperation.MAX, 1, 1 << 1)
    network.mark_output(argmaxLayer.get_output(1))

    engineString = builder.build_serialized_network(network, config)
    if engineString is None:
        print("Failed building engine!")
        exit()
def _build_engine_caffe(model_info):
    """Build a TensorRT engine from the Caffe model described by ``model_info``.

    Args:
        model_info: Object providing ``max_batch_size``,
            ``max_workspace_size`` (in GiB, may be fractional), ``flag_fp16``,
            ``deploy_file``, ``model_file``, ``data_type`` and
            ``output_name`` (a sequence of output tensor names).

    Returns:
        The result of ``builder.build_cuda_engine(network)``.
    """
    def GiB(x):
        # Parenthesized and truncated to int so that fractional sizes such as
        # 0.5 work; "x * 1 << 30" raises TypeError for floats.
        return int(x * (1 << 30))

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_batch_size = model_info.max_batch_size
        builder.max_workspace_size = GiB(model_info.max_workspace_size)
        builder.fp16_mode = model_info.flag_fp16

        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=model_info.deploy_file, model=model_info.model_file, network=network, dtype=model_info.data_type)
        for output_name in model_info.output_name:
            print('=> Marking output blob "', output_name, '"')
            network.mark_output(model_tensors.find(output_name))

        print("=> Building TensorRT engine. This may take a few minutes.")
        return builder.build_cuda_engine(network)
def retrieve_mean(mean_proto):
    """Parse a binary proto file with a temporary Caffe parser.

    Args:
        mean_proto: Argument forwarded to ``parser.parse_binary_proto``.

    Returns:
        The result of ``parser.parse_binary_proto(mean_proto)``.
    """
    with trt.CaffeParser() as caffe_parser:
        mean_blob = caffe_parser.parse_binary_proto(mean_proto)
        return mean_blob