def build_engine_onnx(model_file):
    # Create the builder, network, builder config, and ONNX parser instances.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            common.EXPLICIT_BATCH) as network, builder.create_builder_config(
            ) as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
        # Set the builder parameters.
        config.max_workspace_size = common.GiB(1)
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        # Build the engine.
        return builder.build_engine(network, config)
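# A minimal usage sketch for build_engine_onnx above. It assumes the
# allocate_buffers/do_inference_v2 helpers from the TensorRT samples' common.py
# (the same module that already provides GiB and EXPLICIT_BATCH here);
# "model.onnx" is a placeholder path, not part of the original code.
engine = build_engine_onnx("model.onnx")
if engine is not None:
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    with engine.create_execution_context() as context:
        # Fill inputs[0].host with preprocessed input data before running.
        [output] = common.do_inference_v2(context, bindings=bindings,
                                          inputs=inputs, outputs=outputs,
                                          stream=stream)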
def build_engine_caffe(model_file, deploy_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        builder.int8_mode = True
        builder.int8_calibrator = calib
        # Parse the Caffe model.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build the engine and do INT8 calibration.
        return builder.build_cuda_engine(network)
def build_engine_onnx_int8(TRT_LOGGER, model_file, calib):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.int8_mode = True
        # calib is e.g. an entropy calibrator built from a training-file list,
        # with its results cached (e.g. "res50_calibration.cache").
        builder.int8_calibrator = calib
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        return builder.build_cuda_engine(network)
def build_engine_with_some_missing_weights(weights):
    # For more information on TRT basics, refer to the introductory samples.
    # Create the builder and network instances.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        # Set the maximum workspace size; see common.py for the GiB helper.
        builder.max_workspace_size = common.GiB(1)
        # Set the refit flag in the builder.
        builder.refittable = True
        # Populate the network using weights from the PyTorch model; see
        # populate_network_with_some_dummy_weights in this file for details.
        populate_network_with_some_dummy_weights(network, weights)
        # Build and return an engine (builds an ICudaEngine from an INetworkDefinition).
        return builder.build_cuda_engine(network)
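# Because refittable is set above, the engine's weights can later be swapped
# without a rebuild via trt.Refitter. A minimal sketch; the layer name "fc1"
# and new_kernel_weights are hypothetical placeholders, not from the original.
def refit_engine(engine, new_kernel_weights):
    refitter = trt.Refitter(engine, TRT_LOGGER)
    # Point the named layer's kernel at the new weights, then re-validate.
    refitter.set_weights("fc1", trt.WeightsRole.KERNEL, new_kernel_weights)
    assert refitter.refit_cuda_engine(), "Refit failed; some weights may still be missing."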
def build_engine():
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.fp16_mode = True
        builder.strict_type_constraints = True
        builder.max_batch_size = 16
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        engine = builder.build_cuda_engine(network)
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, builder.create_builder_config() as config, trt.CaffeParser() as parser, trt.Runtime(TRT_LOGGER) as runtime:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        config.max_workspace_size = common.GiB(1)
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = calib
        # Parse the Caffe model.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build the engine and do INT8 calibration.
        plan = builder.build_serialized_network(network, config)
        return runtime.deserialize_cuda_engine(plan)
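# The calib object passed to build_int8_engine has to implement TensorRT's
# INT8 calibrator interface. A minimal sketch modeled on the pattern in the
# official Python samples; the `batches` iterator (NumPy float32 arrays of
# identical shape) and the cache path are assumptions, not original code.
import os
import numpy as np
import pycuda.autoinit  # Creates a CUDA context on import.
import pycuda.driver as cuda
import tensorrt as trt

class EntropyCalibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, batches, cache_file="calibration.cache"):
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.batches = batches  # Iterator of (N, C, H, W) float32 arrays.
        self.cache_file = cache_file
        self.current = next(self.batches)
        self.batch_size = self.current.shape[0]
        self.device_input = cuda.mem_alloc(self.current.nbytes)

    def get_batch_size(self):
        return self.batch_size

    def get_batch(self, names):
        if self.current is None:
            return None  # Tells TensorRT that calibration data is exhausted.
        cuda.memcpy_htod(self.device_input, np.ascontiguousarray(self.current))
        self.current = next(self.batches, None)
        return [int(self.device_input)]

    def read_calibration_cache(self):
        # Reuse the scales from a previous calibration run when available.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)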
def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    # Create the builder, network, builder config, and UFF parser instances.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, builder.create_builder_config() as config, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        config.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        # register_input registers an input name and its shape for the UFF network.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_engine(network, config)
def build_int8_engine(deploy_file, model_file, batch_size=32, trt_engine_datatype=trt.DataType.FLOAT):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        elif trt_engine_datatype == trt.DataType.INT8:
            # Now we create a calibrator and give it the location of our calibration data.
            # We also allow it to cache calibration data for faster engine building.
            _, [calib_data] = common.find_sample_data(description="Runs a Caffe MNIST network in Int8 mode", subfolder="mnist", find_files=["t10k-images-idx3-ubyte"])
            calibration_cache = "mnist_calibration.cache"
            builder.int8_mode = True
            builder.int8_calibrator = MNISTEntropyCalibrator(calib_data, cache_file=calibration_cache)
        # Parse the Caffe model.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build the engine and do INT8 calibration.
        return builder.build_cuda_engine(network)
def build_engine_caffe(model_file, deploy_file, precision):
    # precision: float, half, int8
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        if precision == "half":
            # Enable FP16.
            builder.fp16_mode = True
            builder.strict_type_constraints = True
            print("precision: half")
        elif precision == "int8":
            # Enable INT8 and set the dynamic ranges by hand (no calibrator).
            # Incomplete version; please refer to workspace/tensorrt/samples/sampleINT8API/sampleINT8API.cpp.
            builder.int8_mode = True
            builder.int8_calibrator = None
            builder.strict_type_constraints = True
            for i in range(network.num_layers):
                layer = network[i]
                tensor = layer.get_output(0)
                if tensor:
                    tensor.set_dynamic_range(-1.0, 1.0)
                tensor = layer.get_input(0)
                if tensor:
                    tensor.set_dynamic_range(-1.0, 1.0)
            print("precision: int8")
        else:
            print("precision: float")
        return builder.build_cuda_engine(network)
def build_int8_engine(onnx_file_path, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, \
            builder.create_builder_config() as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        config.max_workspace_size = common.GiB(1)
        config.set_flag(trt.BuilderFlag.INT8)
        config.set_flag(trt.BuilderFlag.STRICT_TYPES)
        config.int8_calibrator = calib
        # Parse the ONNX model.
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        network.get_input(0).shape = [batch_size, 3, 32, 32]
        # Decide which layers fall back to FP32.
        # If all layers should fall back to FP32, you can use 'index > -1'.
        for index, layer in enumerate(network):
            print('layer index', index, ':', layer.type)
            if index < 10:
                if layer.type in (trt.LayerType.ACTIVATION,
                                  trt.LayerType.CONVOLUTION,
                                  trt.LayerType.FULLY_CONNECTED,
                                  trt.LayerType.SCALE):
                    print('fallback to fp32!')
                    layer.precision = trt.float32
                    layer.set_output_type(0, trt.float32)
        # Build the engine and do INT8 calibration.
        return builder.build_engine(network, config)
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.max_batch_size = args.batch_size
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            ok = parser.parse(model.read())
            if not ok:
                print("Error: Parse onnx model \"{}\" failed.".format(model_file))
                error = parser.get_error(0)
                print("  code: {}".format(error.code()))
                print("  desc: {}".format(error.desc()))
                print("  file: {}".format(error.file()))
                print("  func: {}".format(error.func()))
                print("  line: {}".format(error.line()))
                print("  node: {}".format(error.node()))
                exit(-1)
        if args.q:
            # Enable INT8 and set the dynamic ranges by hand (no calibrator).
            # Incomplete version; please refer to workspace/tensorrt/samples/sampleINT8API/sampleINT8API.cpp.
            builder.int8_mode = True
            builder.int8_calibrator = None
            builder.strict_type_constraints = True
            for i in range(network.num_layers):
                layer = network[i]
                tensor = layer.get_output(0)
                if tensor:
                    tensor.set_dynamic_range(-1.0, 1.0)
                tensor = layer.get_input(0)
                if tensor:
                    tensor.set_dynamic_range(-1.0, 1.0)
        return builder.build_cuda_engine(network)
def test_trt_export(model_name=ONNX_MODEL_NAME):
    import tensorrt as trt
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            common.EXPLICIT_BATCH) as network, trt.OnnxParser(
            network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.fp16_mode = False
        builder.max_batch_size = 1
        with open(model_name, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        engine = builder.build_cuda_engine(network)
        if engine is not None:
            print("CUDA engine built successfully!")
        return engine
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            common.EXPLICIT_BATCH) as network, trt.OnnxParser(
            network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.fp16_mode = True
        builder.max_batch_size = 1  # Always 1 for explicit batch.
        config = builder.create_builder_config()
        # FP16 needs to be set on the config as well when a config is specified.
        config.set_flag(trt.BuilderFlag.FP16)
        profile = builder.create_optimization_profile()
        profile.set_shape('input', (1, 1, 4, 4), (2, 1, 4, 4), (4, 1, 4, 4))
        profile.set_shape('grid', (1, 4, 4, 2), (2, 4, 4, 2), (4, 4, 4, 2))
        config.add_optimization_profile(profile)
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        return builder.build_engine(network, config)
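# With an optimization profile, concrete input shapes must be set on the
# execution context before inference. A minimal sketch for the engine built
# above; "grid_sample.onnx" is a placeholder file name.
engine = build_engine_onnx("grid_sample.onnx")
with engine.create_execution_context() as context:
    # Pick shapes within the [min, max] range of the profile above.
    context.set_binding_shape(engine.get_binding_index("input"), (2, 1, 4, 4))
    context.set_binding_shape(engine.get_binding_index("grid"), (2, 4, 4, 2))
    assert context.all_binding_shapes_specified
    # Buffers can now be sized from context.get_binding_shape(...) and
    # inference run with execute_v2/execute_async_v2.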
def build_engine_onnx(model_file, calibrator=None):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.max_batch_size = 8
        if calibrator:
            builder.int8_mode = True
            builder.int8_calibrator = calibrator
            precision = "int8"
        else:
            builder.fp16_mode = True
            precision = "fp16"
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                return None
        engine = builder.build_cuda_engine(network)
        if engine is None:
            print('ERROR: Failed to build the TensorRT engine.')
            return None
        serialized = engine.serialize()
        with open("/work/models/flowers-152-b{}-{}.engine".format(builder.max_batch_size, precision), "wb") as file:
            file.write(serialized)
        return engine
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    # Create the builder, network, builder config, and Caffe parser instances.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, builder.create_builder_config() as config, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        config.max_workspace_size = common.GiB(1)
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = calib
        # Parse the Caffe model: the prototxt file provides the network definition
        # and the binaryproto file provides the associated weights.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # Mark the output of the network.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build the engine and do INT8 calibration.
        return builder.build_engine(network, config)
def build_engine(model_dir):
    """Build a TensorRT engine through the Python API.

    Args:
        model_dir: the trained TensorFlow PSENet model dir.

    Returns:
        engine: the built TensorRT engine.
    """
    ckpt = tf.train.get_checkpoint_state(model_dir)
    ckpt_path = ckpt.model_checkpoint_path
    reader = pywrap_tensorflow.NewCheckpointReader(ckpt_path)
    explicit_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            explicit_flag) as network, builder.create_builder_config() as config:
        data = network.add_input(INPUT_NAME, trt.float32, (-1, 3, -1, -1))
        # C1: 7x7/2 convolution + batch norm + ReLU.
        w = reader.get_tensor("resnet_v1_50/conv1/weights").transpose(3, 2, 0, 1).reshape(-1)
        b = np.zeros(64, dtype=np.float32)
        conv1 = network.add_convolution(data, 64, (7, 7), trt.Weights(w), trt.Weights(b))
        conv1.stride = (2, 2)
        conv1.padding = (3, 3)
        bn1 = add_batchnorm(reader, network, conv1.get_output(0), "resnet_v1_50/conv1/BatchNorm/", 1e-5)
        relu1 = network.add_activation(bn1.get_output(0), trt.ActivationType.RELU)
        # C2
        pool1 = network.add_pooling(relu1.get_output(0), trt.PoolingType.MAX, (3, 3))
        pool1.stride = (2, 2)
        pool1.pre_padding = (0, 0)
        pool1.post_padding = (1, 1)
        x = bottleneck(reader, network, pool1.get_output(0), 64, 1, "resnet_v1_50/block1/unit_1/bottleneck_v1/", 1)
        x = bottleneck(reader, network, x.get_output(0), 64, 1, "resnet_v1_50/block1/unit_2/bottleneck_v1/", 0)
        # C3
        block1 = bottleneck(reader, network, x.get_output(0), 64, 2, "resnet_v1_50/block1/unit_3/bottleneck_v1/", 2)
        x = bottleneck(reader, network, block1.get_output(0), 128, 1, "resnet_v1_50/block2/unit_1/bottleneck_v1/", 1)
        x = bottleneck(reader, network, x.get_output(0), 128, 1, "resnet_v1_50/block2/unit_2/bottleneck_v1/", 0)
        x = bottleneck(reader, network, x.get_output(0), 128, 1, "resnet_v1_50/block2/unit_3/bottleneck_v1/", 0)
        # C4
        block2 = bottleneck(reader, network, x.get_output(0), 128, 2, "resnet_v1_50/block2/unit_4/bottleneck_v1/", 2)
        x = bottleneck(reader, network, block2.get_output(0), 256, 1, "resnet_v1_50/block3/unit_1/bottleneck_v1/", 1)
        x = bottleneck(reader, network, x.get_output(0), 256, 1, "resnet_v1_50/block3/unit_2/bottleneck_v1/", 0)
        x = bottleneck(reader, network, x.get_output(0), 256, 1, "resnet_v1_50/block3/unit_3/bottleneck_v1/", 0)
        x = bottleneck(reader, network, x.get_output(0), 256, 1, "resnet_v1_50/block3/unit_4/bottleneck_v1/", 0)
        x = bottleneck(reader, network, x.get_output(0), 256, 1, "resnet_v1_50/block3/unit_5/bottleneck_v1/", 0)
        block3 = bottleneck(reader, network, x.get_output(0), 256, 2, "resnet_v1_50/block3/unit_6/bottleneck_v1/", 2)
        x = bottleneck(reader, network, block3.get_output(0), 512, 1, "resnet_v1_50/block4/unit_1/bottleneck_v1/", 1)
        x = bottleneck(reader, network, x.get_output(0), 512, 1, "resnet_v1_50/block4/unit_2/bottleneck_v1/", 0)
        # C5
        block4 = bottleneck(reader, network, x.get_output(0), 512, 1, "resnet_v1_50/block4/unit_3/bottleneck_v1/", 0)
        # Feature pyramid: P5, then top-down merges for P4, P3, P2.
        build_p5_r1 = add_conv_relu(reader, network, block4.get_output(0), 256, 1, 1, "build_feature_pyramid/build_P5/")
        build_p4_r1 = add_conv_relu(reader, network, block2.get_output(0), 256, 1, 1, "build_feature_pyramid/build_P4/reduce_dimension/")
        bfp_layer4_resize = network.add_resize(build_p5_r1.get_output(0))
        build_p4_r1_shape = network.add_shape(build_p4_r1.get_output(0)).get_output(0)
        bfp_layer4_resize.set_input(1, build_p4_r1_shape)
        bfp_layer4_resize.resize_mode = trt.ResizeMode.NEAREST
        bfp_layer4_resize.align_corners = False
        bfp_add = network.add_elementwise(build_p4_r1.get_output(0), bfp_layer4_resize.get_output(0), trt.ElementWiseOperation.SUM)
        build_p4_r2 = add_conv_relu(reader, network, bfp_add.get_output(0), 256, 3, 1, "build_feature_pyramid/build_P4/avoid_aliasing/")
        build_p3_r1 = add_conv_relu(reader, network, block1.get_output(0), 256, 1, 1, "build_feature_pyramid/build_P3/reduce_dimension/")
        bfp_layer3_resize = network.add_resize(build_p4_r2.get_output(0))
        bfp_layer3_resize.resize_mode = trt.ResizeMode.NEAREST
        build_p3_r1_shape = network.add_shape(build_p3_r1.get_output(0)).get_output(0)
        bfp_layer3_resize.set_input(1, build_p3_r1_shape)
        bfp_layer3_resize.align_corners = False
        bfp_add1 = network.add_elementwise(bfp_layer3_resize.get_output(0), build_p3_r1.get_output(0), trt.ElementWiseOperation.SUM)
        build_p3_r2 = add_conv_relu(reader, network, bfp_add1.get_output(0), 256, 3, 1, "build_feature_pyramid/build_P3/avoid_aliasing/")
        build_p2_r1 = add_conv_relu(reader, network, pool1.get_output(0), 256, 1, 1, "build_feature_pyramid/build_P2/reduce_dimension/")
        bfp_layer2_resize = network.add_resize(build_p3_r2.get_output(0))
        bfp_layer2_resize.resize_mode = trt.ResizeMode.NEAREST
        build_p2_r1_shape = network.add_shape(build_p2_r1.get_output(0)).get_output(0)
        bfp_layer2_resize.set_input(1, build_p2_r1_shape)
        bfp_layer2_resize.align_corners = False
        bfp_add2 = network.add_elementwise(bfp_layer2_resize.get_output(0), build_p2_r1.get_output(0), trt.ElementWiseOperation.SUM)
        # P2
        build_p2_r2 = add_conv_relu(reader, network, bfp_add2.get_output(0), 256, 3, 1, "build_feature_pyramid/build_P2/avoid_aliasing/")
        build_p2_r2_shape = network.add_shape(build_p2_r2.get_output(0)).get_output(0)
        # P3 x2
        layer1_resize = network.add_resize(build_p3_r2.get_output(0))
        layer1_resize.resize_mode = trt.ResizeMode.LINEAR
        layer1_resize.set_input(1, build_p2_r2_shape)
        layer1_resize.align_corners = False
        # P4 x4
        layer2_resize = network.add_resize(build_p4_r2.get_output(0))
        layer2_resize.resize_mode = trt.ResizeMode.LINEAR
        layer2_resize.set_input(1, build_p2_r2_shape)
        layer2_resize.align_corners = False
        # P5 x8
        layer3_resize = network.add_resize(build_p5_r1.get_output(0))
        layer3_resize.resize_mode = trt.ResizeMode.LINEAR
        layer3_resize.set_input(1, build_p2_r2_shape)
        layer3_resize.align_corners = False
        # C(P5, P4, P3, P2)
        concat = network.add_concatenation([
            layer3_resize.get_output(0),
            layer2_resize.get_output(0),
            layer1_resize.get_output(0),
            build_p2_r2.get_output(0),
        ])
        # Head: 3x3 conv + batch norm + ReLU, then 1x1 conv to 6 channels + sigmoid.
        w = reader.get_tensor("feature_results/Conv/weights").transpose(3, 2, 0, 1).reshape(-1)
        b = np.zeros(256, dtype=np.float32)
        feature_result_conv = network.add_convolution(concat.get_output(0), 256, (3, 3), trt.Weights(w), trt.Weights(b))
        feature_result_conv.padding = (1, 1)
        feature_result_bn = add_batchnorm(reader, network, feature_result_conv.get_output(0), "feature_results/Conv/BatchNorm/", 1e-5)
        feature_result_relu = network.add_activation(feature_result_bn.get_output(0), trt.ActivationType.RELU)
        w = reader.get_tensor("feature_results/Conv_1/weights").transpose(3, 2, 0, 1).reshape(-1)
        b = reader.get_tensor("feature_results/Conv_1/biases")
        feature_result_conv_1 = network.add_convolution(feature_result_relu.get_output(0), 6, (1, 1), trt.Weights(w), trt.Weights(b))
        sigmoid = network.add_activation(feature_result_conv_1.get_output(0), trt.ActivationType.SIGMOID)
        sigmoid.get_output(0).name = OUTPUT_NAME
        network.mark_output(sigmoid.get_output(0))
        profile = builder.create_optimization_profile()
        profile.set_shape("input", min=(1, 3, 128, 128), opt=(1, 3, 640, 640), max=(4, 3, 1200, 1200))
        config.add_optimization_profile(profile)
        config.max_workspace_size = common.GiB(1)
        if USE_FP16:
            config.set_flag(trt.BuilderFlag.FP16)
        engine = builder.build_engine(network, config)
        return engine
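# Dynamic-shape builds like the PSENet engine above are slow, so the engine is
# typically serialized once and reloaded afterwards. A minimal sketch; the
# checkpoint dir and "psenet.engine" path are placeholders.
engine = build_engine("/path/to/psenet_checkpoint_dir")
with open("psenet.engine", "wb") as f:
    f.write(engine.serialize())

# Later: reload without rebuilding.
with trt.Runtime(TRT_LOGGER) as runtime, open("psenet.engine", "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())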