def get_engine(onnx_file_path, engine_file_path=""): """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" def build_engine(): """Takes an ONNX file and creates a TensorRT engine to run inference with""" with trt.Builder(TRT_LOGGER) as builder, builder.create_network( ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser: builder.max_workspace_size = 1 << 28 # 256MiB builder.max_batch_size = 1 # Parse model file if not os.path.exists(onnx_file_path): print( 'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.' .format(onnx_file_path)) exit(0) print('Loading ONNX file from path {}...'.format(onnx_file_path)) with open(onnx_file_path, 'rb') as model: print('Beginning ONNX file parsing') #parser.parse(model.read()) #parser.parse returns a bool, and we were not checking it originally. if not parser.parse(model.read()): print(parser.get_error(0)) print( network.get_layer(network.num_layers - 1).get_output(0).shape) network.mark_output( network.get_layer(network.num_layers - 1).get_output(0)) print('Completed parsing of ONNX file') print('Building an engine from file {}; this may take a while...'. format(onnx_file_path)) engine = builder.build_cuda_engine(network) print("Completed creating Engine") print(engine) with open(engine_file_path, "wb") as f: f.write(engine.serialize()) return engine if os.path.exists(engine_file_path): # If a serialized engine exists, use it instead of building an engine. print("Reading engine from file {}".format(engine_file_path)) with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: return runtime.deserialize_cuda_engine(f.read()) else: return build_engine()
def get_engine(onnx_file_path, engine_file_path, input_size, rebuild=True):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    print("Explicit batch: ", common.EXPLICIT_BATCH)
    max_batch_size = 1

    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
                common.EXPLICIT_BATCH) as network, trt.OnnxParser(
                    network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1 GiB
            builder.max_batch_size = max_batch_size
            # builder.int8_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found.'.format(onnx_file_path))
                exit(1)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            # The original yolov3.onnx is exported with batch size 64; reshape the input to batch size 1.
            network.get_input(0).shape = [max_batch_size] + input_size
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path) and not rebuild:
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()

def __init__(self, engine_file_path, num_class):
    # Create a CUDA context on this device.
    self.cfx = cuda.Device(0).make_context()
    stream = cuda.Stream()
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    runtime = trt.Runtime(TRT_LOGGER)
    # Deserialize the engine from file
    with open(engine_file_path, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    host_inputs = []
    cuda_inputs = []
    host_outputs = []
    cuda_outputs = []
    bindings = []

    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        cuda_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(cuda_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            host_inputs.append(host_mem)
            cuda_inputs.append(cuda_mem)
        else:
            host_outputs.append(host_mem)
            cuda_outputs.append(cuda_mem)

    # Store
    self.stream = stream
    self.context = context
    self.engine = engine
    self.host_inputs = host_inputs
    self.cuda_inputs = cuda_inputs
    self.host_outputs = host_outputs
    self.cuda_outputs = cuda_outputs
    self.bindings = bindings
    self.num_class = num_class

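# A hedged sketch, not in the original: one way the buffers stored above could
# drive inference. The `infer` name and batch-of-one layout are assumptions;
# pushing/popping self.cfx is what makes this safe to call from worker threads.
def infer(self, input_array):
    self.cfx.push()
    try:
        np.copyto(self.host_inputs[0], input_array.ravel())
        cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0], self.stream)
        self.context.execute_async(batch_size=1, bindings=self.bindings,
                                   stream_handle=self.stream.handle)
        for host_out, cuda_out in zip(self.host_outputs, self.cuda_outputs):
            cuda.memcpy_dtoh_async(host_out, cuda_out, self.stream)
        self.stream.synchronize()
    finally:
        self.cfx.pop()
    return [out.copy() for out in self.host_outputs]
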
def build_engine(weights, engine_file_path):
    if os.path.exists(engine_file_path):
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = 2 << 30  # 2 GiB
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        engine = builder.build_cuda_engine(network)
        assert engine is not None
        print("Completed creating Engine")
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine

def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                          missing_keys, unexpected_keys, error_msgs):
    engine_bytes = state_dict[prefix + "engine"]
    with trt.Logger() as logger, trt.Runtime(logger) as runtime:
        self.engine = runtime.deserialize_cuda_engine(engine_bytes)
        self.context = self.engine.create_execution_context()
    self.input_names = state_dict[prefix + "input_names"]
    self.output_names = state_dict[prefix + "output_names"]

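# For symmetry, a sketch of the matching save hook (an assumption about the
# surrounding module, not shown in the original): it stores the serialized
# engine and I/O names under the same state_dict keys that are read above.
def _on_state_dict(self, state_dict, prefix, local_metadata):
    state_dict[prefix + "engine"] = bytearray(self.engine.serialize())
    state_dict[prefix + "input_names"] = self.input_names
    state_dict[prefix + "output_names"] = self.output_names
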
def create_engine(self):
    print("creating engine")
    if not os.path.exists(self.uff_file_name):
        print(self.uff_file_name, "not found. create new uff file")
        self.convert_to_uff()
    if not os.path.exists(self.engine_file_name):
        print(self.engine_file_name, "not found. build new engine")
        engine = self.build_engine()
        with open(self.engine_file_name, "wb") as f:
            f.write(engine.serialize())
    else:
        print(self.engine_file_name, "found. reuse engine")
        with open(self.engine_file_name, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
    self.engine = engine
    print("engine created")

def get_engine(model_path: str):
    """
    Attempts to load a serialized engine if available,
    otherwise builds a new TensorRT engine and saves it.
    """
    if os.path.exists(model_path):
        if model_path.endswith('trt'):
            print(f"Reading engine from file {model_path}")
            with open(model_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
                return runtime.deserialize_cuda_engine(f.read())
        elif model_path.endswith('onnx'):
            return build_engine(model_path)
        else:
            print("Invalid file: only .onnx and .trt are supported.")
    else:
        print(f"FILE: {model_path} not found.")

def _serialize_deserialize(self, trt_engine):
    if USE_PYBIND:
        self.runtime = trt.Runtime(TRT_LOGGER)
    else:
        self.runtime = trt.infer.create_infer_runtime(self._logger)
        self.plugin_factory = parser_runtime.create_plugin_factory(self._logger)
    serialized_engine = trt_engine.serialize()
    del self.parser  # Parser no longer needed for ownership of plugins
    if USE_PYBIND:
        trt_engine = self.runtime.deserialize_cuda_engine(serialized_engine)
    else:
        trt_engine = self.runtime.deserialize_cuda_engine(serialized_engine,
                                                          self.plugin_factory)
    return trt_engine

def __init__(self, trt_model_path, device,
             img_mean=np.array([128, 128, 128], dtype=np.float32),
             img_scale=np.float32(1 / 255)):
    assert device == 'GPU', 'Only supports GPU.'
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    with open(trt_model_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        self.engine = runtime.deserialize_cuda_engine(f.read())
    self.img_mean = img_mean
    self.img_scale = img_scale
    self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(self.engine)
    self.context = self.engine.create_execution_context()

def __init__(self, build_engine_proc=None, build_engine_params=None, engine_file_path=None):
    tensorrt.init_libnvinfer_plugins(None, "")
    logger = tensorrt.Logger(tensorrt.Logger.INFO)
    if engine_file_path is None:
        with tensorrt.Builder(logger) as builder:
            if build_engine_params is not None:
                self.engine = build_engine_proc(builder, *build_engine_params)
            else:
                self.engine = build_engine_proc(builder)
    else:
        with open(engine_file_path, 'rb') as f, tensorrt.Runtime(logger) as runtime:
            self.engine = runtime.deserialize_cuda_engine(f.read())
    self.context = self.engine.create_execution_context()

def __init__(self, model):
    print('setting up Yolov5s-simple.trt processor')
    # load tensorrt engine
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    TRTbin = '{0}/models/{1}'.format(os.path.dirname(__file__), model)
    with open(TRTbin, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    self.context = engine.create_execution_context()

    # allocate memory
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding))
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append({'host': host_mem, 'device': device_mem})
        else:
            outputs.append({'host': host_mem, 'device': device_mem})

    # save to class
    self.inputs = inputs
    self.outputs = outputs
    self.bindings = bindings
    self.stream = stream

    # post processing config
    filters = (80 + 5) * 3
    self.output_shapes = [
        (1, 3, 80, 80, 85),
        (1, 3, 40, 40, 85),
        (1, 3, 20, 20, 85),
    ]
    self.strides = np.array([8., 16., 32.])
    anchors = np.array([
        [[10, 13], [16, 30], [33, 23]],
        [[30, 61], [62, 45], [59, 119]],
        [[116, 90], [156, 198], [373, 326]],
    ])
    self.nl = len(anchors)
    self.nc = 80  # classes
    self.no = self.nc + 5  # outputs per anchor
    self.na = len(anchors[0])
    a = anchors.copy().astype(np.float32)
    a = a.reshape(self.nl, -1, 2)
    self.anchors = a.copy()
    self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)

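# A minimal forward-pass sketch for the processor above (assumed, not from the
# original): it round-trips one preprocessed image through the dict-style
# host/device buffers and returns the raw YOLO head outputs per detection scale.
def detect(self, img):
    # img: float32 array already resized/normalized to the engine's input shape
    np.copyto(self.inputs[0]['host'], img.ravel())
    for inp in self.inputs:
        cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
    self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)
    for out in self.outputs:
        cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
    self.stream.synchronize()
    # reshape flat buffers back to (1, 3, gy, gx, 85) per head
    return [out['host'].reshape(shape)
            for out, shape in zip(self.outputs, self.output_shapes)]
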
def test_calibrator_outside_polygraphy(self, identity_builder_network):
    builder, network = identity_builder_network
    NUM_BATCHES = 2

    config = builder.create_builder_config()
    config.set_flag(trt.BuilderFlag.INT8)
    with Calibrator(generate_data(NUM_BATCHES)) as calibrator:
        config.int8_calibrator = calibrator

        if mod.version(trt.__version__) < mod.version("8.0"):
            engine = builder.build_engine(network, config)
        else:
            with trt.Runtime(get_trt_logger()) as runtime:
                engine = runtime.deserialize_cuda_engine(
                    builder.build_serialized_network(network, config))

        with engine:
            assert engine

        self.check_calibrator_cleanup(calibrator)

def get_engine(onnx_file_path, engine_file_path=""): """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" def build_engine(): """Takes an ONNX file and creates a TensorRT engine to run inference with""" with trt.Builder(TRT_LOGGER) as builder, builder.create_network( common.EXPLICIT_BATCH ) as network, builder.create_builder_config( ) as config, trt.OnnxParser(network, TRT_LOGGER) as parser: config.max_workspace_size = 1 << 28 # 256MiB builder.max_batch_size = 1 # Parse model file if not os.path.exists(onnx_file_path): print( 'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.' .format(onnx_file_path)) exit(0) print('Loading ONNX file from path {}...'.format(onnx_file_path)) with open(onnx_file_path, 'rb') as model: print('Beginning ONNX file parsing') if not parser.parse(model.read()): print('ERROR: Failed to parse the ONNX file.') for error in range(parser.num_errors): print(parser.get_error(error)) return None # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1 network.get_input(0).shape = [1, 3, 608, 608] print('Completed parsing of ONNX file') print('Building an engine from file {}; this may take a while...'. format(onnx_file_path)) engine = builder.build_engine(network, config) print("Completed creating Engine") with open(engine_file_path, "wb") as f: f.write(engine.serialize()) return engine if os.path.exists(engine_file_path): # If a serialized engine exists, use it instead of building an engine. print("Reading engine from file {}".format(engine_file_path)) with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: return runtime.deserialize_cuda_engine(f.read()) else: return build_engine()
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                          missing_keys, unexpected_keys, error_msgs):
    engine_bytes = state_dict[prefix + "engine"]

    logger = trt.Logger()
    runtime = trt.Runtime(logger)
    self.engine = runtime.deserialize_cuda_engine(engine_bytes)

    self.input_names = state_dict[prefix + "input_names"]
    self.output_names = state_dict[prefix + "output_names"]
    self._initialize()

def __init__(self, config):
    self.detection_threshold = config["detection_threshold"]
    self.nms_threshold = config["nms_threshold"]
    self.engine_path = config["engine_file"]
    self.class_names = load_class_names(config["names_file"])

    self.logger = trt.Logger()
    self.runtime = trt.Runtime(self.logger)
    print("Reading engine from file {}".format(self.engine_path))
    with open(self.engine_path, "rb") as f:
        self.engine = self.runtime.deserialize_cuda_engine(f.read())
    self.context = self.engine.create_execution_context()
    self.buffers = self._allocate_buffers(self.engine, 1)

    self.input_h = 608  # Set in yolov4-facemask.cfg
    self.input_w = 608
    self.context.set_binding_shape(0, (1, 3, self.input_h, self.input_w))

def _load_engine(self, model_path):
    if not model_path:
        logging.info(
            "no model file specified; the COCO pretrained model will be used")
        base_url = "https://github.com/Tony607/jetson_nano_trt_tf_ssd/raw/master/packages/jetpack4.3/"
        base_dir = "detectors/data/"
        model_file = "TRT_ssd_mobilenet_v2_coco.bin"
        model_path = os.path.join(base_dir, model_file)
        if not os.path.isfile(model_path):
            logging.info('model does not exist under: {}, downloading from {}'.format(
                str(model_path), base_url + model_file))
            os.makedirs(base_dir, exist_ok=True)
            wget.download(base_url + model_file, model_path)
    # Load the engine file with a TensorRT Runtime.
    with open(model_path, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

def build_engine(onnx_path, using_half):
    engine_file = onnx_path.replace(".onnx", ".engine")
    if os.path.exists(engine_file):
        with open(engine_file, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_batch_size = 1  # always 1 for explicit batch
        config = builder.create_builder_config()
        config.max_workspace_size = GiB(1)
        if using_half:
            config.set_flag(trt.BuilderFlag.FP16)
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(onnx_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        engine = builder.build_engine(network, config)
        # Serialize the new engine so the cache check above can reuse it next time.
        with open(engine_file, "wb") as f:
            f.write(engine.serialize())
        return engine

def get_engine(onnx_file_path, engine_file_path="", fp16_mode=False, overwrite=False): """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" def build_engine(): """Takes an ONNX file and creates a TensorRT engine to run inference with""" with trt.Builder(TRT_LOGGER) as builder, builder.create_network( ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser: builder.max_workspace_size = 1 << 28 # 256MB builder.max_batch_size = 1 if fp16_mode: print('Using FP16 mode') builder.fp16_mode = True # Parse model file if not os.path.exists(onnx_file_path): print( 'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.' .format(onnx_file_path)) exit(0) print('Loading ONNX file from path {}...'.format(onnx_file_path)) with open(onnx_file_path, 'rb') as model: print('Beginning ONNX file parsing') parser.parse(model.read()) print('Completed parsing of ONNX file') print('Building an engine from file {}; this may take a while...'. format(onnx_file_path)) engine = builder.build_cuda_engine(network) print("Completed creating Engine") with open(engine_file_path, "wb") as f: f.write(engine.serialize()) print( 'Saving TensorRT file to path {}...'.format(engine_file_path)) return engine if os.path.exists(engine_file_path) and not overwrite: # If a serialized engine exists, use it instead of building an engine. print("Reading engine from file {}".format(engine_file_path)) with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: return runtime.deserialize_cuda_engine(f.read()) else: return build_engine()
def __init__(self, model, anchor_nums, nc, anchors, output_shapes, img_size):
    # load tensorrt engine
    self.cfx = cuda.Device(0).make_context()
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    TRTbin = model
    runtime = trt.Runtime(TRT_LOGGER)
    with open(TRTbin, 'rb') as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    self.context = engine.create_execution_context()

    # allocate memory
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append({'host': host_mem, 'device': device_mem})
        else:
            outputs.append({'host': host_mem, 'device': device_mem})

    # save to class
    self.inputs = inputs
    self.outputs = outputs
    self.bindings = bindings
    self.stream = stream
    self.anchor_nums = anchor_nums
    self.nc = nc  # classes
    self.no = self.nc + 5  # outputs per anchor

    # post processing config
    self.output_shapes = output_shapes
    self.strides = np.array([8., 16., 32.])
    self.na = len(anchors[0])
    self.nl = len(anchors)
    self.img_size = img_size
    a = anchors.copy().astype(np.float32)
    a = a.reshape(self.nl, -1, 2)
    self.anchors = a.copy()
    self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)

def build_engine():
    """Takes an ONNX file and creates a TensorRT engine to run inference with"""
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(common.EXPLICIT_BATCH)
    parser = trt.OnnxParser(network, TRT_LOGGER)
    runtime = trt.Runtime(TRT_LOGGER)

    # Parse model file
    print('Loading ONNX file from path {}...'.format(onnx_file_path))
    with open(onnx_file_path, 'rb') as model:
        print('Beginning ONNX file parsing')
        if not parser.parse(model.read()):
            print('ERROR: Failed to parse the ONNX file.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None
    print('Completed parsing of ONNX file')

    # Print input info
    print('Network inputs:')
    for i in range(network.num_inputs):
        tensor = network.get_input(i)
        print(tensor.name, trt.nptype(tensor.dtype), tensor.shape)

    network.get_input(0).shape = [10, 1]
    network.get_input(1).shape = [10, 1, 1, 16]
    network.get_input(2).shape = [6, 1]
    network.get_input(3).shape = [6, 1, 1, 16]

    config = builder.create_builder_config()
    config.set_flag(trt.BuilderFlag.REFIT)
    config.max_workspace_size = 1 << 28  # 256 MiB

    print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
    plan = builder.build_serialized_network(network, config)
    engine = runtime.deserialize_cuda_engine(plan)
    print("Completed creating Engine")
    with open(engine_file_path, "wb") as f:
        f.write(plan)
    return engine

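# Because the REFIT flag is set in the config above, this engine's weights can
# later be swapped without a rebuild. A minimal sketch, assuming a hypothetical
# layer name "fc_layer" and a `new_weights` array of matching dtype and size:
def refit_engine(engine, new_weights):
    with trt.Refitter(engine, TRT_LOGGER) as refitter:
        refitter.set_weights("fc_layer", trt.WeightsRole.KERNEL, new_weights)
        assert refitter.refit_cuda_engine()
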
def get_engine(onnx_file_path, engine_file_path=""): """ Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it. """ print("hello world") def build_engine(): """ Takes an ONNX file and creates a TensorRT engine to run inference with """ with trt.Builder(TRT_LOGGER) as builder, builder.create_network( ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser: builder.max_workspace_size = 2 << 30 # 1GB builder.max_batch_size = 1 # Parse model file if not os.path.exists(onnx_file_path): print( 'ONNX file {} not found, please run platedetection2onnx.py first to generate it.' .format(onnx_file_path)) exit(0) print('Loading ONNX file from path {}...'.format(onnx_file_path)) with open(onnx_file_path, 'rb') as model: print('Beginning ONNX file parsing') parser.parse(model.read()) print('Completed parsing of ONNX file') print('Building an engine from file {}; this may take a while...'. format(onnx_file_path)) engine = builder.build_cuda_engine(network) print(engine) print('Completed creating Engine') with open(engine_file_path, 'wb') as f: f.write(engine.serialize()) return engine if os.path.exists(engine_file_path): # If a serialized engine exists, use it instead of building an engine. print('Reading engine from file {}'.format(engine_file_path)) with open(engine_file_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime: return runtime.deserialize_cuda_engine(f.read()) else: return build_engine()
def main():
    # 13 repetitions of the sequence 1.0 .. 9.0, matching the original literal.
    arr = np.tile(np.arange(1.0, 10.0), 13)
    print(arr)

    # sample.engine was previously built with build_engine() and saved via
    # engine.serialize(); here it is only deserialized and run.
    save_engine = os.path.join(os.path.dirname(__file__), "sample.engine")
    with open(save_engine, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    np.copyto(h_input, arr)
    with engine.create_execution_context() as context:
        do_inference(context, h_input, d_input, h_output, d_output, stream)
        print(h_output)

def main(): common.add_help(description="Yeah!") # Get the PyTorch weights weights = torch.load('mobilenetv3_centernet162_910.pth', map_location={'cuda:0': 'cpu'}) mobilenetv3 = get_pose_net({'hm': 2, 'wh': 2, 'reg': 2}) mobilenetv3.load_state_dict(weights, strict=False) mobilenetv3.eval() # Do inference with TensorRT. with MobileNetv3(weights).engine as engine: # Build an engine, allocate buffers and create a stream. # For more information on buffer allocation, refer to the introductory samples. with open('mobilenetv3-centernet.trt', "wb") as f: f.write(engine.serialize()) with open('mobilenetv3.trt', "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: engine = runtime.deserialize_cuda_engine(f.read()) inputs, outputs, bindings, stream = common.allocate_buffers(engine) with engine.create_execution_context() as context: t = 0 for _ in range(1): img = load_random_test_case( pagelocked_buffer=inputs[0].host) # For more information on performing inference, refer to the introductory samples. # The common.do_inference function will return a list of outputs - we only have one in this case. a = time.time() [hm, wh, reg, _] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1) t += time.time() - a with torch.no_grad(): [baseline] = mobilenetv3.cuda()(torch.from_numpy(img).cuda()) print('baseline: ', baseline['hm'].mean().cpu().numpy(), baseline['wh'].mean().cpu().numpy(), baseline['reg'].mean().cpu().numpy()) print('output: ', np.mean(hm), np.mean(wh), np.mean(reg)) print('Time: ', t)
def get_engine(onnx_file_path, width=608, height=608, batch_size=1,
               engine_file_path="", int8mode=False, calib_file='yolo_calibration.cache'):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""

    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28  # 256 MiB
            builder.max_batch_size = batch_size
            if int8mode:
                # calibrator definition
                calibration_dataset_loc = "calibration_dataset/"
                calibration_cache = calib_file
                calib = calibrator.PythonEntropyCalibrator(calibration_dataset_loc,
                                                           cache_file=calibration_cache,
                                                           width=width, height=height,
                                                           batch_size=batch_size)
                builder.int8_mode = True
                builder.int8_calibrator = calib
            else:
                builder.fp16_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(1)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()

def __init__(self, trt_path):
    # get model name
    self._model_name = os.path.basename(trt_path)
    self._model_name = self._model_name[:self._model_name.rfind(".")]

    # create engine
    self.trt_path = trt_path
    self.logger = trt.Logger()
    self.runtime = trt.Runtime(self.logger)
    with open(trt_path, "rb") as f:
        self.engine = self.runtime.deserialize_cuda_engine(f.read())

    # create context and buffer
    self.context = self.engine.create_execution_context()
    self.stream = cuda.Stream()
    bindings = []
    host_input = device_input = host_output = device_output = None
    for binding in self.engine:
        binding_idx = self.engine.get_binding_index(binding)
        print(f"binding name {binding}, idx {binding_idx}")
        # trt.volume gives the flat element count of the binding
        size = trt.volume(self.context.get_binding_shape(binding_idx))
        dtype = trt.nptype(self.engine.get_binding_dtype(binding))
        if self.engine.binding_is_input(binding):
            print(size)
            host_input = np.empty(size, dtype=np.float32)
            device_input = cuda.mem_alloc(host_input.nbytes)
            bindings.append(int(device_input))
        else:
            host_output = cuda.pagelocked_empty(size, dtype)
            device_output = cuda.mem_alloc(host_output.nbytes)
            bindings.append(int(device_output))
    assert device_input is not None
    assert device_output is not None
    assert len(bindings) == 2

    self.bindings = bindings
    self.device_input = device_input
    self.host_input = host_input
    self.device_output = device_output
    self.host_output = host_output

def get_engine(onnx_file_path, engine_file_path=""): """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" def build_engine(): """Takes an ONNX file and creates a TensorRT engine to run inference with""" with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: builder.max_workspace_size = 1 << 30 # 1GB builder.max_batch_size = 1 # builder.fp16_mode = True # builder.strict_type_constraints = True # Parse model file if not os.path.exists(onnx_file_path): print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path)) raise FileExistsError( 'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path)) print('Loading ONNX file from path {}...'.format(onnx_file_path)) with open(onnx_file_path, 'rb') as model: print('Beginning ONNX file parsing') parser.parse(model.read()) print('Completed parsing of ONNX file') print('Building an engine from file {}; this may take a while...'.format(onnx_file_path)) engine = builder.build_cuda_engine(network) if engine is None: print('build engine have some error') raise Exception('build engine have some error') with open(engine_file_path, "wb") as f: f.write(engine.serialize()) print("Completed creating Engine") return engine if os.path.exists(engine_file_path): # If a serialized engine exists, use it instead of building an engine. print("Reading engine from file {}".format(engine_file_path)) with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: return runtime.deserialize_cuda_engine(f.read()) else: return build_engine()
def main():
    # data_test_path = "/home/ubuntu/MyFiles/ZTE/FACE-ALL-5-POINTS_CROP/"
    data_test_path = "/home/ubuntu/MyFiles/ZTE/1000pairs/"
    data_txt = "/home/ubuntu/MyFiles/ZTE/test_2000_images_list.txt"

    with open("zte.v7.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    batch_size = BATCH_SIZE
    # Allocate buffers and create a CUDA stream.
    features1 = []
    features2 = []
    with engine.create_execution_context() as context:
        data_test1, data_test2 = get_test_data(data_test_path, data_txt)
        for i in range(TEST_NUM):
            h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
            test_case = load_normalized_test_case(data_test1[i], h_input)
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            features1.append(h_output)
        for i in range(TEST_NUM):
            h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
            test_case = load_normalized_test_case(data_test2[i], h_input)
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            features2.append(h_output)
    GetTPR(features1, features2)

def __init__(self, model_path, device):
    cuda.init()
    self.__device = cuda.Device(device)
    self.context = self.__device.make_context()
    trt.init_libnvinfer_plugins(engine.TRT_LOGGER, '')
    self.__trt_runtime = trt.Runtime(engine.TRT_LOGGER)
    try:
        self.__trt_engine = engine.load_engine(
            self.__trt_runtime, os.path.join(model_path, 'gpu.buf'))
    except Exception as e:
        self.__finalize()
        raise e
    self._allocate_buffers()
    self.__model_shape = itemgetter(1, 2)(self.__trt_engine.get_binding_shape('Input'))
    self.__execution_context = self.__trt_engine.create_execution_context()

def _engine_init(self):
    """
    Load an engine buffer or build a new one.
    :return: a trt engine obj
    """
    self.trt_runtime = trt.Runtime(TRT_LOGGER)
    self.trt_engine = None
    engine_file = os.path.splitext(self.model_dir)[0] + '.engine'
    if not os.path.exists(engine_file) or self.force_rebuild:
        print('no built engine found, building a new one...')
        model_type = os.path.splitext(self.model_dir)[-1]
        valid_model_format = ['.pb', '.onnx']
        assert model_type in valid_model_format, 'provided model is invalid: {}/{}'.format(
            model_type, valid_model_format)
        self.trt_engine = TensorrtBuilder.build_engine_from_pb_or_onnx(
            self.model_dir, **self.kwargs)
    else:
        print('loading built engine: {}...'.format(engine_file))
        self.trt_engine = TensorrtBuilder._load_engine(self.trt_runtime, engine_file)

def __init__(self, tensorrt_engine_path: str):
    assert os.path.exists(tensorrt_engine_path)
    with open(tensorrt_engine_path, "rb") as fp:
        self.runtime = trt.Runtime(trt.Logger())
        self.engine = self.runtime.deserialize_cuda_engine(fp.read())
    # numpy dtypes of the input and output arrays
    self.array_in_dtype = trt.nptype(self.engine.get_binding_dtype(0))
    self.array_out_dtype = trt.nptype(self.engine.get_binding_dtype(1))
    # shapes of the input and output arrays (without batch dimension)
    self.array_in_shape = self.engine.get_binding_shape(0)
    self.array_out_shape = self.engine.get_binding_shape(1)

    self.stream = cuda.Stream()
    self.h_input = cuda.pagelocked_empty(trt.volume(self.array_in_shape),
                                         dtype=self.array_in_dtype)
    self.h_output = cuda.pagelocked_empty(trt.volume(self.array_out_shape),
                                          dtype=self.array_out_dtype)
    # Allocate device memory for inputs and outputs.
    self.d_input = cuda.mem_alloc(self.h_input.nbytes)
    self.d_output = cuda.mem_alloc(self.h_output.nbytes)

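# A hedged inference-method sketch for the class above (an assumption, not part
# of the original): it pushes one input array through the pre-allocated buffers
# and returns the output reshaped to the engine's output shape.
def infer(self, array_in):
    np.copyto(self.h_input, array_in.ravel())
    # real code would likely cache the execution context in __init__
    with self.engine.create_execution_context() as context:
        cuda.memcpy_htod_async(self.d_input, self.h_input, self.stream)
        context.execute_async(batch_size=1,
                              bindings=[int(self.d_input), int(self.d_output)],
                              stream_handle=self.stream.handle)
        cuda.memcpy_dtoh_async(self.h_output, self.d_output, self.stream)
        self.stream.synchronize()
    return self.h_output.reshape(self.array_out_shape)
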