def __init__(self, engine_fpath: str, network_metadata: NetworkMetadata):
    self.network_metadata = network_metadata

    self.trt_engine = engine_from_bytes(bytes_from_path(engine_fpath))
    self.trt_context = TrtRunner(self.trt_engine.create_execution_context())
    self.trt_context.activate()
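# A minimal sketch (not from the source) of what the constructor above does, written with
# plain Polygraphy calls: deserialize an engine file, wrap its execution context in a
# TrtRunner, and activate it so infer() can be called without a `with` block. The
# "model.engine" filename and the "input_ids" feed key are illustrative assumptions.
import numpy as np
from polygraphy.backend.common import bytes_from_path
from polygraphy.backend.trt import TrtRunner, engine_from_bytes

engine = engine_from_bytes(bytes_from_path("model.engine"))
runner = TrtRunner(engine.create_execution_context())
runner.activate()
outputs = runner.infer(feed_dict={"input_ids": np.zeros((1, 128), dtype=np.int32)})
runner.deactivate()  # release resources once inference is finished; mirrors activate() above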
import numpy as np

from polygraphy.backend.common import bytes_from_path
from polygraphy.backend.trt import TrtRunner, engine_from_bytes


def main():
    engine = engine_from_bytes(bytes_from_path("identity.engine"))

    # NOTE: In TensorRT 8.0 and newer, we do *not* need to use a context manager to free `engine`;
    # the `with engine` below is kept only for compatibility with older versions.
    with engine, TrtRunner(engine) as runner:
        inp_data = np.ones((1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer(feed_dict={"x": inp_data})

        assert np.array_equal(outputs["output"], inp_data)  # It's an identity model!

        print("Inference succeeded!")
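# For completeness, a sketch (an assumption, not part of the example above) of how an
# `identity.engine` file like the one main() loads could be produced with Polygraphy's
# functional API. The "identity.onnx" filename is a placeholder.
from polygraphy.backend.trt import engine_from_network, network_from_onnx_path, save_engine

serialized_engine = save_engine(
    engine_from_network(network_from_onnx_path("identity.onnx")),
    path="identity.engine",
)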
def call_impl(self):
    """
    Returns:
        (trt.Builder, trt.INetworkDefinition, trt.OnnxParser):
            A TensorRT network, as well as the builder used to create it, and the parser used to populate it.
    """
    path = util.invoke_if_callable(self.path)[0]
    if mod.version(trt.__version__) >= mod.version("7.1"):
        with util.FreeOnException(super().call_impl()) as (builder, network, parser):
            # We need to use parse_from_file for the ONNX parser to keep track of the location of the ONNX file for
            # potentially parsing any external weights.
            success = parser.parse_from_file(path)
            trt_util.check_onnx_parser_errors(parser, success)
            return builder, network, parser
    else:
        from polygraphy.backend.common import bytes_from_path

        return network_from_onnx_bytes(bytes_from_path(path), self.explicit_precision)
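# A short sketch (not from the source) of how the lazy loader whose call_impl() is shown
# above is typically composed: NetworkFromOnnxPath feeds EngineFromNetwork, and the ONNX
# file is only parsed and the engine built when the runner activates. The "model.onnx"
# filename and the input name "x" are placeholders for illustration.
import numpy as np
from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner

load_network = NetworkFromOnnxPath("model.onnx")  # calling this loader invokes call_impl()
build_engine = EngineFromNetwork(load_network)

with TrtRunner(build_engine) as runner:
    outputs = runner.infer(feed_dict={"x": np.ones((1, 1, 2, 2), dtype=np.float32)})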