Esempio n. 1
0
 def initialize_model():
     """Build, activate and wrap the TensorRT runner for the configured engine.

     Reads the serialized engine from ``self._engine_path``. ``self`` is not a
     parameter here, so it is presumably captured from the enclosing scope
     (TODO confirm against the surrounding method).

     Returns:
         A ``TrtModel`` wrapping the already-activated ``TrtRunner``.
     """
     # Load a potentially large model in memory. Executed once per process.
     # Read inside a ``with`` block so the file handle is closed right away
     # instead of being left for the garbage collector to reclaim.
     with open(self._engine_path, "rb") as engine_file:
         serialized_engine = engine_file.read()

     build_engine = EngineFromBytes(serialized_engine)
     runner = TrtRunner(build_engine)
     runner.activate()
     return TrtModel(runner)
Esempio n. 2
0
class TRTPolygraphyRunner:
    """
    TensorRT-backed network interface that can be used to measure inference time.

    It is the easier runner to set up; TRTNativeRunner is the recommended
    choice when better performance is needed.
    """

    def __init__(self, engine_fpath: str, network_metadata: NetworkMetadata):
        """
        Load a serialized engine from disk and activate a runner for it.

        Args:
            engine_fpath: Path of the serialized engine file to load.
            network_metadata: Metadata object stored as-is on the instance.
        """
        self.network_metadata = network_metadata

        # Deserialize the engine from the raw bytes read at ``engine_fpath``.
        serialized_engine = bytes_from_path(engine_fpath)
        self.trt_engine = engine_from_bytes(serialized_engine)

        # The runner is built around a fresh execution context of that engine
        # and stays active until ``release`` is called.
        execution_context = self.trt_engine.create_execution_context()
        self.trt_context = TrtRunner(execution_context)
        self.trt_context.activate()

    def __call__(self, *args, **kwargs):
        """
        Forward all arguments to ``self.forward`` and return its result.

        ``forward`` is not defined in this class; presumably it is supplied by
        a subclass or base class (verify against the rest of the file).
        """
        # hook polygraphy verbosity for inference
        if G_LOGGER.root.level == G_LOGGER.DEBUG:
            pg_verbosity = G_LOGGER.EXTRA_VERBOSE
        else:
            pg_verbosity = G_LOGGER.WARNING

        with PG_LOGGER.verbosity(pg_verbosity):
            return self.forward(*args, **kwargs)

    def release(self):
        """Deactivate the runner that was activated in ``__init__``."""
        self.trt_context.deactivate()