Code Example #1
    def __init__(self, engine_fpath: str, network_metadata: NetworkMetadata):
        self.network_metadata = network_metadata

        self.trt_engine = engine_from_bytes(bytes_from_path(engine_fpath))
        self.trt_context = TrtRunner(
            self.trt_engine.create_execution_context())
        self.trt_context.activate()
Code Example #2
 def initialize_model():
     # Load a potentially large model in memory. Executed once per process.
     build_engine = EngineFromBytes(
         open(self._engine_path, "rb").read())
     runner = TrtRunner(build_engine)
     runner.activate()
     return TrtModel(runner)
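
The fragment above references `self._engine_path`, so it was presumably extracted from a class method, and `TrtModel` is not shown on this page. A rough, self-contained sketch of the same idea (the `engine_path` parameter and returning the bare runner are assumptions made here for illustration):

from polygraphy.backend.trt import EngineFromBytes, TrtRunner

def initialize_model(engine_path: str) -> TrtRunner:
    # Deserialize a potentially large engine once per process.
    with open(engine_path, "rb") as f:
        build_engine = EngineFromBytes(f.read())
    runner = TrtRunner(build_engine)
    runner.activate()  # Pair with runner.deactivate() when the process shuts down.
    return runner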
Code Example #3
File: test_runner.py  Project: phongphuhanam/TensorRT
    def test_multithreaded_runners_from_engine(self):
        model = ONNX_MODELS["identity"]
        engine = engine_from_network(NetworkFromOnnxBytes(model.loader))

        with engine, TrtRunner(engine) as runner0, TrtRunner(engine) as runner1:
            t1 = threading.Thread(target=model.check_runner, args=(runner0,))
            t2 = threading.Thread(target=model.check_runner, args=(runner1,))
            t1.start()
            t2.start()
            t1.join()
            t2.join()
Code Example #4
File: test_runner.py  Project: phongphuhanam/TensorRT
 def test_device_view_dynamic_shapes(self, use_view):
     model = ONNX_MODELS["dynamic_identity"]
     profiles = [
         Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)),
     ]
     runner = TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader), CreateConfig(profiles=profiles)))
     with runner, cuda.DeviceArray(shape=(1, 2, 3, 3), dtype=np.float32) as arr:
         inp = np.random.random_sample(size=(1, 2, 3, 3)).astype(np.float32)
         arr.copy_from(inp)
         outputs = runner.infer({"X": cuda.DeviceView(arr.ptr, arr.shape, arr.dtype) if use_view else arr})
         assert np.all(outputs["Y"] == inp)
         assert outputs["Y"].shape == (1, 2, 3, 3)
Code Example #5
File: example.py  Project: stjordanis/TensorRT
def main():
    # We can provide a path or file-like object if we want to cache calibration data.
    # This lets us avoid running calibration the next time we build the engine.
    #
    # TIP: You can use this calibrator with TensorRT APIs directly (e.g. config.int8_calibrator).
    # You don't have to use it with Polygraphy loaders if you don't want to.
    calibrator = Calibrator(data_loader=calib_data(),
                            cache="identity-calib.cache")

    # We must enable int8 mode in addition to providing the calibrator.
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"),
                                     config=CreateConfig(
                                         int8=True, calibrator=calibrator))

    # When we activate our runner, it will calibrate and build the engine. If we want to
    # see the logging output from TensorRT, we can temporarily increase logging verbosity:
    with G_LOGGER.verbosity(
            G_LOGGER.VERBOSE), TrtRunner(build_engine) as runner:
        # Finally, we can test out our int8 TensorRT engine with some dummy input data:
        inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer({"x": inp_data})

        assert np.array_equal(outputs["y"],
                              inp_data)  # It's an identity model!
Code Example #6
 def test_basic(self):
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         assert runner.is_active
         model.check_runner(runner)
     assert not runner.is_active
Code Example #7
 def test_error_on_wrong_dtype_feed_dict(self):
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         with pytest.raises(PolygraphyException, match="unexpected dtype."):
             runner.infer(
                 {"x": np.ones(shape=(1, 1, 2, 2), dtype=np.int32)})
Code Example #8
File: test_runner.py  Project: phongphuhanam/TensorRT
 def test_cannot_use_device_view_shape_tensor(self):
     model = ONNX_MODELS["empty_tensor_expand"]
     with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner, cuda.DeviceArray(
         shape=(5,), dtype=np.int32
     ) as arr:
         with pytest.raises(PolygraphyException, match="it must reside in host memory"):
             runner.infer({"data": np.ones((2, 0, 3, 0), dtype=np.float32), "new_shape": arr})
Code Example #9
 def test_context(self):
     model = ONNX_MODELS["identity"]
     engine = engine_from_network(NetworkFromOnnxBytes(model.loader))
     with engine, TrtRunner(engine.create_execution_context) as runner:
         model.check_runner(runner)
         assert not runner.owns_engine
         assert runner.owns_context
Code Example #10
 def test_device_buffer_order_matches_bindings(self):
     model = ONNX_MODELS["reducable"]
     engine = engine_from_network(NetworkFromOnnxBytes(model.loader))
     with engine, TrtRunner(engine) as runner:
         dev_buf_order = list(runner.device_buffers.keys())
         for binding, dev_buf_name in zip(engine, dev_buf_order):
             assert binding == dev_buf_name
Code Example #11
def main():
    # We can compose multiple lazy loaders together to get the desired conversion.
    # In this case, we want ONNX -> TensorRT Network -> TensorRT engine (w/ fp16).
    #
    # NOTE: `build_engine` is a *callable* that returns an engine, not the engine itself.
    #   To get the engine directly, you can use the immediately evaluated functional API.
    #   See examples/api/06_immediate_eval_api for details.
    build_engine = EngineFromNetwork(
        NetworkFromOnnxPath("identity.onnx"), config=CreateConfig(
            fp16=True))  # Note that config is an optional argument.

    # To reuse the engine elsewhere, we can serialize and save it to a file.
    # The `SaveEngine` lazy loader will return the TensorRT engine when called,
    # which allows us to chain it together with other loaders.
    build_engine = SaveEngine(build_engine, path="identity.engine")

    # Once our loader is ready, inference is simply a matter of constructing a runner,
    # activating it with a context manager (i.e. `with TrtRunner(...)`) and calling `infer()`.
    #
    # NOTE: You can call activate() yourself instead of using a context manager, but then you must remember to
    # call deactivate() to avoid a memory leak. For that reason, a context manager is the safer option.
    with TrtRunner(build_engine) as runner:
        inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer(feed_dict={"x": inp_data})

        assert np.array_equal(outputs["y"],
                              inp_data)  # It's an identity model!

        print("Inference succeeded!")
Code Example #12
File: example.py  Project: phongphuhanam/TensorRT
def main():
    # The OnnxrtRunner requires an ONNX-RT session.
    # We can use the SessionFromOnnx lazy loader to construct one easily:
    build_onnxrt_session = SessionFromOnnx("identity.onnx")

    # The TrtRunner requires a TensorRT engine.
    # To create one from the ONNX model, we can chain a couple lazy loaders together:
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    runners = [
        TrtRunner(build_engine),
        OnnxrtRunner(build_onnxrt_session),
    ]

    # `Comparator.run()` will run each runner separately using synthetic input data and
    #   return a `RunResults` instance. See `polygraphy/comparator/struct.py` for details.
    #
    #   TIP: To use custom input data, you can set the `data_loader` parameter in `Comparator.run()`
    #   to a generator or iterable that yields `Dict[str, np.ndarray]`.
    run_results = Comparator.run(runners)

    # `Comparator.compare_accuracy()` checks that outputs match between runners.
    #
    # TIP: The `compare_func` parameter can be used to control how outputs are compared (see API reference for details).
    #   The default comparison function is created by `CompareFunc.simple()`, but we can construct it
    #   explicitly if we want to change the default parameters, such as tolerance.
    assert bool(
        Comparator.compare_accuracy(
            run_results, compare_func=CompareFunc.simple(atol=1e-8)))

    # We can use the `RunResults.save()` method to save the inference results to a JSON file.
    # This can be useful if you want to generate and compare results separately.
    run_results.save("inference_results.json")
Code Example #13
File: example.py  Project: stjordanis/TensorRT
def main():
    # In Polygraphy, loaders and runners take ownership of objects if they are provided
    # via the return values of callables. For example, we don't need to worry about object
    # lifetimes when we use lazy loaders.
    #
    # Since we are immediately evaluating, we take ownership of objects, and are responsible for freeing them.
    builder, network, parser = network_from_onnx_path("identity.onnx")

    # Extend the network with an identity layer.
    prev_output = network.get_output(0)
    network.unmark_output(prev_output)
    output = network.add_identity(prev_output).get_output(0)
    output.name = "output"
    network.mark_output(output)

    # Create a TensorRT IBuilderConfig so that we can build the engine with FP16 enabled.
    config = create_config(builder, network, fp16=True)

    # We can free everything we constructed above once we're done building the engine.
    # NOTE: In TensorRT 8.0 and newer, we do *not* need to use a context manager here.
    with builder, network, parser, config:
        engine = engine_from_network((builder, network), config)

    # NOTE: In TensorRT 8.0 and newer, we do *not* need to use a context manager to free `engine`.
    with engine, TrtRunner(engine) as runner:
        inp_data = np.ones((1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer(feed_dict={"x": inp_data})

        assert np.array_equal(outputs["output"], inp_data)  # It's an identity model!

        print("Inference succeeded!")
Code Example #14
File: test_runner.py  Project: phongphuhanam/TensorRT
 def test_no_output_copy(self):
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         inp = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)
         outputs = runner.infer({"x": inp}, copy_outputs_to_host=False)
         assert isinstance(outputs["y"], cuda.DeviceView)
         assert np.array_equal(outputs["y"].numpy(), inp)
Code Example #15
    def check_network(self, suffix):
        """
        Checks whether the provided network is accurate compared to golden values.

        Returns:
            OrderedDict[str, OutputCompareResult]:
                    A mapping of output names to an object describing whether they matched, and what the
                    required tolerances were.
        """
        from polygraphy.comparator import Comparator, CompareFunc, DataLoader
        from polygraphy.backend.trt import EngineFromNetwork, TrtRunner, ModifyNetwork, SaveEngine

        with G_LOGGER.verbosity(
                severity=G_LOGGER.severity if self.args.show_output else G_LOGGER.CRITICAL):
            data_loader = tool_util.get_data_loader(self.args)

            self.args.strict_types = True  # HACK: Override strict types so things actually run in the right precision.
            config = tool_util.get_trt_config_loader(self.args, data_loader)(
                self.builder, self.network)

            suffix = "-{:}-{:}".format(suffix, self.precision)
            engine_path = misc.insert_suffix(self.args.save_engine, suffix)

            self.builder, self.network, self.parser = ModifyNetwork(
                (self.builder, self.network, self.parser),
                outputs=self.args.trt_outputs)()

            engine_loader = SaveEngine(
                EngineFromNetwork((self.builder, self.network, self.parser), config),
                path=engine_path)

            runners = [TrtRunner(engine_loader)]

            results = Comparator.run(runners, data_loader=data_loader)
            if self.args.validate:
                Comparator.validate(results)
            results.update(self.golden)

            compare_func = CompareFunc.basic_compare_func(
                atol=self.args.atol,
                rtol=self.args.rtol,
                check_shapes=not self.args.no_shape_check)
            accuracy_result = Comparator.compare_accuracy(
                results, compare_func=compare_func)

        # First iteration of the first runner pair.
        tolerances = list(accuracy_result.values())[0][0]
        for name, req_tol in tolerances.items():
            if bool(req_tol):
                G_LOGGER.success(
                    "PASSED | Output: {:} | Required Tolerances: {:}".format(
                        name, req_tol))
            else:
                G_LOGGER.error(
                    "FAILED | Output: {:} | Required Tolerances: {:}".format(
                        name, req_tol))
        return accuracy_result
Code Example #16
    def test_multirun_outputs_are_different(self):
        onnx_loader = ONNX_MODELS["identity"].loader
        runner = TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(onnx_loader)))
        run_results = Comparator.run([runner], data_loader=DataLoader(iterations=2))

        iteration0 = run_results[runner.name][0]
        iteration1 = run_results[runner.name][1]
        for name in iteration0.keys():
            assert np.any(iteration0[name] != iteration1[name])
Code Example #17
 def test_error_on_wrong_name_feed_dict(self, names, err):
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         with pytest.raises(PolygraphyException, match=err):
             runner.infer({
                 name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32)
                 for name in names
             })
Code Example #18
File: test_runner.py  Project: phongphuhanam/TensorRT
    def test_empty_tensor_with_dynamic_input_shape_tensor(self):
        model = ONNX_MODELS["empty_tensor_expand"]
        shapes = [(1, 2, 0, 3, 0), (2, 2, 0, 3, 0), (4, 2, 0, 3, 0)]
        network_loader = NetworkFromOnnxBytes(model.loader)
        profiles = [Profile().add("new_shape", *shapes)]
        config_loader = CreateConfig(profiles=profiles)

        with TrtRunner(EngineFromNetwork(network_loader, config_loader)) as runner:
            for shape in shapes:
                model.check_runner(runner, {"new_shape": shape})
Code Example #19
 def test_basic(self):
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         assert runner.is_active
         assert runner.owns_engine
         assert runner.owns_context
         model.check_runner(runner)
     assert not runner.is_active
     assert runner._cached_input_metadata is None
Code Example #20
File: test_runner.py  Project: phongphuhanam/TensorRT
 def test_basic(self):
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         assert runner.is_active
         assert runner.owns_engine
         assert runner.owns_context
         model.check_runner(runner)
         assert runner.last_inference_time() is not None
     assert not runner.is_active
Code Example #21
    def test_segfault_does_not_hang(self):
        def raise_called_process_error():
            class FakeSegfault(sp.CalledProcessError):
                pass

            raise FakeSegfault(-11, ["simulate", "segfault"])

        runners = [TrtRunner(EngineFromNetwork(raise_called_process_error))]
        with pytest.raises(PolygraphyException):
            Comparator.run(runners, use_subprocess=True, subprocess_polling_interval=1)
Code Example #22
def main():
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    with TrtRunner(build_engine) as runner:
        for (data, golden) in zip(REAL_DATASET, EXPECTED_OUTPUTS):
            # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
            # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
            outputs = runner.infer(feed_dict={"x": data})

            assert np.array_equal(outputs["y"], golden)
Code Example #23
    def test_subsequent_infers_with_different_input_types(self):
        model = ONNX_MODELS["identity"]
        network_loader = NetworkFromOnnxBytes(model.loader)
        with TrtRunner(EngineFromNetwork(network_loader)) as runner:
            inp = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

            def check(outputs):
                assert np.all(outputs["y"] == inp)

            check(runner.infer({"x": inp}))
            check(runner.infer({"x": cuda.DeviceArray().copy_from(inp)}))
            check(runner.infer({"x": inp}))
Code Example #24
File: test_runner.py  Project: phongphuhanam/TensorRT
 def test_device_views(self, use_view):
     model = ONNX_MODELS["reducable"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner, cuda.DeviceArray((1,), dtype=np.float32) as x:
         x.copy_from(np.ones((1,), dtype=np.float32))
         outputs = runner.infer(
             {
                 "X0": x.view() if use_view else x,
                 "Y0": np.ones((1,), dtype=np.float32),
             }
         )
         assert outputs["identity_out_6"][0] == 2
         assert outputs["identity_out_8"][0] == 2
Code Example #25
def main():
    # Since we have no further need of TensorRT APIs, we can come back to regular Polygraphy.
    #
    # NOTE: Since we're using lazy loaders, we provide the functions as arguments - we do *not* call them ourselves.
    build_engine = EngineFromNetwork(load_network, config=load_config)

    with TrtRunner(build_engine) as runner:
        inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer({"x": inp_data})

        assert np.all(outputs["y"] == inp_data) # It's an identity model!
Code Example #26
 def test_multiple_profiles(self):
     model = ONNX_MODELS["dynamic_identity"]
     shapes = [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)]
     network_loader = NetworkFromOnnxBytes(model.loader)
     profiles = [
         Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)),
         Profile().add("X", *shapes),
     ]
     config_loader = CreateConfig(profiles=profiles)
     with TrtRunner(EngineFromNetwork(network_loader,
                                      config_loader)) as runner:
         runner.context.active_optimization_profile = 1
         for shape in shapes:
             model.check_runner(runner, {"X": shape})
Code Example #27
class TRTPolygraphyRunner:
    """
    TRT-implemented network interface that can be used to measure inference time.
    Easier to use, but less efficient; TRTNativeRunner is recommended for better performance.
    """
    def __init__(self, engine_fpath: str, network_metadata: NetworkMetadata):
        self.network_metadata = network_metadata

        self.trt_engine = engine_from_bytes(bytes_from_path(engine_fpath))
        self.trt_context = TrtRunner(
            self.trt_engine.create_execution_context())
        self.trt_context.activate()

    def __call__(self, *args, **kwargs):
        # Hook Polygraphy verbosity for inference.
        g_logger_verbosity = (G_LOGGER.EXTRA_VERBOSE
                              if G_LOGGER.root.level == G_LOGGER.DEBUG
                              else G_LOGGER.WARNING)

        with PG_LOGGER.verbosity(g_logger_verbosity):
            return self.forward(*args, **kwargs)

    def release(self):
        self.trt_context.deactivate()
Code Example #28
    def test_dim_param_trt_onnxrt(self):
        load_onnx_bytes = ONNX_MODELS["dim_param"].loader
        build_onnxrt_session = SessionFromOnnx(load_onnx_bytes)
        load_engine = EngineFromNetwork(NetworkFromOnnxBytes(load_onnx_bytes))

        runners = [
            OnnxrtRunner(build_onnxrt_session),
            TrtRunner(load_engine),
        ]

        run_results = Comparator.run(runners)
        compare_func = CompareFunc.simple(check_shapes=mod.version(trt.__version__) >= mod.version("7.0"))
        assert bool(Comparator.compare_accuracy(run_results, compare_func=compare_func))
        assert len(list(run_results.values())[0]) == 1  # Default number of iterations
Code Example #29
def main():
    engine = engine_from_bytes(bytes_from_path("identity.engine"))

    # NOTE: In TensorRT 8.0 and newer, we do *not* need to use a context manager to free `engine`.
    with engine, TrtRunner(engine) as runner:
        inp_data = np.ones((1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer(feed_dict={"x": inp_data})

        assert np.array_equal(outputs["output"],
                              inp_data)  # It's an identity model!

        print("Inference succeeded!")
Code Example #30
 def test_multiple_profiles(self):
     model = ONNX_MODELS["dynamic_identity"]
     profile0_shapes = [(1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)]
     profile1_shapes = [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)]
     network_loader = NetworkFromOnnxBytes(model.loader)
     profiles = [
         Profile().add("X", *profile0_shapes),
         Profile().add("X", *profile1_shapes),
     ]
     config_loader = CreateConfig(profiles=profiles)
     with TrtRunner(EngineFromNetwork(network_loader,
                                      config_loader)) as runner:
         for index, shapes in enumerate([profile0_shapes, profile1_shapes]):
             runner.set_profile(index)
             for shape in shapes:
                 model.check_runner(runner, {"X": shape})