Example #1
def main():
    """Calibrate, build and run an int8 TensorRT engine for the identity ONNX model.

    Raises:
        AssertionError: If the engine output differs from the input data.
    """
    # We can provide a path or file-like object if we want to cache calibration data.
    # This lets us avoid running calibration the next time we build the engine.
    # TIP: You can use this calibrator with TensorRT APIs directly (e.g. config.int8_calibrator).
    # You don't have to use it with Polygraphy loaders if you don't want to.
    # NOTE(review): the trailing argument of this call was missing from the snippet;
    # the cache file name used here is an assumption - confirm.
    calibrator = Calibrator(data_loader=calib_data(), cache="identity-calib.cache")

    # We must enable int8 mode in addition to providing the calibrator.
    build_engine = EngineFromNetwork(
        NetworkFromOnnxPath("identity.onnx"),
        config=CreateConfig(int8=True, calibrator=calibrator),
    )

    # When we activate our runner, it will calibrate and build the engine. If we want to
    # see the logging output from TensorRT, we can temporarily increase logging verbosity:
    with G_LOGGER.verbosity(G_LOGGER.VERBOSE), TrtRunner(build_engine) as runner:
        # Finally, we can test out our int8 TensorRT engine with some dummy input data:
        inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer({"x": inp_data})

        assert np.array_equal(outputs["y"], inp_data)  # It's an identity model!
Example #2
def main():
    """Build an fp16 TensorRT engine from `identity.onnx`, save it, and run one inference.

    Raises:
        AssertionError: If the engine output differs from the input data.
    """
    # Lazy loaders are composed to express ONNX -> TensorRT network -> TensorRT engine (w/ fp16).
    # NOTE: each loader is a *callable* that produces its object when invoked; nothing is
    #   built at this point. For direct construction, see the immediately evaluated
    #   functional API (examples/api/06_immediate_eval_api).
    parse_network = NetworkFromOnnxPath("identity.onnx")
    fp16_config = CreateConfig(fp16=True)  # `config` is an optional argument.
    engine_loader = EngineFromNetwork(parse_network, config=fp16_config)

    # Wrapping the loader in `SaveEngine` serializes the engine to a file so it can be
    # reused elsewhere. It still returns the engine when called, so it chains like any loader.
    saving_loader = SaveEngine(engine_loader, path="identity.engine")

    # Inference: construct a runner, activate it through a context manager, call `infer()`.
    # NOTE: activate()/deactivate() can be used instead, but forgetting deactivate() leaks
    # memory, which makes the context manager the safer option.
    with TrtRunner(saving_loader) as runner:
        dummy_input = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and may reuse them between `infer()` calls.
        # Use `copy.deepcopy()` to keep results from several inferences.
        results = runner.infer(feed_dict={"x": dummy_input})

        # It's an identity model, so the output must equal the input.
        assert np.array_equal(results["y"], dummy_input)

        print("Inference succeeded!")
Example #3
 def test_basic(self):
     """The runner reports active inside the `with` block and inactive once it exits."""
     onnx_bytes_loader = ONNX_MODELS["identity"].loader
     build_engine = EngineFromNetwork(NetworkFromOnnxBytes(onnx_bytes_loader))
     with TrtRunner(build_engine) as runner:
         assert runner.is_active
     # Leaving the context manager must deactivate the runner.
     assert not runner.is_active
Example #4
 def test_error_on_wrong_dtype_feed_dict(self):
     """Feeding int32 data for input "x" must raise a PolygraphyException about the dtype."""
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         with pytest.raises(PolygraphyException, match="unexpected dtype."):
             # The `runner.infer(` call was missing from the snippet, which left a
             # dangling dict literal and an unmatched closing parenthesis.
             runner.infer({"x": np.ones(shape=(1, 1, 2, 2), dtype=np.int32)})
Example #5
def main():
    """Run `identity.onnx` under ONNX-Runtime and TensorRT and check that the outputs match.

    Raises:
        AssertionError: If the outputs of the two runners do not match within `atol=1e-8`.
    """
    # Imported locally so the reconstructed runner list below does not rely on an
    # import that is not visible in this snippet.
    from polygraphy.backend.onnxrt import OnnxrtRunner

    # The OnnxrtRunner requires an ONNX-RT session.
    # We can use the SessionFromOnnx lazy loader to construct one easily:
    build_onnxrt_session = SessionFromOnnx("identity.onnx")

    # The TrtRunner requires a TensorRT engine.
    # To create one from the ONNX model, we can chain a couple lazy loaders together:
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    # NOTE(review): the list contents were missing from the snippet; one runner per
    # loader built above is the evident intent - confirm the order.
    runners = [
        TrtRunner(build_engine),
        OnnxrtRunner(build_onnxrt_session),
    ]

    # `Comparator.run()` will run each runner separately using synthetic input data and
    #   return a `RunResults` instance. See `polygraphy/comparator/struct.py` for details.
    # TIP: To use custom input data, you can set the `data_loader` parameter in `Comparator.run()`
    #   to a generator or iterable that yields `Dict[str, np.ndarray]`.
    run_results = Comparator.run(runners)

    # `Comparator.compare_accuracy()` checks that outputs match between runners.
    # TIP: The `compare_func` parameter can be used to control how outputs are compared (see API reference for details).
    #   The default comparison function is created by `CompareFunc.simple()`, but we can construct it
    #   explicitly if we want to change the default parameters, such as tolerance.
    assert bool(
        Comparator.compare_accuracy(
            run_results, compare_func=CompareFunc.simple(atol=1e-8)
        )
    )

    # We can use `RunResults.save()` method to save the inference results to a JSON file.
    # This can be useful if you want to generate and compare results separately.
    # NOTE(review): the call itself was missing from the snippet; the file name is an
    # assumption - confirm.
    run_results.save("inference_results.json")
Example #6
 def test_cannot_use_device_view_shape_tensor(self):
     """Passing a device array for the "new_shape" input must raise a PolygraphyException."""
     onnx_loader = ONNX_MODELS["empty_tensor_expand"].loader
     build_engine = EngineFromNetwork(NetworkFromOnnxBytes(onnx_loader))
     with TrtRunner(build_engine) as runner, cuda.DeviceArray(shape=(5,), dtype=np.int32) as device_shape:
         feed_dict = {
             "data": np.ones((2, 0, 3, 0), dtype=np.float32),
             # Deliberately a device-side array; the expected error says it must be in host memory.
             "new_shape": device_shape,
         }
         with pytest.raises(PolygraphyException, match="it must reside in host memory"):
             runner.infer(feed_dict)
Example #7
 def test_no_output_copy(self):
     """With `copy_outputs_to_host=False`, output "y" is a `cuda.DeviceView` equal to the input."""
     onnx_loader = ONNX_MODELS["identity"].loader
     build_engine = EngineFromNetwork(NetworkFromOnnxBytes(onnx_loader))
     with TrtRunner(build_engine) as runner:
         host_input = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)
         results = runner.infer({"x": host_input}, copy_outputs_to_host=False)
         # The output must not have been copied back into a host array.
         assert isinstance(results["y"], cuda.DeviceView)
         # Pulling the data out explicitly must still give the identity result.
         assert np.array_equal(results["y"].numpy(), host_input)
Example #8
    def test_calibrator_with_path_name_cache(self, identity_builder_network):
        """Build an int8 engine with a calibrator whose cache is given as a file path.

        Args:
            identity_builder_network: A `(builder, network)` pair, unpacked below.
        """
        builder, network = identity_builder_network
        data = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

        with tempfile.NamedTemporaryFile() as cache:
            # Only the temporary file's *name* is handed to the calibrator.
            create_config = CreateConfig(int8=True, calibrator=Calibrator(data, cache=cache.name))
            # Calling the loader builds the engine; the engine is used as a context
            # manager, and nothing needs to happen while it is alive.
            # NOTE(review): the body of this `with` was missing from the snippet; any
            # follow-up check on the cache file could not be recovered.
            with EngineFromNetwork((builder, network), create_config)():
                pass
Example #9
    def check_network(self, suffix):
        # NOTE(review): the prose below reads like a docstring whose opening/closing
        # triple quotes (and apparently a section header before the return type) were
        # lost; as written it is not valid Python. Several statements further down are
        # truncated as well - each spot is flagged. Restore from the original source.
        Checks whether the provided network is accurate compared to golden values.

            OrderedDict[str, OutputCompareResult]:
                    A mapping of output names to an object describing whether they matched, and what the
                    required tolerances were.
        from polygraphy.comparator import Comparator, CompareFunc, DataLoader
        from polygraphy.backend.trt import EngineFromNetwork, TrtRunner, ModifyNetwork, SaveEngine

        # Logging severity is left unchanged when `show_output` is set, otherwise it is
        # raised to CRITICAL for the duration of the build and run.
        with G_LOGGER.verbosity(severity=G_LOGGER.severity if self.args.
                                show_output else G_LOGGER.CRITICAL):
            data_loader = tool_util.get_data_loader(self.args)

            self.args.strict_types = True  # HACK: Override strict types so things actually run in the right precision.
            # NOTE(review): call is truncated - remaining arguments and closing parenthesis are missing.
            config = tool_util.get_trt_config_loader(self.args,

            # The engine path is derived from `save_engine`, with the caller's suffix and
            # the current precision inserted.
            suffix = "-{:}-{:}".format(suffix, self.precision)
            engine_path = misc.insert_suffix(self.args.save_engine, suffix)

            # NOTE(review): call is truncated - keyword arguments and closing parenthesis are missing.
            self.builder, self.network, self.parser = ModifyNetwork(
                (self.builder, self.network, self.parser),

            # NOTE(review): call is truncated - presumably the save path (`engine_path`,
            # otherwise unused) was passed here; confirm.
            engine_loader = SaveEngine(EngineFromNetwork(
                (self.builder, self.network, self.parser), config),

            runners = [TrtRunner(engine_loader)]

            results = Comparator.run(runners, data_loader=data_loader)
            # NOTE(review): the body of this `if` is missing.
            if self.args.validate:

            compare_func = CompareFunc.basic_compare_func(
                check_shapes=not self.args.no_shape_check)
            accuracy_result = Comparator.compare_accuracy(
                results, compare_func=compare_func)

        tolerances = list(accuracy_result.values())[0][
            0]  # First iteration of first runner pair
        for name, req_tol in tolerances.items():
            # NOTE(review): the calls that consumed these two messages (presumably
            # logger calls) and the `else:` between them are missing.
            if bool(req_tol):
                    "PASSED | Output: {:} | Required Tolerances: {:}".format(
                        name, req_tol))
                    "FAILED | Output: {:} | Required Tolerances: {:}".format(
                        name, req_tol))
        return accuracy_result
Example #10
 def test_can_build_with_calibrator(self, identity_builder_network):
     """An int8 engine builds with a calibrator, and the calibrator frees its buffers afterwards.

     Args:
         identity_builder_network: A `(builder, network)` pair, unpacked below.
     """
     builder, network = identity_builder_network
     calibrator = Calibrator(DataLoader())
     create_config = CreateConfig(int8=True, calibrator=calibrator)
     loader = EngineFromNetwork((builder, network), create_config)
     # Calling the loader builds the engine; nothing needs to happen while it is alive.
     # (The body of this `with` was missing from the snippet.)
     with loader():
         pass
     # Calibrator buffers should be freed after the build
     assert all(buf.allocated_nbytes == 0 for buf in calibrator.device_buffers.values())
Example #11
    def test_multirun_outputs_are_different(self):
        """Two iterations of the same runner must differ in at least one element of every output."""
        build_engine = EngineFromNetwork(NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader))
        runner = TrtRunner(build_engine)
        run_results = Comparator.run([runner], data_loader=DataLoader(iterations=2))

        per_iteration = run_results[runner.name]
        first_run = per_iteration[0]
        second_run = per_iteration[1]
        for output_name in first_run:
            # `np.any` is enough: a single differing element shows the results are not copies.
            assert np.any(first_run[output_name] != second_run[output_name])
Example #12
 def test_error_on_wrong_name_feed_dict(self, names, err):
     """Feeding the input names in `names` must raise a PolygraphyException matching `err`.

     Args:
         names: Input names used as keys of the feed dict.
         err: Pattern the raised exception message must match.
     """
     model = ONNX_MODELS["identity"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner:
         with pytest.raises(PolygraphyException, match=err):
             # The `runner.infer({ ... })` wrapper was missing from the snippet, which
             # left only the inside of the dict comprehension.
             runner.infer(
                 {
                     name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32)
                     for name in names
                 }
             )
Example #13
 def test_basic(self):
     """An activated runner owns its engine and context and reports an inference time."""
     onnx_bytes_loader = ONNX_MODELS["identity"].loader
     build_engine = EngineFromNetwork(NetworkFromOnnxBytes(onnx_bytes_loader))
     with TrtRunner(build_engine) as runner:
         assert runner.is_active
         # The runner was given a loader, not a ready-made engine or context.
         assert runner.owns_engine
         assert runner.owns_context
         assert runner.last_inference_time() is not None
     # Leaving the context manager must deactivate the runner.
     assert not runner.is_active
Example #14
 def test_basic(self):
     """An activated runner owns its engine and context; deactivation clears cached metadata."""
     onnx_bytes_loader = ONNX_MODELS["identity"].loader
     build_engine = EngineFromNetwork(NetworkFromOnnxBytes(onnx_bytes_loader))
     with TrtRunner(build_engine) as runner:
         assert runner.is_active
         # The runner was given a loader, not a ready-made engine or context.
         assert runner.owns_engine
         assert runner.owns_context
     # After the context manager exits, the runner is inactive and holds no cached input metadata.
     assert not runner.is_active
     assert runner._cached_input_metadata is None
Example #15
def main():
    """Run every sample of `REAL_DATASET` through a TensorRT engine and check it against `EXPECTED_OUTPUTS`.

    Raises:
        AssertionError: If any output "y" differs from its expected value.
    """
    engine_loader = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    with TrtRunner(engine_loader) as runner:
        for sample, expected in zip(REAL_DATASET, EXPECTED_OUTPUTS):
            # NOTE: The runner owns the output buffers and may reuse them between `infer()` calls,
            # so each result is checked right away. Use `copy.deepcopy()` to keep results around.
            result = runner.infer(feed_dict={"x": sample})

            assert np.array_equal(result["y"], expected)
Example #16
    def test_segfault_does_not_hang(self):
        """A loader failing with a segfault-like error in a subprocess must raise instead of hanging."""

        def raise_called_process_error():
            """Stand-in engine loader that raises a `CalledProcessError` subclass with return code -11."""

            # The class body was missing from the snippet; an empty subclass is all that is needed.
            class FakeSegfault(sp.CalledProcessError):
                pass

            raise FakeSegfault(-11, ["simulate", "segfault"])

        runners = [TrtRunner(EngineFromNetwork(raise_called_process_error))]
        with pytest.raises(PolygraphyException):
            Comparator.run(runners, use_subprocess=True, subprocess_polling_interval=1)
Example #17
    def test_empty_tensor_with_dynamic_input_shape_tensor(self):
        """Check the runner for each "new_shape" value covered by a single profile."""
        model = ONNX_MODELS["empty_tensor_expand"]
        min_shape = (1, 2, 0, 3, 0)
        opt_shape = (2, 2, 0, 3, 0)
        max_shape = (4, 2, 0, 3, 0)

        load_network = NetworkFromOnnxBytes(model.loader)
        # The same three values serve as the profile's arguments and as the test inputs.
        shape_profile = Profile().add("new_shape", min_shape, opt_shape, max_shape)
        load_config = CreateConfig(profiles=[shape_profile])

        build_engine = EngineFromNetwork(load_network, load_config)
        with TrtRunner(build_engine) as runner:
            for new_shape in (min_shape, opt_shape, max_shape):
                model.check_runner(runner, {"new_shape": new_shape})
Example #18
    def test_calibrator_iterable_data(self, identity_builder_network):
        """A calibrator fed from a list must report one batch per list element.

        Args:
            identity_builder_network: A `(builder, network)` pair, unpacked below.
        """
        builder, network = identity_builder_network
        expected_batches = 2

        # The same feed dict repeated `expected_batches` times.
        calib_batches = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}] * expected_batches
        calibrator = Calibrator(calib_batches)

        int8_config = CreateConfig(int8=True, calibrator=calibrator)
        build_engine = EngineFromNetwork((builder, network), int8_config)
        # Calling the loader builds the engine; the batch count is checked while it is alive.
        with build_engine():
            assert calibrator.num_batches == expected_batches
Example #19
 def test_device_view_dynamic_shapes(self, use_view):
     """Inference on a dynamic-shape model accepts device-side input and returns it unchanged.

     Args:
         use_view: If true, feed a `cuda.DeviceView` of the device array instead of
             the array itself.
     """
     model = ONNX_MODELS["dynamic_identity"]
     profiles = [
         Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)),
     ]
     runner = TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader), CreateConfig(profiles=profiles)))
     with runner, cuda.DeviceArray(shape=(1, 2, 3, 3), dtype=np.float32) as arr:
         inp = np.random.random_sample(size=(1, 2, 3, 3)).astype(np.float32)
         # Upload the host data first; without this the device buffer never holds `inp`
         # and the equality check below cannot be expected to pass.
         arr.copy_from(inp)
         outputs = runner.infer({"X": cuda.DeviceView(arr.ptr, arr.shape, arr.dtype) if use_view else arr})
         assert np.all(outputs["Y"] == inp)
         assert outputs["Y"].shape == (1, 2, 3, 3)
Example #20
    def test_subsequent_infers_with_different_input_types(self):
        """Alternating host-array and device-array inputs must all give the identity result."""
        onnx_loader = ONNX_MODELS["identity"].loader
        build_engine = EngineFromNetwork(NetworkFromOnnxBytes(onnx_loader))
        with TrtRunner(build_engine) as runner:
            host_input = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

            # Host array first ...
            results = runner.infer({"x": host_input})
            assert np.all(results["y"] == host_input)

            # ... then a device array filled from the same data ...
            results = runner.infer({"x": cuda.DeviceArray().copy_from(host_input)})
            assert np.all(results["y"] == host_input)

            # ... and back to the host array.
            results = runner.infer({"x": host_input})
            assert np.all(results["y"] == host_input)
Example #21
    def test_calibrator_caches_without_explicit_cache(self, identity_builder_network):
        """A calibrator created without a `cache` argument still has cache data after a build.

        Args:
            identity_builder_network: A `(builder, network)` pair, unpacked below.
        """
        builder, network = identity_builder_network
        data = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

        calibrator = Calibrator(data)
        # First, populate the cache
        create_config = CreateConfig(int8=True, calibrator=calibrator)
        # Calling the loader builds the engine; nothing needs to happen while it is alive.
        # (The body of this `with` was missing from the snippet.)
        with EngineFromNetwork((builder, network), create_config)():
            pass

        # Check that the internal cache is populated
        assert calibrator.read_calibration_cache()
Example #22
    def test_calibrator_generator_data(self, identity_builder_network):
        """A calibrator fed from a generator must report one batch per yielded item.

        Args:
            identity_builder_network: A `(builder, network)` pair, unpacked below.
        """
        builder, network = identity_builder_network
        expected_batches = 2

        def generate_data():
            """Yield `expected_batches` feed dicts that all share one array."""
            batch = np.ones((1, 1, 2, 2), dtype=np.float32)
            for _ in range(expected_batches):
                yield {"x": batch}

        calibrator = Calibrator(generate_data())

        int8_config = CreateConfig(int8=True, calibrator=calibrator)
        build_engine = EngineFromNetwork((builder, network), int8_config)
        # Calling the loader builds the engine; the batch count is checked while it is alive.
        with build_engine():
            assert calibrator.num_batches == expected_batches
Example #23
    def test_calibrator_with_file_object_cache(self, identity_builder_network,
        # NOTE(review): the signature is truncated - the remaining parameter(s) and the
        # closing `):` are missing. The body reads a `mode` name, presumably a
        # parametrized argument; confirm against the original source.
        builder, network = identity_builder_network
        data = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

        # The temporary file is opened with the mode under test.
        with tempfile.NamedTemporaryFile(mode=mode) as cache:
            # NOTE(review): call is truncated - the calibrator argument (presumably built
            # from `data` and the open `cache` file object) and the closing parenthesis
            # are missing.
            create_config = CreateConfig(int8=True,
            with func.invoke(
                    EngineFromNetwork((builder, network), create_config)):
                # NOTE(review): the body of this `if` is missing; whatever is checked is
                # skipped for the read-only "rb" mode.
                if mode != "rb":
Example #24
    def test_multithreaded_runners_from_engine(self):
        """Two runners that share one engine can be checked from two threads at once."""
        model = ONNX_MODELS["identity"]
        # NOTE(review): the argument of `func.invoke(` was missing from the snippet; it is
        # reconstructed as the usual ONNX -> network -> engine loader chain - confirm.
        engine = func.invoke(EngineFromNetwork(NetworkFromOnnxBytes(model.loader)))

        with engine, TrtRunner(engine) as runner0, TrtRunner(engine) as runner1:
            threads = [
                threading.Thread(target=model.check_runner, args=(runner0,)),
                threading.Thread(target=model.check_runner, args=(runner1,)),
            ]
            # Start both threads before joining either so the two checks overlap, and
            # join inside the `with` so the runners and engine outlive the threads.
            # NOTE: `Thread.join()` does not re-raise exceptions raised in the thread.
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
Example #25
 def test_device_views(self, use_view):
     """Inference accepts a device array (or a view of it) for one input next to a host array.

     Args:
         use_view: If true, feed `x.view()` instead of the device array itself.
     """
     model = ONNX_MODELS["reducable"]
     network_loader = NetworkFromOnnxBytes(model.loader)
     with TrtRunner(EngineFromNetwork(network_loader)) as runner, cuda.DeviceArray((1,), dtype=np.float32) as x:
         x.copy_from(np.ones((1,), dtype=np.float32))
         # The dict braces and the closing parenthesis of this call were missing from the snippet.
         outputs = runner.infer(
             {
                 "X0": x.view() if use_view else x,
                 "Y0": np.ones((1,), dtype=np.float32),
             }
         )
         assert outputs["identity_out_6"][0] == 2
         assert outputs["identity_out_8"][0] == 2
Example #26
def main():
    """Build an engine from the `load_network`/`load_config` callables and run one inference.

    Raises:
        AssertionError: If the engine output differs from the input data.
    """
    # No TensorRT APIs are needed from here on, so this is regular Polygraphy again.
    # NOTE: lazy loaders take the functions themselves as arguments - they are *not* called here.
    engine_loader = EngineFromNetwork(load_network, config=load_config)

    with TrtRunner(engine_loader) as runner:
        dummy_input = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and may reuse them between `infer()` calls.
        # Use `copy.deepcopy()` to keep results from several inferences.
        results = runner.infer({"x": dummy_input})

        # It's an identity model, so every output element must equal the input.
        assert np.all(results["y"] == dummy_input)
Example #27
    def test_dim_param_trt_onnxrt(self):
        """ONNX-Runtime and TensorRT must agree on the "dim_param" model with default iterations."""
        # Imported locally so the reconstructed runner list below does not rely on an
        # import that is not visible in this snippet.
        from polygraphy.backend.onnxrt import OnnxrtRunner

        load_onnx_bytes = ONNX_MODELS["dim_param"].loader
        build_onnxrt_session = SessionFromOnnx(load_onnx_bytes)
        load_engine = EngineFromNetwork(NetworkFromOnnxBytes(load_onnx_bytes))

        # NOTE(review): the list contents were missing from the snippet; one runner per
        # loader built above is the evident intent - confirm the order.
        runners = [
            OnnxrtRunner(build_onnxrt_session),
            TrtRunner(load_engine),
        ]

        run_results = Comparator.run(runners)
        # Shapes are only compared when the TensorRT version is at least 7.0.
        compare_func = CompareFunc.simple(check_shapes=mod.version(trt.__version__) >= mod.version("7.0"))
        assert bool(Comparator.compare_accuracy(run_results, compare_func=compare_func))
        assert len(list(run_results.values())[0]) == 1  # Default number of iterations
Example #28
 def test_multiple_profiles(self):
     """With two optimization profiles, the second one handles every shape in `shapes`."""
     model = ONNX_MODELS["dynamic_identity"]
     shapes = [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)]
     network_loader = NetworkFromOnnxBytes(model.loader)
     # The closing bracket of this list was missing from the snippet.
     profiles = [
         Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)),
         Profile().add("X", *shapes),
     ]
     config_loader = CreateConfig(profiles=profiles)
     with TrtRunner(EngineFromNetwork(network_loader, config_loader)) as runner:
         # Select the second profile, i.e. the one built from `shapes`.
         runner.context.active_optimization_profile = 1
         for shape in shapes:
             model.check_runner(runner, {"X": shape})
Example #29
    def test_calibrator_rechecks_cache_on_reset(self, identity_builder_network):
        """After a reset, the calibrator must read the cache back from the file on disk.

        Args:
            identity_builder_network: A `(builder, network)` pair, unpacked below.
        """
        builder, network = identity_builder_network
        data = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

        with tempfile.NamedTemporaryFile(mode="wb+") as cache:
            calibrator = Calibrator(data, cache=cache.name)
            # First, populate the cache
            create_config = CreateConfig(int8=True, calibrator=calibrator)
            # Calling the loader builds the engine; nothing needs to happen while it is alive.
            # (The body of this `with` was missing from the snippet.)
            with EngineFromNetwork((builder, network), create_config)():
                pass

            # Ensure that now the calibrator will read from the cache when reset
            # NOTE(review): the reset call was missing from the snippet; it is restored
            # from the comment above and the test name - confirm.
            calibrator.reset()
            assert not calibrator.has_cached_scales
            assert len(calibrator.read_calibration_cache()) == os.stat(cache.name).st_size
Example #30
 def test_multiple_profiles(self):
     """Each of two optimization profiles handles the shapes it was built from."""
     model = ONNX_MODELS["dynamic_identity"]
     profile0_shapes = [(1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)]
     profile1_shapes = [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)]
     network_loader = NetworkFromOnnxBytes(model.loader)
     # The closing bracket of this list was missing from the snippet.
     profiles = [
         Profile().add("X", *profile0_shapes),
         Profile().add("X", *profile1_shapes),
     ]
     config_loader = CreateConfig(profiles=profiles)
     with TrtRunner(EngineFromNetwork(network_loader, config_loader)) as runner:
         for index, shapes in enumerate([profile0_shapes, profile1_shapes]):
             # Switch to the profile that matches this group of shapes. The snippet left
             # `index` unused, so no profile was ever selected.
             # NOTE(review): the exact call was lost; this uses the context attribute as
             # another test in this collection does - confirm against the runner API.
             runner.context.active_optimization_profile = index
             for shape in shapes:
                 model.check_runner(runner, {"X": shape})