def test_loader_explicit_precision(self):
    """Check that a network parsed with explicit precision reports it.

    Builds the identity ONNX model through `NetworkFromOnnxPath` with
    explicit precision requested, evaluates the loader immediately via
    `func.invoke`, and verifies the flags on the resulting network.
    """
    # NOTE(review): the loader argument was missing from this call; it is
    # reconstructed from the sibling `test_loader` test and from the
    # `has_explicit_precision` assertion below - confirm against upstream.
    load_network = NetworkFromOnnxPath(
        ONNX_MODELS["identity"].path, explicit_precision=True)
    builder, network, parser = func.invoke(load_network)
    # All three objects are used as context managers so that they are
    # released when the test finishes.
    with builder, network, parser:
        assert not network.has_implicit_batch_dimension
        assert network.has_explicit_precision
def main():
    """Build an fp16 TensorRT engine from `identity.onnx`, save it, and run one inference."""
    # Lazy loaders compose into a conversion pipeline. Here that pipeline is
    # ONNX -> TensorRT Network -> TensorRT engine (w/ fp16).
    # NOTE: each loader is a *callable* that returns its result when invoked, not the
    #   result itself. To get the engine directly, use the immediately evaluated
    #   functional API instead. See examples/api/06_immediate_eval_api for details.
    parse_network = NetworkFromOnnxPath("identity.onnx")
    fp16_config = CreateConfig(fp16=True)
    # `config` is an optional argument of `EngineFromNetwork`.
    engine_loader = EngineFromNetwork(parse_network, config=fp16_config)

    # Wrapping the loader in `SaveEngine` serializes the engine to a file so it can be
    # reused elsewhere. `SaveEngine` still returns the TensorRT engine when called,
    # which is what lets it be chained with other loaders.
    engine_loader = SaveEngine(engine_loader, path="identity.engine")

    # With the loader ready, inference only needs a runner that is activated by a
    # context manager (i.e. `with TrtRunner(...)`) and a call to `infer()`.
    # NOTE: `activate()` can be used instead of a context manager, but then `deactivate()`
    # must be called to avoid a memory leak, so the context manager is the safer option.
    with TrtRunner(engine_loader) as runner:
        ones_input = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between
        # `infer()` calls. To keep results from multiple inferences, use `copy.deepcopy()`.
        outputs = runner.infer(feed_dict={"x": ones_input})

        # It's an identity model, so the output must equal the input.
        assert np.array_equal(outputs["y"], ones_input)

        print("Inference succeeded!")
# Example #3
def main():
    """Calibrate and build an int8 TensorRT engine for `identity.onnx`, then run one inference."""
    # We can provide a path or file-like object if we want to cache calibration data.
    # This lets us avoid running calibration the next time we build the engine.
    # TIP: You can use this calibrator with TensorRT APIs directly (e.g. config.int8_calibrator).
    # You don't have to use it with Polygraphy loaders if you don't want to.
    calibrator = Calibrator(data_loader=calib_data(),
                            cache="identity-calib.cache")

    # We must enable int8 mode in addition to providing the calibrator.
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"),
                                     config=CreateConfig(
                                         int8=True, calibrator=calibrator))

    # When we activate our runner, it will calibrate and build the engine. If we want to
    # see the logging output from TensorRT, we can temporarily increase logging verbosity:
    with G_LOGGER.verbosity(
            G_LOGGER.VERBOSE), TrtRunner(build_engine) as runner:
        # Finally, we can test out our int8 TensorRT engine with some dummy input data:
        inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer({"x": inp_data})

        assert np.array_equal(outputs["y"],
                              inp_data)  # It's an identity model!
# Example #4
def main():
    """Run `identity.onnx` under TensorRT and ONNX-Runtime, compare the outputs, and save them."""
    # The OnnxrtRunner requires an ONNX-RT session.
    # We can use the SessionFromOnnx lazy loader to construct one easily:
    build_onnxrt_session = SessionFromOnnx("identity.onnx")

    # The TrtRunner requires a TensorRT engine.
    # To create one from the ONNX model, we can chain a couple lazy loaders together:
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    runners = [
        TrtRunner(build_engine),
        OnnxrtRunner(build_onnxrt_session),
    ]

    # `Comparator.run()` will run each runner separately using synthetic input data and
    #   return a `RunResults` instance. See `polygraphy/comparator/` for details.
    # TIP: To use custom input data, you can set the `data_loader` parameter in `Comparator.run()`
    #   to a generator or iterable that yields `Dict[str, np.ndarray]`.
    run_results = Comparator.run(runners)

    # `Comparator.compare_accuracy()` checks that outputs match between runners.
    # TIP: The `compare_func` parameter can be used to control how outputs are compared (see API reference for details).
    #   The default comparison function is created by `CompareFunc.simple()`, but we can construct it
    #   explicitly if we want to change the default parameters, such as tolerance.
    assert bool(
        Comparator.compare_accuracy(
            run_results, compare_func=CompareFunc.simple(atol=1e-8)))

    # We can use the `RunResults.save()` method to save the inference results to a JSON file.
    # This can be useful if you want to generate and compare results separately.
    run_results.save("inference_results.json")
# Example #5
def main():
    """Feed every sample of `REAL_DATASET` through a TensorRT engine and check it against `EXPECTED_OUTPUTS`."""
    parse_network = NetworkFromOnnxPath("identity.onnx")
    engine_loader = EngineFromNetwork(parse_network)

    with TrtRunner(engine_loader) as runner:
        # Inputs and their expected outputs are walked in lockstep.
        for sample, expected in zip(REAL_DATASET, EXPECTED_OUTPUTS):
            # NOTE: The runner owns the output buffers and may reuse them between
            # `infer()` calls; use `copy.deepcopy()` to keep results from several calls.
            result = runner.infer(feed_dict={"x": sample})

            assert np.array_equal(result["y"], expected)
# Example #6
def main():
    """Run `identity.onnx` under TensorRT and ONNX-Runtime, compare the outputs, and save them."""
    # The OnnxrtRunner requires an ONNX-RT session.
    # We can use the SessionFromOnnx lazy loader to construct one easily:
    build_onnxrt_session = SessionFromOnnx("identity.onnx")

    # The TrtRunner requires a TensorRT engine.
    # To create one from the ONNX model, we can chain a couple lazy loaders together:
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    runners = [
        TrtRunner(build_engine),
        OnnxrtRunner(build_onnxrt_session),
    ]

    # `Comparator.run()` will run each runner separately using synthetic input data and return a `RunResults` instance.
    # See `polygraphy/comparator/` for details.
    run_results = Comparator.run(runners)

    # `Comparator.compare_accuracy()` checks that outputs match between runners.
    assert bool(Comparator.compare_accuracy(run_results))

    # We can use the `RunResults.save()` method to save the inference results to a JSON file.
    # This can be useful if you want to generate and compare results separately.
    run_results.save("inference_results.json")
# Example #7
from polygraphy.backend.trt import NetworkFromOnnxPath, CreateConfig, EngineFromNetwork, Calibrator, TrtRunner
from polygraphy.logger import G_LOGGER

import numpy as np
import os

# Location of the ONNX model: two directories above this file, under "models".
MODEL = os.path.join(
    os.path.dirname(__file__),
    os.pardir,
    os.pardir,
    "models",
    "identity.onnx",
)
INPUT_SHAPE = (1, 1, 2, 2)

# Any iterable or generator that yields `feed_dict`s can serve as the data loader for Calibrator.
# A feed_dict is simply a mapping from input names to the matching inputs (as NumPy arrays).
# Calibration keeps going until the data loader is exhausted (4 batches in this example).
def calib_data():
    """Yield four feed_dicts, each mapping input "x" to a float32 array of ones of shape INPUT_SHAPE."""
    yield from (
        {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)}  # Totally real data
        for _ in range(4)
    )

# Passing a path (or file-like object) as `cache` lets the calibrator cache its
# calibration data, so calibration can be skipped the next time the engine is built.
calibrator = Calibrator(
    data_loader=calib_data(),
    cache="identity-calib.cache",
)
build_engine = EngineFromNetwork(
    NetworkFromOnnxPath(MODEL),
    config=CreateConfig(int8=True, calibrator=calibrator),
)

# Activating the runner calibrates and builds the engine. Logging verbosity is raised
# for the duration of the block so that the TensorRT logging output is visible.
with G_LOGGER.verbosity(G_LOGGER.VERBOSE), TrtRunner(build_engine) as runner:
    feed_dict = {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)}
    outputs = runner.infer(feed_dict=feed_dict)
    # Output "y" is expected to match input "x" element-wise.
    assert np.all(outputs["y"] == feed_dict["x"])
# Example #8
import os

import numpy as np
from polygraphy.backend.trt import (EngineFromNetwork, NetworkFromOnnxPath,
                                    TrtRunner)

INPUT_SHAPE = (1, 1, 2, 2)
REAL_DATASET = [  # Definitely real data
    np.ones(INPUT_SHAPE, dtype=np.float32),
    np.zeros(INPUT_SHAPE, dtype=np.float32),
    np.ones(INPUT_SHAPE, dtype=np.float32),
    np.zeros(INPUT_SHAPE, dtype=np.float32),
]

# For our identity network, the golden output values are the same as the input values.
# Though this network appears to do nothing, it can be incredibly useful in some cases (like here!).
GOLDEN_VALUES = REAL_DATASET

MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir,
                     "models", "identity.onnx")

build_engine = EngineFromNetwork(NetworkFromOnnxPath(MODEL))

# Activate the runner using a context manager. For TensorRT, this will build an engine,
# then destroy it upon exiting the context.
# NOTE: You can also use the activate() function for this, but you will need to make sure to
# deactivate() to avoid a memory leak. For that reason, a context manager is the safer option.
with TrtRunner(build_engine) as runner:
    # Each input is paired with its expected ("golden") output.
    for data, golden in zip(REAL_DATASET, GOLDEN_VALUES):
        outputs = runner.infer(feed_dict={"x": data})
        assert np.all(outputs["y"] == golden)
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Parses an ONNX model, and then extends it with an Identity layer.
"""
from polygraphy import func
from polygraphy.backend.trt import NetworkFromOnnxPath

parse_onnx = NetworkFromOnnxPath("identity.onnx")

# If we define a function called `load_network`, polygraphy can
# use it directly in place of using a model file.
# TIP: If our function isn't called `load_network`, we can explicitly specify
# the name with the `--trt-network-func-name` argument.
@func.extend(parse_onnx)
def load_network(builder, network, parser):
    """Append an identity layer after the first output of the parsed network.

    Receives the builder, network and parser produced by `parse_onnx`.
    Nothing is returned explicitly.
    """
    # NOTE: func.extend() causes the signature of this function to be `() -> (builder, network, parser)`
    # For details on how this works, see examples/api/03_interoperating_with_tensorrt

    # NOTE(review): everything after `prev_output = ...` was missing from this
    # snippet and is reconstructed from the "append an identity layer"
    # comment - confirm against the upstream example.

    # Append an identity layer to the network
    prev_output = network.get_output(0)
    # The old tensor stops being a network output; the identity layer's
    # output takes its place.
    network.unmark_output(prev_output)

    output = network.add_identity(prev_output).get_output(0)
    network.mark_output(output)
# Example #10
import os

import numpy as np
import tensorrt as trt
from polygraphy.backend.trt import (CreateConfig, EngineFromNetwork,
                                    NetworkFromOnnxPath, TrtRunner)
from polygraphy.common import func

MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, "models", "identity.onnx")

# We can use the `extend` decorator to easily extend loaders provided by Polygraphy
# The parameters our decorated function takes should match the return values of the loader we are extending.

# For `NetworkFromOnnxPath`, we can see from the API documentation that it returns a TensorRT
# builder, network and parser. That is what our function will receive.
# NOTE(review): the decorator line was missing from this snippet; it is restored
# from the description above, with `MODEL` as the path - confirm against upstream.
@func.extend(NetworkFromOnnxPath(MODEL))
def load_network(builder, network, parser):
    """Rename the parsed network to "MyIdentity" and print the new name."""
    # Here we can modify the network. For this example, we'll just set the network name.
    network.name = "MyIdentity"
    print("Network name: {:}".format(network.name))

# In case a builder configuration option is missing from Polygraphy, we can easily set it using TensorRT APIs.
# Our function will receive a TensorRT builder config since that's what `CreateConfig` returns.
# NOTE(review): the decorator and the function body were missing from this snippet;
# both are restored from the surrounding comments - confirm against upstream.
@func.extend(CreateConfig())
def load_config(config):
    """Enable the fp16 builder flag on the TensorRT builder config it receives."""
    # Polygraphy supports the fp16 flag, but in case it didn't, we could do this:
    config.set_flag(trt.BuilderFlag.FP16)

# Since we have no further need of TensorRT APIs, we can come back to regular Polygraphy.
# Example #11
import numpy as np
import tensorrt as trt
from polygraphy import func
from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner

# TIP: The immediately evaluated functional API makes it very easy to interoperate
# with backends like TensorRT. For details, see example 06 (`examples/api/06_immediate_eval_api`).

# We can use the `extend` decorator to easily extend lazy loaders provided by Polygraphy
# The parameters our decorated function takes should match the return values of the loader we are extending.

# For `NetworkFromOnnxPath`, we can see from the API documentation that it returns a TensorRT
# builder, network and parser. That is what our function will receive.
# NOTE(review): the decorator line was missing from this snippet; it is restored
# from the description above, and the model path is assumed - confirm against upstream.
@func.extend(NetworkFromOnnxPath("identity.onnx"))
def load_network(builder, network, parser):
    """Rename the parsed network to "MyIdentity" and print the new name."""
    # Here we can modify the network. For this example, we'll just set the network name.
    network.name = "MyIdentity"
    print("Network name: {:}".format(network.name))

    # Notice that we don't need to return anything - `extend()` takes care of that for us!

# In case a builder configuration option is missing from Polygraphy, we can easily set it using TensorRT APIs.
# Our function will receive a TensorRT IBuilderConfig since that's what `CreateConfig` returns.
# NOTE(review): the decorator and the function body were missing from this snippet;
# both are restored from the surrounding comments - confirm against upstream.
@func.extend(CreateConfig())
def load_config(config):
    """Enable the fp16 builder flag on the TensorRT IBuilderConfig it receives."""
    # Polygraphy supports the fp16 flag, but in case it didn't, we could do this:
    config.set_flag(trt.BuilderFlag.FP16)
# Example #12
 def test_loader(self):
     """Parse the identity ONNX model with default options and check the network flags."""
     load_network = NetworkFromOnnxPath(ONNX_MODELS["identity"].path)
     # Calling the loader performs the parse and yields the three objects.
     builder, network, parser = load_network()
     with builder, network, parser:
         # Neither flag is expected to be set on a network parsed this way.
         assert not network.has_implicit_batch_dimension
         assert not network.has_explicit_precision
# Example #13
def main():
    """Build one engine with three optimization profiles and serve it through three runners.

    The profiles cover a low-latency case, a dynamic batching case and an
    offline case. Each runner gets its own execution context, and the
    profile is selected when a runner is first activated.
    """
    # A Profile maps each input tensor to a range of shapes.
    # TIP: To save lines, calls to `add` can be chained:
    #     profile.add("input0", ...).add("input1", ...)
    #   Of course, you may alternatively write this as:
    #     profile.add("input0", ...)
    #     profile.add("input1", ...)
    # NOTE(review): the `Profile().add("X", ...` call heads were missing from this snippet;
    # they are restored using the input name "X" fed to `infer()` below - confirm against upstream.
    profiles = [
        # The low-latency case. For best performance, min == opt == max.
        Profile().add("X",
                      min=(1, 3, 28, 28),
                      opt=(1, 3, 28, 28),
                      max=(1, 3, 28, 28)),
        # The dynamic batching case. We use `4` for the opt batch size since that's our most common case.
        Profile().add("X",
                      min=(1, 3, 28, 28),
                      opt=(4, 3, 28, 28),
                      max=(32, 3, 28, 28)),
        # The offline case. For best performance, min == opt == max.
        Profile().add("X",
                      min=(128, 3, 28, 28),
                      opt=(128, 3, 28, 28),
                      max=(128, 3, 28, 28)),
    ]

    # See examples/api/06_immediate_eval_api for details on immediately evaluated functional loaders like `engine_from_network`.
    engine = engine_from_network(NetworkFromOnnxPath("dynamic_identity.onnx"),
                                 config=CreateConfig(profiles=profiles))

    # We'll save the engine so that we can inspect it with `inspect model`.
    # This should make it easy to see how the engine bindings are laid out.
    save_engine(engine, "dynamic_identity.engine")

    # We'll create, but not activate, three separate runners, each with a separate context.
    # TIP: By providing a context directly, as opposed to via a lazy loader,
    # we can ensure that the runner will *not* take ownership of it.
    low_latency = TrtRunner(engine.create_execution_context())

    # NOTE: The following two lines will cause TensorRT to display errors since profile 0
    # is already in use by the first execution context. We'll suppress them using G_LOGGER.verbosity().
    with G_LOGGER.verbosity(G_LOGGER.CRITICAL):
        dynamic_batching = TrtRunner(engine.create_execution_context())
        offline = TrtRunner(engine.create_execution_context())
        # NOTE: We could update the profile index here (e.g. `context.active_optimization_profile = 2`),
        # but instead, we'll use TrtRunner's `set_profile()` API when we later activate the runner.

    # Finally, we can activate the runners as we need them.
    # NOTE: Since the context and engine are already created, the runner will only need to
    # allocate input and output buffers during activation.

    input_img = np.ones((1, 3, 28, 28), dtype=np.float32)  # An input "image"

    with low_latency:
        outputs = low_latency.infer({"X": input_img})
        assert np.array_equal(outputs["Y"],
                              input_img)  # It's an identity model!

        print("Low latency runner succeeded!")

        # While we're serving requests online, we might decide that we need dynamic batching
        # for a moment.
        # NOTE: We're assuming that activating runners will be cheap here, so we can bring up
        # the dynamic batching runner just-in-time.
        # TIP: If activating the runner is not cheap (e.g. input/output buffers are large),
        # it might be better to keep the runner active the whole time.
        with dynamic_batching:
            # NOTE: The very first time we activate this runner, we need to set
            # the profile index (it's 0 by default). We need to do this *only once*.
            # Alternatively, we could have set the profile index in the context directly (see above).
            dynamic_batching.set_profile(
                1
            )  # Use the second profile, which is intended for dynamic batching.

            # We'll create fake batches by repeating our fake input image.
            small_input_batch = np.repeat(input_img, 4,
                                          axis=0)  # Shape: (4, 3, 28, 28)
            outputs = dynamic_batching.infer({"X": small_input_batch})
            assert np.array_equal(outputs["Y"], small_input_batch)

    # If we need dynamic batching again later, we can activate the runner once more.
    # NOTE: This time, we do *not* need to set the profile.
    with dynamic_batching:
        # NOTE: We can use any shape that's in the range of the profile without
        # additional setup - Polygraphy handles the details behind the scenes!
        large_input_batch = np.repeat(input_img, 16,
                                      axis=0)  # Shape: (16, 3, 28, 28)
        outputs = dynamic_batching.infer({"X": large_input_batch})
        assert np.array_equal(outputs["Y"], large_input_batch)

        print("Dynamic batching runner succeeded!")

    with offline:
        # NOTE: We must set the profile to something other than 0 or 1 since both of those
        # are now in use by the `low_latency` and `dynamic_batching` runners respectively.
        offline.set_profile(
            2
        )  # Use the third profile, which is intended for the offline case.

        large_offline_batch = np.repeat(input_img, 128,
                                        axis=0)  # Shape: (128, 3, 28, 28)
        outputs = offline.infer({"X": large_offline_batch})
        assert np.array_equal(outputs["Y"], large_offline_batch)

        print("Offline runner succeeded!")
from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
from polygraphy.backend.trt import CreateConfig as CreateTrtConfig, EngineFromNetwork, NetworkFromOnnxPath, Profile, SaveEngine, TrtRunner
from polygraphy.common import TensorMetadata
from polygraphy.comparator import Comparator, CompareFunc, DataLoader
import sys

# Data Loader
data_loader = DataLoader(
    input_metadata=TensorMetadata().add('tensor-0', None, (4, 1, 28, 28)))

# Loaders
build_onnxrt_session = SessionFromOnnx(
parse_network_from_onnx = NetworkFromOnnxPath(
profiles = [
                  min=[1, 1, 28, 28],
                  opt=[4, 1, 28, 28],
                  max=[16, 1, 28, 28])
create_trt_config = CreateTrtConfig(max_workspace_size=1000000000,
build_engine = EngineFromNetwork(parse_network_from_onnx,
save_engine = SaveEngine(build_engine, path='model-FP32.plan')

# Runners
runners = [