Example #1
def test_loader_explicit_precision(self):
    builder, network, parser = func.invoke(
        NetworkFromOnnxPath(ONNX_MODELS["identity"].path, explicit_precision=True)
    )
    with builder, network, parser:
        assert not network.has_implicit_batch_dimension
        assert network.has_explicit_precision
Example #2
def main():
    # We can compose multiple lazy loaders together to get the desired conversion.
    # In this case, we want ONNX -> TensorRT Network -> TensorRT engine (w/ fp16).
    #
    # NOTE: `build_engine` is a *callable* that returns an engine, not the engine itself.
    #   To get the engine directly, you can use the immediately evaluated functional API.
    #   See examples/api/06_immediate_eval_api for details.
    build_engine = EngineFromNetwork(
        NetworkFromOnnxPath("identity.onnx"), config=CreateConfig(
            fp16=True))  # Note that config is an optional argument.
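    # For illustration only: with Polygraphy's immediately evaluated functional API (see the NOTE above),
    # a sketch of the eager equivalent would look roughly like:
    #   engine = engine_from_network(
    #       network_from_onnx_path("identity.onnx"), config=create_config(fp16=True))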

    # To reuse the engine elsewhere, we can serialize and save it to a file.
    # The `SaveEngine` lazy loader will return the TensorRT engine when called,
    # which allows us to chain it together with other loaders.
    build_engine = SaveEngine(build_engine, path="identity.engine")

    # Once our loader is ready, inference is simply a matter of constructing a runner,
    # activating it with a context manager (i.e. `with TrtRunner(...)`) and calling `infer()`.
    #
    # NOTE: You can use the activate() function instead of a context manager, but you will need to make sure to
    # deactivate() to avoid a memory leak. For that reason, a context manager is the safer option.
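    # For reference, a sketch of the context-manager-free pattern mentioned above
    # (using the names from this example; always pair activate() with deactivate()):
    #   runner = TrtRunner(build_engine)
    #   runner.activate()
    #   try:
    #       ...  # run inference
    #   finally:
    #       runner.deactivate()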
    with TrtRunner(build_engine) as runner:
        inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer(feed_dict={"x": inp_data})

        assert np.array_equal(outputs["y"],
                              inp_data)  # It's an identity model!

        print("Inference succeeded!")
Example #3
def main():
    # We can provide a path or file-like object if we want to cache calibration data.
    # This lets us avoid running calibration the next time we build the engine.
    #
    # TIP: You can use this calibrator with TensorRT APIs directly (e.g. config.int8_calibrator).
    # You don't have to use it with Polygraphy loaders if you don't want to.
    calibrator = Calibrator(data_loader=calib_data(),
                            cache="identity-calib.cache")

    # We must enable int8 mode in addition to providing the calibrator.
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"),
                                     config=CreateConfig(
                                         int8=True, calibrator=calibrator))

    # When we activate our runner, it will calibrate and build the engine. If we want to
    # see the logging output from TensorRT, we can temporarily increase logging verbosity:
    with G_LOGGER.verbosity(
            G_LOGGER.VERBOSE), TrtRunner(build_engine) as runner:
        # Finally, we can test out our int8 TensorRT engine with some dummy input data:
        inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
        # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
        outputs = runner.infer({"x": inp_data})

        assert np.array_equal(outputs["y"],
                              inp_data)  # It's an identity model!
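Note that `calib_data()` is not defined in this snippet; a minimal sketch consistent with Example #7 below:

import numpy as np

INPUT_SHAPE = (1, 1, 2, 2)

# Yield four batches of (placeholder) calibration data as feed_dicts.
def calib_data():
    for _ in range(4):
        yield {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)}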
Example #4
def main():
    # The OnnxrtRunner requires an ONNX-RT session.
    # We can use the SessionFromOnnx lazy loader to construct one easily:
    build_onnxrt_session = SessionFromOnnx("identity.onnx")

    # The TrtRunner requires a TensorRT engine.
    # To create one from the ONNX model, we can chain a couple lazy loaders together:
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    runners = [
        TrtRunner(build_engine),
        OnnxrtRunner(build_onnxrt_session),
    ]

    # `Comparator.run()` will run each runner separately using synthetic input data and
    #   return a `RunResults` instance. See `polygraphy/comparator/struct.py` for details.
    #
    # TIP: To use custom input data, you can set the `data_loader` parameter in `Comparator.run()`
    #   to a generator or iterable that yields `Dict[str, np.ndarray]`.
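    #   For instance, a hypothetical generator for the identity model used here might look like:
    #     def custom_data():
    #         for _ in range(5):
    #             yield {"x": np.ones(shape=(1, 1, 2, 2), dtype=np.float32)}
    #     run_results = Comparator.run(runners, data_loader=custom_data())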
    run_results = Comparator.run(runners)

    # `Comparator.compare_accuracy()` checks that outputs match between runners.
    #
    # TIP: The `compare_func` parameter can be used to control how outputs are compared (see API reference for details).
    #   The default comparison function is created by `CompareFunc.simple()`, but we can construct it
    #   explicitly if we want to change the default parameters, such as tolerance.
    assert bool(
        Comparator.compare_accuracy(
            run_results, compare_func=CompareFunc.simple(atol=1e-8)))

    # We can use the `RunResults.save()` method to save the inference results to a JSON file.
    # This can be useful if you want to generate and compare results separately.
    run_results.save("inference_results.json")
Example #5
def main():
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    with TrtRunner(build_engine) as runner:
        for (data, golden) in zip(REAL_DATASET, EXPECTED_OUTPUTS):
            # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls.
            # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`.
            outputs = runner.infer(feed_dict={"x": data})

            assert np.array_equal(outputs["y"], golden)
Example #6
def main():
    # The OnnxrtRunner requires an ONNX-RT session.
    # We can use the SessionFromOnnx lazy loader to construct one easily:
    build_onnxrt_session = SessionFromOnnx("identity.onnx")

    # The TrtRunner requires a TensorRT engine.
    # To create one from the ONNX model, we can chain a couple lazy loaders together:
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("identity.onnx"))

    runners = [
        TrtRunner(build_engine),
        OnnxrtRunner(build_onnxrt_session),
    ]

    # `Comparator.run()` will run each runner separately using synthetic input data and return a `RunResults` instance.
    # See `polygraphy/comparator/struct.py` for details.
    run_results = Comparator.run(runners)

    # `Comparator.compare_accuracy()` checks that outputs match between runners.
    assert bool(Comparator.compare_accuracy(run_results))

    # We can use the `RunResults.save()` method to save the inference results to a JSON file.
    # This can be useful if you want to generate and compare results separately.
    run_results.save("inference_results.json")
Example #7
"""
from polygraphy.backend.trt import NetworkFromOnnxPath, CreateConfig, EngineFromNetwork, Calibrator, TrtRunner
from polygraphy.logger import G_LOGGER

import numpy as np
import os


MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, "models", "identity.onnx")
INPUT_SHAPE = (1, 1, 2, 2)

# The data loader argument to Calibrator can be any iterable or generator that yields `feed_dict`s.
# A feed_dict is just a mapping of input names to corresponding inputs (as NumPy arrays).
# Calibration will continue until our data loader runs out of data (4 batches in this example).
def calib_data():
    for _ in range(4):
        yield {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)} # Totally real data

# We can provide a path or file-like object if we want to cache calibration data.
# This lets us avoid running calibration the next time we build the engine.
calibrator = Calibrator(data_loader=calib_data(), cache="identity-calib.cache")
build_engine = EngineFromNetwork(NetworkFromOnnxPath(MODEL), config=CreateConfig(int8=True, calibrator=calibrator))

# When we activate our runner, it will calibrate and build the engine. If we want to
# see the logging output from TensorRT, we can temporarily increase logging verbosity:
with G_LOGGER.verbosity(G_LOGGER.VERBOSE):
    with TrtRunner(build_engine) as runner:
        feed_dict = {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)}
        outputs = runner.infer(feed_dict=feed_dict)
        assert np.all(outputs["y"] == feed_dict["x"])
Example #8
import os

import numpy as np
from polygraphy.backend.trt import (EngineFromNetwork, NetworkFromOnnxPath,
                                    TrtRunner)

INPUT_SHAPE = (1, 1, 2, 2)
REAL_DATASET = [  # Definitely real data
    np.ones(INPUT_SHAPE, dtype=np.float32),
    np.zeros(INPUT_SHAPE, dtype=np.float32),
    np.ones(INPUT_SHAPE, dtype=np.float32),
    np.zeros(INPUT_SHAPE, dtype=np.float32),
]

# For our identity network, the golden output values are the same as the input values.
# Though this network appears to do nothing, it can be incredibly useful in some cases (like here!).
GOLDEN_VALUES = REAL_DATASET

MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir,
                     "models", "identity.onnx")

build_engine = EngineFromNetwork(NetworkFromOnnxPath(MODEL))

# Activate the runner using a context manager. For TensorRT, this will build an engine,
# then destroy it upon exiting the context.
# NOTE: You can also use the activate() function for this, but you will need to make sure to
# deactivate() to avoid a memory leak. For that reason, a context manager is the safer option.
with TrtRunner(build_engine) as runner:
    for (data, golden) in zip(REAL_DATASET, GOLDEN_VALUES):
        outputs = runner.infer(feed_dict={"x": data})
        assert np.all(outputs["y"] == golden)
Example #9
"""
Parses an ONNX model, and then extends it with an Identity layer.
"""
from polygraphy import func
from polygraphy.backend.trt import NetworkFromOnnxPath

parse_onnx = NetworkFromOnnxPath("identity.onnx")

# If we define a function called `load_network`, polygraphy can
# use it directly in place of using a model file.
#
# TIP: If our function isn't called `load_network`, we can explicitly specify
# the name with the `--trt-network-func-name` argument.
@func.extend(parse_onnx)
def load_network(builder, network, parser):
    # NOTE: func.extend() causes the signature of this function to be `() -> (builder, network, parser)`
    # For details on how this works, see examples/api/03_interoperating_with_tensorrt

    # Append an identity layer to the network and mark its output in place of the original output.
    prev_output = network.get_output(0)
    network.unmark_output(prev_output)
    identity_out = network.add_identity(prev_output).get_output(0)
    network.mark_output(identity_out)
Example #10
import os

import numpy as np
import tensorrt as trt
from polygraphy.backend.trt import (CreateConfig, EngineFromNetwork,
                                    NetworkFromOnnxPath, TrtRunner)
from polygraphy.common import func

MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, "models", "identity.onnx")

# We can use the `extend` decorator to easily extend loaders provided by Polygraphy
# The parameters our decorated function takes should match the return values of the loader we are extending.

# For `NetworkFromOnnxPath`, we can see from the API documentation that it returns a TensorRT
# builder, network and parser. That is what our function will receive.
@func.extend(NetworkFromOnnxPath(MODEL))
def load_network(builder, network, parser):
    # Here we can modify the network. For this example, we'll just set the network name.
    network.name = "MyIdentity"
    print("Network name: {:}".format(network.name))


# In case a builder configuration option is missing from Polygraphy, we can easily set it using TensorRT APIs.
# Our function will receive a TensorRT builder config since that's what `CreateConfig` returns.
@func.extend(CreateConfig())
def load_config(config):
    # Polygraphy supports the fp16 flag, but in case it didn't, we could do this:
    config.set_flag(trt.BuilderFlag.FP16)


# Since we have no further need of TensorRT APIs, we can come back to regular Polygraphy.
Example #11
"""
import numpy as np
import tensorrt as trt
from polygraphy import func
from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner


# TIP: The immediately evaluated functional API makes it very easy to interoperate
# with backends like TensorRT. For details, see example 06 (`examples/api/06_immediate_eval_api`).

# We can use the `extend` decorator to easily extend lazy loaders provided by Polygraphy
# The parameters our decorated function takes should match the return values of the loader we are extending.

# For `NetworkFromOnnxPath`, we can see from the API documentation that it returns a TensorRT
# builder, network and parser. That is what our function will receive.
@func.extend(NetworkFromOnnxPath("identity.onnx"))
def load_network(builder, network, parser):
    # Here we can modify the network. For this example, we'll just set the network name.
    network.name = "MyIdentity"
    print("Network name: {:}".format(network.name))

    # Notice that we don't need to return anything - `extend()` takes care of that for us!


# In case a builder configuration option is missing from Polygraphy, we can easily set it using TensorRT APIs.
# Our function will receive a TensorRT IBuilderConfig since that's what `CreateConfig` returns.
@func.extend(CreateConfig())
def load_config(config):
    # Polygraphy supports the fp16 flag, but in case it didn't, we could do this:
    config.set_flag(trt.BuilderFlag.FP16)
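With both extended loaders defined, they can be chained like any other Polygraphy loaders. A minimal sketch of how the example would typically continue (an assumption, not part of the original snippet):

# Build an engine from the extended network loader, using the extended config loader.
build_engine = EngineFromNetwork(load_network, config=load_config)

with TrtRunner(build_engine) as runner:
    outputs = runner.infer(feed_dict={"x": np.ones(shape=(1, 1, 2, 2), dtype=np.float32)})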
Example #12
def test_loader(self):
    builder, network, parser = NetworkFromOnnxPath(ONNX_MODELS["identity"].path)()
    with builder, network, parser:
        assert not network.has_implicit_batch_dimension
        assert not network.has_explicit_precision
Example #13
def main():
    # A Profile maps each input tensor to a range of shapes.
    #
    # TIP: To save lines, calls to `add` can be chained:
    #     profile.add("input0", ...).add("input1", ...)
    #
    #   Of course, you may alternatively write this as:
    #     profile.add("input0", ...)
    #     profile.add("input1", ...)
    #
    profiles = [
        # The low-latency case. For best performance, min == opt == max.
        Profile().add("X",
                      min=(1, 3, 28, 28),
                      opt=(1, 3, 28, 28),
                      max=(1, 3, 28, 28)),
        # The dynamic batching case. We use `4` for the opt batch size since that's our most common case.
        Profile().add("X",
                      min=(1, 3, 28, 28),
                      opt=(4, 3, 28, 28),
                      max=(32, 3, 28, 28)),
        # The offline case. For best performance, min == opt == max.
        Profile().add("X",
                      min=(128, 3, 28, 28),
                      opt=(128, 3, 28, 28),
                      max=(128, 3, 28, 28)),
    ]

    # See examples/api/06_immediate_eval_api for details on immediately evaluated functional loaders like `engine_from_network`.
    engine = engine_from_network(NetworkFromOnnxPath("dynamic_identity.onnx"),
                                 config=CreateConfig(profiles=profiles))

    # We'll save the engine so that we can inspect it with `inspect model`.
    # This should make it easy to see how the engine bindings are laid out.
    save_engine(engine, "dynamic_identity.engine")

    # We'll create, but not activate, three separate runners, each with a separate context.
    #
    # TIP: By providing a context directly, as opposed to via a lazy loader,
    # we can ensure that the runner will *not* take ownership of it.
    #
    low_latency = TrtRunner(engine.create_execution_context())

    # NOTE: The following two lines will cause TensorRT to display errors since profile 0
    # is already in use by the first execution context. We'll suppress them using G_LOGGER.verbosity().
    #
    with G_LOGGER.verbosity(G_LOGGER.CRITICAL):
        dynamic_batching = TrtRunner(engine.create_execution_context())
        offline = TrtRunner(engine.create_execution_context())
        # NOTE: We could update the profile index here (e.g. `context.active_optimization_profile = 2`),
        # but instead, we'll use TrtRunner's `set_profile()` API when we later activate the runner.

    # Finally, we can activate the runners as we need them.
    #
    # NOTE: Since the context and engine are already created, the runner will only need to
    # allocate input and output buffers during activation.

    input_img = np.ones((1, 3, 28, 28), dtype=np.float32)  # An input "image"

    with low_latency:
        outputs = low_latency.infer({"X": input_img})
        assert np.array_equal(outputs["Y"],
                              input_img)  # It's an identity model!

        print("Low latency runner succeeded!")

        # While we're serving requests online, we might decide that we need dynamic batching
        # for a moment.
        #
        # NOTE: We're assuming that activating runners will be cheap here, so we can bring up
        # the dynamic batching runner just-in-time.
        #
        # TIP: If activating the runner is not cheap (e.g. input/output buffers are large),
        # it might be better to keep the runner active the whole time.
        #
        with dynamic_batching:
            # NOTE: The very first time we activate this runner, we need to set
            # the profile index (it's 0 by default). We need to do this *only once*.
            # Alternatively, we could have set the profile index in the context directly (see above).
            #
            dynamic_batching.set_profile(1)  # Use the second profile, which is intended for dynamic batching.

            # We'll create fake batches by repeating our fake input image.
            small_input_batch = np.repeat(input_img, 4,
                                          axis=0)  # Shape: (4, 3, 28, 28)
            outputs = dynamic_batching.infer({"X": small_input_batch})
            assert np.array_equal(outputs["Y"], small_input_batch)

    # If we need dynamic batching again later, we can activate the runner once more.
    #
    # NOTE: This time, we do *not* need to set the profile.
    #
    with dynamic_batching:
        # NOTE: We can use any shape that's in the range of the profile without
        # additional setup - Polygraphy handles the details behind the scenes!
        #
        large_input_batch = np.repeat(input_img, 16,
                                      axis=0)  # Shape: (16, 3, 28, 28)
        outputs = dynamic_batching.infer({"X": large_input_batch})
        assert np.array_equal(outputs["Y"], large_input_batch)

        print("Dynamic batching runner succeeded!")

    with offline:
        # NOTE: We must set the profile to something other than 0 or 1 since both of those
        # are now in use by the `low_latency` and `dynamic_batching` runners respectively.
        #
        offline.set_profile(2)  # Use the third profile, which is intended for the offline case.

        large_offline_batch = np.repeat(input_img, 128,
                                        axis=0)  # Shape: (128, 3, 28, 28)
        outputs = offline.infer({"X": large_offline_batch})
        assert np.array_equal(outputs["Y"], large_offline_batch)

        print("Offline runner succeeded!")
from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
from polygraphy.backend.trt import CreateConfig as CreateTrtConfig, EngineFromNetwork, NetworkFromOnnxPath, Profile, SaveEngine, TrtRunner
from polygraphy.common import TensorMetadata
from polygraphy.comparator import Comparator, CompareFunc, DataLoader
import sys

# Data Loader
data_loader = DataLoader(
    input_metadata=TensorMetadata().add('tensor-0', None, (4, 1, 28, 28)))

# Loaders
build_onnxrt_session = SessionFromOnnx(
    '/work/gitlab/tensorrt-cookbook-in-chinese/08-Tool/Polygraphy/runExample/model.onnx'
)
parse_network_from_onnx = NetworkFromOnnxPath(
    '/work/gitlab/tensorrt-cookbook-in-chinese/08-Tool/Polygraphy/runExample/model.onnx'
)
profiles = [
    Profile().add('tensor-0',
                  min=[1, 1, 28, 28],
                  opt=[4, 1, 28, 28],
                  max=[16, 1, 28, 28])
]
create_trt_config = CreateTrtConfig(max_workspace_size=1000000000,
                                    profiles=profiles)
build_engine = EngineFromNetwork(parse_network_from_onnx,
                                 config=create_trt_config)
save_engine = SaveEngine(build_engine, path='model-FP32.plan')

# Runners
runners = [