class TestProfile(object):
    """Tests for ``Profile``: adding shapes, filling defaults and TensorRT conversion."""

    def test_can_add(self):
        """``add`` returns the profile itself and stores the min/opt/max shapes."""
        profile = Profile()
        # Descriptive local names avoid shadowing the ``min``/``max`` builtins.
        min_shape, opt_shape, max_shape = (1, 1), (2, 2), (4, 4)
        assert profile.add("input", min=min_shape, opt=opt_shape, max=max_shape) is profile

        shape_tuple = profile["input"]
        assert shape_tuple.min == min_shape
        assert shape_tuple.opt == opt_shape
        assert shape_tuple.max == max_shape

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_fill_defaults_does_not_overwrite(self, dynamic_identity_network):
        """Shapes that were set explicitly must survive ``fill_defaults``."""
        _, network, _ = dynamic_identity_network
        profile = Profile().add("X", (1, 1, 1, 1), (1, 1, 2, 2), (1, 1, 3, 3))

        # This comparison used to be a bare expression, so it was never checked.
        assert profile.fill_defaults(network) is profile
        assert profile["X"].min == (1, 1, 1, 1)
        assert profile["X"].opt == (1, 1, 2, 2)
        assert profile["X"].max == (1, 1, 3, 3)

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_to_trt(self, dynamic_identity_network):
        """The converted TensorRT profile reports the same min/opt/max shapes."""
        builder, network, _ = dynamic_identity_network
        profile = Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4))

        trt_profile = profile.to_trt(builder, network)
        # This comparison used to be a bare expression, so it was never checked.
        # Shapes are normalized to tuples so the assertion does not depend on the
        # container or dims types that TensorRT returns.
        shapes = [tuple(shape) for shape in trt_profile.get_shape("X")]
        assert shapes == [(1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)]
def get_base_selector_type():
    """
    Builds the base class used for algorithm selectors.

    The class derives from ``trt.IAlgorithmSelector`` when the installed TensorRT
    is 8.0 or newer, and from ``object`` otherwise. In the latter case,
    constructing an instance calls ``trt_util.fail_unavailable``.

    Returns:
        type: The ``BaseSelector`` class.
    """
    selector_supported = mod.version(trt.__version__) >= mod.version("8.0")
    # Only touch ``trt.IAlgorithmSelector`` when the version check says it exists.
    parent_type = trt.IAlgorithmSelector if selector_supported else object

    class BaseSelector(parent_type):
        def __init__(self, data):
            """
            Args:
                data: Either a ``TacticReplayData`` instance, which is stored in
                        ``self.data``, or any other value, which is stored in ``self.path``.
            """
            if not selector_supported:
                trt_util.fail_unavailable("Algorithm selector")

            # Must explicitly initialize parent for any trampoline class!
            # Will mysteriously segfault without this.
            parent_type.__init__(self)

            self.path = None
            self.data = TacticReplayData()
            if not isinstance(data, TacticReplayData):
                self.path = data
            else:
                self.data = data

        def select_algorithms(self, context, choices):
            """Selects every choice by returning all indices into ``choices``."""
            num_choices = len(choices)
            return list(range(num_choices))

    return BaseSelector
def test_int8_calibration_cache(self):
    """Runs an INT8 build with ``--calibration-cache`` and checks the cache file has content."""
    with tempfile.NamedTemporaryFile() as outpath:
        args = [
            ONNX_MODELS["identity"].path,
            "--trt",
            "--int8",
            "--calibration-cache",
            outpath.name,
        ]
        # ONNX-Runtime is only added on TRT 7.0 and newer.
        if mod.version(trt.__version__) >= mod.version("7.0"):
            args.append("--onnxrt")

        run_polygraphy_run(args)
        check_file_non_empty(outpath.name)
class TestOnnxLikeFromNetwork(object):
    """Checks ``onnx_like_from_network`` on networks parsed from several ONNX models."""

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("7.2"),
        reason="Unsupported for TRT 7.1 and older",
    )
    @pytest.mark.parametrize(
        "model_name",
        [
            "identity",
            "empty_tensor_expand",
            "const_foldable",
            "and",
            "scan",
            "dim_param",
            "tensor_attr",
        ],
    )
    def test_onnx_like_from_network(self, model_name):
        """The conversion must yield a truthy result for each model."""
        load_network = NetworkFromOnnxBytes(ONNX_MODELS[model_name].loader)
        onnx_like = onnx_like_from_network(load_network)
        assert onnx_like
def test_loader_explicit_precision(self):
    """Loads the identity model from a path with ``explicit_precision=True`` and checks network flags."""
    model_path = ONNX_MODELS["identity"].path
    builder, network, parser = network_from_onnx_path(model_path, explicit_precision=True)
    with builder, network, parser:
        assert not network.has_implicit_batch_dimension
        # The explicit-precision flag is only checked before TRT 8.0.
        if mod.version(trt.__version__) >= mod.version("8.0"):
            return
        assert network.has_explicit_precision
class TestTrtLegacy(object):
    """Smoke tests for the ``--trt-legacy`` runner option."""

    def test_uff(self):
        """Runs the TensorFlow identity model with ``--trt-legacy``."""
        args = [TF_MODELS["identity"].path, "--trt-legacy"]
        run_polygraphy_run(args)

    @pytest.mark.skipif(
        mod.version(trt.__version__) >= mod.version("7.0"),
        reason="Unsupported in TRT 7.0 and later",
    )
    def test_onnx(self):
        """Runs the ONNX identity model with ``--trt-legacy``."""
        args = [ONNX_MODELS["identity"].path, "--trt-legacy"]
        run_polygraphy_run(args)
def test_api_examples(example):
    """Runs every command of an API example, skipping the dynamic-shapes example before TRT 8.0."""
    if mod.version(trt.__version__) < mod.version("8.0"):
        if example.path.endswith("07_tensorrt_and_dynamic_shapes"):
            pytest.skip("Not intended for older versions of TRT")

    with example as commands:
        for cmd in commands:
            example.run(cmd)
def test_int8_calibration_base_class(self, base_class):
    """
    Runs an INT8 build using the calibrator base class named by ``base_class``.

    Args:
        base_class (str): Value passed to ``--calibration-base-class``.
    """
    cmd = [
        ONNX_MODELS["identity"].path,
        "--trt",
        "--int8",
        "--calibration-base-class",
        base_class,
    ]
    # ONNX-Runtime is only added on TRT 7.0 and newer.
    if mod.version(trt.__version__) >= mod.version("7.0"):
        cmd += ["--onnxrt"]
    # The command list was built but never passed on; the call had no arguments.
    run_polygraphy_run(cmd)
def test_precision_flags(self, trt_config_args, arg, flag):
    """
    Parses a single precision argument and checks that the matching builder flag is set.

    Args:
        trt_config_args: Argument group exposing ``parse_args`` and ``create_config``.
        arg (str): Command-line argument to parse.
        flag (str): Name of the ``trt.BuilderFlag`` member expected to be enabled.
    """
    if flag == "TF32":
        if mod.version(trt.__version__) < mod.version("7.1"):
            pytest.skip("TF32 support was added in 7.1")

    trt_config_args.parse_args([arg])

    builder, network = create_network()
    with builder, network, trt_config_args.create_config(builder, network=network) as config:
        builder_flag = getattr(trt.BuilderFlag, flag)
        assert config.get_flag(builder_flag)
def test_cli_examples(example):
    """Runs every command of a CLI example, skipping the tactic-replay example before TRT 8.0."""
    if mod.version(trt.__version__) < mod.version("8.0"):
        if example.path.endswith("01_debugging_flaky_trt_tactics"):
            pytest.skip("Tactic replays are not supported on older versions of TRT")

    with example as commands:
        for cmd in commands:
            example.run(cmd)
class TestPluginRef(object):
    """Smoke tests for the ``--pluginref`` runner option."""

    def test_basic(self):
        """Runs the identity model with ``--pluginref`` alone."""
        args = [ONNX_MODELS["identity"].path, "--pluginref"]
        run_polygraphy_run(args)

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("7.0"),
        reason="Unsupported for TRT 6",
    )
    @pytest.mark.parametrize("model", ["identity", "instancenorm"])
    def test_ref_implementations(self, model):
        """Runs ``--pluginref`` together with ``--onnxrt`` and ``--trt`` on the same model."""
        args = [ONNX_MODELS[model].path, "--pluginref", "--onnxrt", "--trt"]
        run_polygraphy_run(args)
def test_model_trt_sanity(self, run_inspect_model, model):
    """
    Inspects an ONNX model displayed as a TensorRT network.

    Two models are skipped on TensorRT versions that predate the features they need.
    """
    import tensorrt as trt

    if model == "tensor_attr":
        if mod.version(trt.__version__) < mod.version("7.2"):
            pytest.skip("Models with constant outputs were not supported before 7.2")

    if model == "scan":
        if mod.version(trt.__version__) < mod.version("7.0"):
            pytest.skip("Scan was not supported until 7.0")

    inspect_args = [ONNX_MODELS[model].path, "--display-as=trt"]
    run_inspect_model(inspect_args)
def test_can_automatically_install_deps(self, virtualenv_with_poly, cmd):
    """
    Runs the tool in a virtualenv with ``POLYGRAPHY_AUTOINSTALL_DEPS=1`` and checks
    that the output reports an attempted install of a missing dependency.

    Args:
        virtualenv_with_poly: Virtualenv fixture exposing ``env`` and ``run``.
        cmd (List[str]): Arguments appended after the tool's entry point.
    """
    if "--trt" in cmd:
        if mod.version(trt.__version__) < mod.version("7.0"):
            pytest.skip("TRT 6 container has an old version of CUDA")

    virtualenv_with_poly.env["POLYGRAPHY_AUTOINSTALL_DEPS"] = "1"

    polygraphy_bin = os.path.join(ROOT_DIR, "bin", "polygraphy")
    full_cmd = ["python3", polygraphy_bin] + cmd
    output = virtualenv_with_poly.run(full_cmd, capture=True)
    print(output)

    assert "is required, but not installed. Attempting to install now" in output
def test_cli_examples(example):
    """Runs every command of a CLI example, skipping the tactic-related examples before TRT 8.0."""
    if mod.version(trt.__version__) < mod.version("8.0"):
        tactic_examples = (
            "01_debugging_flaky_trt_tactics",
            "02_deterministic_engine_builds_in_tensorrt",
        )
        if example.path.endswith(tactic_examples):
            pytest.skip("Tactic replays are not supported on older versions of TRT")

    with example as commands:
        for cmd in commands:
            example.run(cmd)
class TestConvertToOnnxLikeTrt(object):
    """Smoke test for converting ONNX models with ``--convert-to=onnx-like-trt-network``."""

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("7.2"),
        reason="Unsupported for TRT 7.1 and older",
    )
    @pytest.mark.parametrize(
        "model_name",
        [
            "identity",
            "empty_tensor_expand",
            "const_foldable",
            "and",
            "scan",
            "dim_param",
            "tensor_attr",
        ],
    )
    def test_onnx_to_trt_to_onnx_like(self, model_name):
        """Runs the conversion into a temporary output file."""
        with util.NamedTemporaryFile() as outmodel:
            convert_args = [
                ONNX_MODELS[model_name].path,
                "--convert-to=onnx-like-trt-network",
                "-o",
                outmodel.name,
            ]
            run_polygraphy_convert(convert_args)
class TestCapability(object):
    """Tests for the ``inspect capability`` subtool."""

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("8.0"),
        reason="supports_model API not available before TRT 8.0",
    )
    @pytest.mark.parametrize("case", TEST_CAPABILITY_CASES, ids=lambda case: case[0])
    def test_capability(self, case):
        """
        Checks the files written to the output directory and the printed summary.

        Args:
            case: A ``(model, expected_files, expected_summary)`` tuple.
        """
        model, expected_files, expected_summary = case
        with tempfile.TemporaryDirectory() as outdir:
            subgraph_dir = os.path.join(outdir, "subgraphs")
            status = run_polygraphy_inspect(
                ["capability", ONNX_MODELS[model].path, "-o", subgraph_dir]
            )

            produced = [os.path.basename(path) for path in glob.glob(os.path.join(subgraph_dir, "**"))]
            assert sorted(produced) == sorted(expected_files)

            summary = dedent(expected_summary).strip()
            assert summary in status.stdout
def test_calibrator_basic(self, identity_builder_network, BaseClass):
    """
    Builds an INT8 engine with a ``Calibrator`` and checks that all batches were consumed.

    Args:
        identity_builder_network: A ``(builder, network)`` pair.
        BaseClass: TensorRT calibrator type used as the calibrator's base class.
    """
    if BaseClass == trt.IInt8LegacyCalibrator:
        if mod.version(trt.__version__) < mod.version("7.0"):
            pytest.skip("Bug in TRT 6 causes NaNs with legacy calibrator")

    builder, network = identity_builder_network
    num_batches = 2

    feed = {"x": np.ones((1, 1, 2, 2), dtype=np.float32)}
    # The same dict object is repeated once per batch.
    data = [feed] * num_batches
    calibrator = Calibrator(data, BaseClass=BaseClass)

    config_loader = CreateConfig(int8=True, calibrator=calibrator)
    with engine_from_network((builder, network), config_loader):
        assert calibrator.num_batches == num_batches

    self.check_calibrator_cleanup(calibrator)
def test_can_automatically_install_deps(self, virtualenv_with_poly, cmd):
    """
    Runs the tool in a virtualenv with ``POLYGRAPHY_AUTOINSTALL_DEPS=1`` and checks
    that the output reports an attempted install of a missing dependency.

    Commands containing ``--trt`` are skipped on TRT 6 and marked xfail otherwise.

    Args:
        virtualenv_with_poly: Virtualenv fixture exposing ``env`` and ``run``.
        cmd (List[str]): Arguments appended after the tool's entry point.
    """
    uses_trt = "--trt" in cmd
    if uses_trt and mod.version(trt.__version__) < mod.version("7.0"):
        pytest.skip("TRT 6 container has an old version of CUDA")
    if uses_trt:
        pytest.xfail("TensorRT 8.0.1.6 wheels are currently broken")

    virtualenv_with_poly.env["POLYGRAPHY_AUTOINSTALL_DEPS"] = "1"

    polygraphy_bin = os.path.join(ROOT_DIR, "bin", "polygraphy")
    full_cmd = ["python3", polygraphy_bin] + cmd
    print("Running: {:}".format(" ".join(full_cmd)))

    output = virtualenv_with_poly.run(full_cmd, capture=True)
    print(output)

    assert "is required, but not installed. Attempting to install now" in output
class TestModifyNetwork(object):
    """Tests for marking and excluding network outputs."""

    def test_mark_layerwise(self, modifiable_network):
        """With ``MARK_ALL``, every output of every layer is a network output."""
        loader = ModifyNetworkOutputs(modifiable_network, outputs=constants.MARK_ALL)
        builder, network, parser = loader()
        with builder, network, parser:
            for layer in network:
                for out_idx in range(layer.num_outputs):
                    tensor = layer.get_output(out_idx)
                    assert tensor.is_network_output

    def test_mark_custom_outputs(self, modifiable_network):
        """Marking one named tensor leaves it as the only network output."""
        builder, network, parser = modify_network_outputs(
            modifiable_network,
            outputs=["identity_out_0"],
        )
        with builder, network, parser:
            assert network.num_outputs == 1
            assert network.get_output(0).name == "identity_out_0"

    def test_exclude_outputs_with_mark_layerwise(self, modifiable_network):
        """Excluded tensors are dropped even when all outputs are marked."""
        builder, network, parser = modify_network_outputs(
            modifiable_network,
            outputs=constants.MARK_ALL,
            exclude_outputs=["identity_out_2"],
        )
        with builder, network, parser:
            assert network.num_outputs == 1
            assert network.get_output(0).name == "identity_out_0"

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("7.0"),
        reason="Unsupported for TRT 6",
    )
    def test_mark_shape_outputs(self, modifiable_reshape_network):
        """A marked shape tensor becomes a network output and is reported as a shape tensor."""
        builder, network, parser = modify_network_outputs(
            modifiable_reshape_network,
            outputs=["output", "reduce_prod_out_gs_2"],
        )
        with builder, network, parser:
            assert network.num_outputs == 2
            first_output = network.get_output(0)
            assert first_output.name == "reduce_prod_out_gs_2"
            assert network.get_output(0).is_shape_tensor

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("7.0"),
        reason="Unsupported for TRT 6",
    )
    def test_unmark_shape_outputs(self, modifiable_reshape_network):
        """Excluding the shape tensor leaves a single network output."""
        builder, network, parser = modify_network_outputs(
            modifiable_reshape_network,
            outputs=constants.MARK_ALL,
            exclude_outputs=["reduce_prod_out_gs_2"],
        )
        with builder, network, parser:
            assert network.num_outputs == 1
def call_impl(self):
    """
    Returns:
        (trt.IBuilder, trt.INetworkDefinition, trt.OnnxParser):
                A TensorRT network, as well as the builder used to create it, and the parser
                used to populate it.
    """
    path = util.invoke_if_callable(self.path)[0]

    if not (mod.version(trt.__version__) >= mod.version("7.1")):
        # Before 7.1, read the file into bytes and go through the bytes-based loader.
        from polygraphy.backend.common import bytes_from_path

        model_bytes = bytes_from_path(path)
        return network_from_onnx_bytes(model_bytes, self.explicit_precision)

    with util.FreeOnException(super().call_impl()) as (builder, network, parser):
        # We need to use parse_from_file for the ONNX parser to keep track of the location of the ONNX file for
        # potentially parsing any external weights.
        parsed_ok = parser.parse_from_file(path)
        trt_util.check_onnx_parser_errors(parser, parsed_ok)
    return builder, network, parser
def test_dim_param_trt_onnxrt(self):
    """Compares ONNX-Runtime and TensorRT on the ``dim_param`` model."""
    onnx_loader = ONNX_MODELS["dim_param"].loader

    onnxrt_session = SessionFromOnnx(onnx_loader)
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(onnx_loader))

    run_results = Comparator.run([OnnxrtRunner(onnxrt_session), TrtRunner(engine_loader)])

    # Shape checking is only enabled on TRT 7.0 and newer.
    shapes_checked = mod.version(trt.__version__) >= mod.version("7.0")
    compare_func = CompareFunc.basic_compare_func(check_shapes=shapes_checked)

    accuracy = Comparator.compare_accuracy(run_results, compare_func=compare_func)
    assert bool(accuracy)

    first_runner_results = list(run_results.values())[0]
    assert len(first_runner_results) == 1  # Default number of iterations
def test_calibrator_outside_polygraphy(self, identity_builder_network):
    """
    Uses a ``Calibrator`` directly with the TensorRT builder API and then checks its cleanup.

    Args:
        identity_builder_network: A ``(builder, network)`` pair.
    """
    builder, network = identity_builder_network
    num_batches = 2

    config = builder.create_builder_config()
    config.set_flag(trt.BuilderFlag.INT8)

    with Calibrator(generate_data(num_batches)) as calibrator:
        config.int8_calibrator = calibrator

        if not (mod.version(trt.__version__) < mod.version("8.0")):
            # From 8.0 on, build a serialized network and deserialize it with a runtime.
            with trt.Runtime(get_trt_logger()) as runtime:
                serialized = builder.build_serialized_network(network, config)
                engine = runtime.deserialize_cuda_engine(serialized)
        else:
            engine = builder.build_engine(network, config)

        with engine:
            assert engine

    self.check_calibrator_cleanup(calibrator)
def test_defaults(self, identity_builder_network):
    """Checks the settings of a builder config made by ``CreateConfig()`` with no arguments."""
    builder, network = identity_builder_network
    create_config = CreateConfig()
    assert create_config.timing_cache_path is None

    with create_config(builder, network) as config:
        assert config.max_workspace_size == 1 << 24

        # These flags may be missing from ``trt.BuilderFlag``; a missing
        # attribute simply skips that check.
        for optional_flag in ("TF32", "SPARSE_WEIGHTS"):
            with contextlib.suppress(AttributeError):
                assert not config.get_flag(getattr(trt.BuilderFlag, optional_flag))

        for required_flag in (trt.BuilderFlag.FP16, trt.BuilderFlag.INT8):
            assert not config.get_flag(required_flag)

        assert config.num_optimization_profiles == 1
        assert config.int8_calibrator is None

        with contextlib.suppress(AttributeError):
            # The expected default tactic-source bitmask depends on the TRT version.
            expected_sources = 3 if mod.version(trt.__version__) < mod.version("8.0") else 7
            assert config.get_tactic_sources() == expected_sources
def test_multiple_runners(self):
    """Compares TensorFlow, ONNX-Runtime and TensorRT on the identity model."""
    tf_loader = TF_MODELS["identity"].loader
    tf_session = SessionFromGraph(tf_loader)

    # The ONNX model is produced from the TensorFlow graph.
    onnx_bytes_loader = BytesFromOnnx(OnnxFromTfGraph(tf_loader))
    onnxrt_session = SessionFromOnnx(onnx_bytes_loader)
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(onnx_bytes_loader))

    run_results = Comparator.run(
        [
            TfRunner(tf_session),
            OnnxrtRunner(onnxrt_session),
            TrtRunner(engine_loader),
        ]
    )

    # Shape checking is only enabled on TRT 7.0 and newer.
    shapes_checked = mod.version(trt.__version__) >= mod.version("7.0")
    compare_func = CompareFunc.basic_compare_func(check_shapes=shapes_checked)

    accuracy = Comparator.compare_accuracy(run_results, compare_func=compare_func)
    assert bool(accuracy)

    first_runner_results = list(run_results.values())[0]
    assert len(first_runner_results) == 1  # Default number of iterations
class TestEngineFromNetwork(object):
    """Tests for the ``EngineFromNetwork`` loader."""

    def test_defaults(self, identity_network):
        """No timing cache path is set by default."""
        engine_loader = EngineFromNetwork(identity_network)
        assert engine_loader.timing_cache_path is None

    def test_can_build_with_parser_owning(self, identity_network):
        """Builds an engine from the ``identity_network`` fixture."""
        engine_loader = EngineFromNetwork(identity_network)
        with engine_loader():
            pass

    def test_can_build_without_parser_non_owning(self, identity_builder_network):
        """Builds an engine from a ``(builder, network)`` pair without a parser."""
        builder, network = identity_builder_network
        engine_loader = EngineFromNetwork((builder, network))
        with engine_loader():
            pass

    def test_can_build_with_calibrator(self, identity_builder_network):
        """Builds an INT8 engine with a calibrator and checks its device buffers afterwards."""
        builder, network = identity_builder_network
        calibrator = Calibrator(DataLoader())
        config_loader = CreateConfig(int8=True, calibrator=calibrator)
        engine_loader = EngineFromNetwork((builder, network), config_loader)
        with engine_loader():
            pass

        # Calibrator buffers should be freed after the build
        assert all(buf.allocated_nbytes == 0 for buf in calibrator.device_buffers.values())

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("8.0"),
        reason="Unsupported for TRT 7.2 and older",
    )
    @pytest.mark.parametrize("path_mode", [True, False], ids=["path", "file-like"])
    def test_timing_cache_generate_and_append(self, path_mode):
        """
        Builds engines that share a timing cache and checks how the cache file grows.

        Args:
            path_mode (bool): Whether the cache is saved via its path (True)
                    or via the open file-like object (False).
        """
        with tempfile.NamedTemporaryFile() as total_cache, tempfile.NamedTemporaryFile() as identity_cache:

            def build_engine(model, cache):
                # Rewind before and after the build when working with the file-like object.
                if not path_mode:
                    cache.seek(0)

                network_loader = NetworkFromOnnxBytes(ONNX_MODELS[model].loader)
                # In non-path_mode, use the file-like object directly.
                save_target = cache.name if path_mode else cache
                # Must load the cache with CreateConfig so that new data is appended
                # instead of overwriting the previous cache.
                config_loader = CreateConfig(load_timing_cache=cache.name)
                engine_loader = EngineFromNetwork(
                    network_loader,
                    config_loader,
                    save_timing_cache=save_target,
                )
                with engine_loader():
                    pass

                if not path_mode:
                    cache.seek(0)

            assert not total_cache.read()

            build_engine("const_foldable", total_cache)
            const_foldable_cache_size = get_file_size(total_cache.name)

            # Build this network twice. Once with a fresh cache so we can determine its size.
            assert get_file_size(identity_cache.name) == 0
            build_engine("identity", identity_cache)
            identity_cache_size = get_file_size(identity_cache.name)

            build_engine("identity", total_cache)
            total_cache_size = get_file_size(total_cache.name)

            # The total cache should be larger than either of the individual caches.
            assert total_cache_size > const_foldable_cache_size
            assert total_cache_size > identity_cache_size
            # The total cache should also be smaller than or equal to the sum of the individual caches since
            # header information should not be duplicated.
            combined_size = const_foldable_cache_size + identity_cache_size
            assert total_cache_size <= combined_size
class TestConfigLoader(object):
    """Tests for the ``CreateConfig`` loader."""

    def test_defaults(self, identity_builder_network):
        """Checks the settings of a builder config made by ``CreateConfig()`` with no arguments."""
        builder, network = identity_builder_network
        create_config = CreateConfig()
        assert create_config.timing_cache_path is None

        with create_config(builder, network) as config:
            assert config.max_workspace_size == 1 << 24

            # These flags may be missing from ``trt.BuilderFlag``; a missing
            # attribute simply skips that check.
            for optional_flag in ("TF32", "SPARSE_WEIGHTS"):
                with contextlib.suppress(AttributeError):
                    assert not config.get_flag(getattr(trt.BuilderFlag, optional_flag))

            for required_flag in (trt.BuilderFlag.FP16, trt.BuilderFlag.INT8):
                assert not config.get_flag(required_flag)

            assert config.num_optimization_profiles == 1
            assert config.int8_calibrator is None

            with contextlib.suppress(AttributeError):
                # The expected default tactic-source bitmask depends on the TRT version.
                expected_sources = 3 if mod.version(trt.__version__) < mod.version("8.0") else 7
                assert config.get_tactic_sources() == expected_sources

    def test_workspace_size(self, identity_builder_network):
        """A workspace size of zero is passed through to the config."""
        builder, network = identity_builder_network
        create_config = CreateConfig(max_workspace_size=0)
        with create_config(builder, network) as config:
            assert config.max_workspace_size == 0

    @pytest.mark.parametrize("flag", [True, False])
    def test_strict_types(self, identity_builder_network, flag):
        """``strict_types`` maps onto ``BuilderFlag.STRICT_TYPES``."""
        builder, network = identity_builder_network
        create_config = CreateConfig(strict_types=flag)
        with create_config(builder, network) as config:
            assert config.get_flag(trt.BuilderFlag.STRICT_TYPES) == flag

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("8.0.0.0"),
        reason="API was added in TRT 8.0",
    )
    @pytest.mark.parametrize("flag", [True, False])
    def test_restricted(self, identity_builder_network, flag):
        """``restricted`` maps onto ``BuilderFlag.SAFETY_SCOPE``."""
        builder, network = identity_builder_network
        create_config = CreateConfig(restricted=flag)
        with create_config(builder, network) as config:
            assert config.get_flag(trt.BuilderFlag.SAFETY_SCOPE) == flag

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("7.1.0.0"),
        reason="API was added in TRT 7.1",
    )
    @pytest.mark.parametrize("flag", [True, False])
    def test_tf32(self, identity_builder_network, flag):
        """``tf32`` maps onto ``BuilderFlag.TF32``."""
        builder, network = identity_builder_network
        create_config = CreateConfig(tf32=flag)
        with create_config(builder, network) as config:
            assert config.get_flag(trt.BuilderFlag.TF32) == flag

    @pytest.mark.parametrize("flag", [True, False])
    def test_fp16(self, identity_builder_network, flag):
        """``fp16`` maps onto ``BuilderFlag.FP16``."""
        builder, network = identity_builder_network
        create_config = CreateConfig(fp16=flag)
        with create_config(builder, network) as config:
            assert config.get_flag(trt.BuilderFlag.FP16) == flag

    @pytest.mark.parametrize("flag", [True, False])
    def test_int8(self, identity_builder_network, flag):
        """``int8`` maps onto ``BuilderFlag.INT8``."""
        builder, network = identity_builder_network
        create_config = CreateConfig(int8=flag)
        with create_config(builder, network) as config:
            assert config.get_flag(trt.BuilderFlag.INT8) == flag

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("8.0"),
        reason="API was not available in 7.2 and older",
    )
    @pytest.mark.parametrize("flag", [True, False])
    def test_sparse_weights(self, identity_builder_network, flag):
        """``sparse_weights`` maps onto ``BuilderFlag.SPARSE_WEIGHTS``."""
        builder, network = identity_builder_network
        create_config = CreateConfig(sparse_weights=flag)
        with create_config(builder, network) as config:
            assert config.get_flag(trt.BuilderFlag.SPARSE_WEIGHTS) == flag

    # If ``trt.TacticSource`` (or one of its members) is missing, the case table
    # and the test that depends on it are not defined at all.
    with contextlib.suppress(AttributeError):
        if mod.version(trt.__version__) < mod.version("8.0"):
            TACTIC_SOURCES_CASES = [
                (None, 3),  # By default, all sources are enabled.
                ([], 0),
                ([trt.TacticSource.CUBLAS], 1),
                ([trt.TacticSource.CUBLAS_LT], 2),
                ([trt.TacticSource.CUBLAS, trt.TacticSource.CUBLAS_LT], 3),
            ]
        else:
            TACTIC_SOURCES_CASES = [
                (None, 7),  # By default, all sources are enabled.
                ([], 0),
                ([trt.TacticSource.CUBLAS], 1),
                ([trt.TacticSource.CUBLAS_LT], 2),
                ([trt.TacticSource.CUDNN], 4),
                ([trt.TacticSource.CUBLAS, trt.TacticSource.CUBLAS_LT], 3),
                ([trt.TacticSource.CUBLAS, trt.TacticSource.CUDNN], 5),
                ([trt.TacticSource.CUBLAS_LT, trt.TacticSource.CUDNN], 6),
                (
                    [
                        trt.TacticSource.CUDNN,
                        trt.TacticSource.CUBLAS,
                        trt.TacticSource.CUBLAS_LT,
                    ],
                    7,
                ),
            ]

        @pytest.mark.parametrize("sources, expected", TACTIC_SOURCES_CASES)
        def test_tactic_sources(self, identity_builder_network, sources, expected):
            """The selected tactic sources produce the expected bitmask."""
            builder, network = identity_builder_network
            create_config = CreateConfig(tactic_sources=sources)
            with create_config(builder, network) as config:
                assert config.get_tactic_sources() == expected

    def test_calibrator_metadata_set(self, identity_builder_network):
        """Creating the config fills in the calibrator data loader's input metadata."""
        builder, network = identity_builder_network
        calibrator = Calibrator(DataLoader())
        create_config = CreateConfig(int8=True, calibrator=calibrator)
        with create_config(builder, network) as config:
            assert config.int8_calibrator
            assert "x" in calibrator.data_loader.input_metadata

    def test_multiple_profiles(self, identity_builder_network):
        """Each supplied profile becomes an optimization profile on the config."""
        builder, network = identity_builder_network
        small_profile = Profile().add("x", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4))
        large_profile = Profile().add("x", (1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16))
        create_config = CreateConfig(profiles=[small_profile, large_profile])
        with create_config(builder, network) as config:
            assert config.num_optimization_profiles == 2

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("8.0"),
        reason="Unsupported for TRT 7.2 and older",
    )
    @pytest.mark.parametrize("path_mode", [True, False], ids=["path", "file-like"])
    def test_timing_cache(self, identity_builder_network, path_mode):
        """A timing cache can be loaded from a path or from a file-like object."""
        builder, network = identity_builder_network
        with tempfile.NamedTemporaryFile() as cache:
            cache_source = cache.name if path_mode else cache
            create_config = CreateConfig(load_timing_cache=cache_source)
            with create_config(builder, network) as config:
                assert config.get_timing_cache()

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("8.0"),
        reason="Unsupported for TRT 7.2 and older",
    )
    def test_empty_timing_cache_when_default(self, identity_builder_network):
        """The default timing cache serializes to the same size before and after a reset."""
        builder, network = identity_builder_network
        create_config = CreateConfig()
        with create_config(builder, network) as config:
            cache = config.get_timing_cache()

            def serialized_size():
                with cache.serialize() as buffer:
                    return len(bytes(buffer))

            cache_size = serialized_size()
            cache.reset()
            new_cache_size = serialized_size()
            assert cache_size == new_cache_size
builder, network, parser = network_from_onnx_bytes( ONNX_MODELS["identity"].loader) with builder, network, parser: assert not network.has_implicit_batch_dimension assert not network.has_explicit_precision def test_loader_explicit_precision(self): builder, network, parser = network_from_onnx_bytes( ONNX_MODELS["identity"].loader, explicit_precision=True) with builder, network, parser: assert not network.has_implicit_batch_dimension if mod.version(trt.__version__) < mod.version("8.0"): assert network.has_explicit_precision @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.1.0.0"), reason="API was added in TRT 7.1") class TestNetworkFromOnnxPath(object): def test_loader(self): builder, network, parser = network_from_onnx_path( ONNX_MODELS["identity"].path) with builder, network, parser: assert not network.has_implicit_batch_dimension assert not network.has_explicit_precision def test_loader_explicit_precision(self): builder, network, parser = network_from_onnx_path( ONNX_MODELS["identity"].path, explicit_precision=True) with builder, network, parser: assert not network.has_implicit_batch_dimension if mod.version(trt.__version__) < mod.version("8.0"):
def infer(self, feed_dict, check_inputs=None):
    """
    Forwards to the parent ``infer`` with a version-dependent default for ``check_inputs``.

    Args:
        feed_dict: Passed through unchanged to the parent implementation.
        check_inputs: Passed through ``util.default``. The fallback is
                False before TRT 7.0 and True otherwise.
    """
    # Disable checks by default on TRT 6.0 due to implicit batch semantics.
    is_trt6 = mod.version(trt.__version__) < mod.version("7.0")
    return super().infer(feed_dict, util.default(check_inputs, not is_trt6))
class TestTrt(object):
    """End-to-end tests for the ``--trt`` runner of the ``run`` tool."""

    def test_basic(self):
        """Runs the identity model with ``--trt``."""
        run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt"])

    def test_plugins(self):
        """Runs with ``--plugins`` pointing at the plugin library."""
        run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--plugins", "libnvinfer_plugin.so"])

    def test_custom_outputs(self):
        """Marks a single named tensor as output via ``--trt-outputs``."""
        run_polygraphy_run(
            [ONNX_MODELS["identity_identity"].path, "--trt", "--trt-outputs", "identity_out_0"]
        )

    def test_layerwise_outputs(self):
        """``--trt-outputs mark all`` saves both tensors of the two-layer model."""
        with tempfile.NamedTemporaryFile() as outfile0:
            run_polygraphy_run(
                [
                    ONNX_MODELS["identity_identity"].path,
                    "--trt",
                    "--trt-outputs",
                    "mark",
                    "all",
                    "--save-outputs",
                    outfile0.name,
                ]
            )
            results = load_json(outfile0.name)
            # Exactly one iteration result is expected for the first runner.
            [result] = list(results.values())[0]
            assert len(result) == 2
            assert "identity_out_0" in result
            assert "identity_out_2" in result

    def test_exclude_outputs_with_layerwise(self):
        """``--trt-exclude-outputs`` removes a tensor even when all outputs are marked."""
        with tempfile.NamedTemporaryFile() as outfile0:
            run_polygraphy_run(
                [
                    ONNX_MODELS["identity_identity"].path,
                    "--trt",
                    "--trt-outputs",
                    "mark",
                    "all",
                    "--trt-exclude-outputs",
                    "identity_out_2",
                    "--save-outputs",
                    outfile0.name,
                ]
            )
            results = load_json(outfile0.name)
            # Exactly one iteration result is expected for the first runner.
            [result] = list(results.values())[0]
            assert len(result) == 1
            assert "identity_out_0" in result

    def test_int8(self):
        """Runs an INT8 build."""
        run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--int8"])

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("8.0"), reason="API was added after TRT 7.2")
    def test_sparse_weights(self):
        """Runs with ``--sparse-weights``."""
        run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--sparse-weights"])

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_input_shape(self):
        """Runs the dynamic model with a fully specified input shape."""
        run_polygraphy_run(
            [ONNX_MODELS["dynamic_identity"].path, "--trt", "--onnxrt", "--input-shapes", "X:[1,2,4,4]"]
        )

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_dynamic_input_shape(self):
        """Runs the dynamic model with an input shape containing a dynamic (-1) dimension."""
        run_polygraphy_run(
            [ONNX_MODELS["dynamic_identity"].path, "--trt", "--onnxrt", "--input-shapes", "X:[1,2,-1,4]"]
        )

    # This test used to share its name with the one above, so it replaced that
    # test in the class namespace and the bracketed-syntax case never ran.
    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_dynamic_input_shape_x_separated(self):
        """Same as ``test_dynamic_input_shape`` but using the ``name,AxBxC`` shape syntax."""
        run_polygraphy_run(
            [ONNX_MODELS["dynamic_identity"].path, "--trt", "--onnxrt", "--input-shapes", "X,1x2x-1x4"]
        )

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_explicit_profile(self):
        """Runs with explicit min/opt/max shapes and a matching runtime shape."""
        run_polygraphy_run(
            [
                ONNX_MODELS["dynamic_identity"].path,
                "--trt",
                "--onnxrt",
                "--input-shapes",
                "X:[1,2,1,1]",
                "--trt-min-shapes",
                "X:[1,2,1,1]",
                "--trt-opt-shapes",
                "X:[1,2,1,1]",
                "--trt-max-shapes",
                "X:[1,2,1,1]",
            ]
        )

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_explicit_profile_implicit_runtime_shape(self):
        """Runs with explicit min/opt/max shapes and no ``--input-shapes``."""
        run_polygraphy_run(
            [
                ONNX_MODELS["dynamic_identity"].path,
                "--trt",
                "--onnxrt",
                "--trt-min-shapes",
                "X:[1,2,1,1]",
                "--trt-opt-shapes",
                "X:[1,2,1,1]",
                "--trt-max-shapes",
                "X:[1,2,1,1]",
            ]
        )

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_explicit_profile_opt_runtime_shapes_differ(self):
        """Runs with a runtime shape that differs from the profile's opt shape."""
        run_polygraphy_run(
            [
                ONNX_MODELS["dynamic_identity"].path,
                "--trt",
                "--onnxrt",
                "--input-shapes",
                "X:[1,2,2,2]",
                "--trt-min-shapes",
                "X:[1,2,1,1]",
                "--trt-opt-shapes",
                "X:[1,2,3,3]",
                "--trt-max-shapes",
                "X:[1,2,4,4]",
            ]
        )

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    def test_multiple_profiles(self):
        """Runs with two sets of min/opt/max shape arguments."""
        run_polygraphy_run(
            [
                ONNX_MODELS["dynamic_identity"].path,
                "--trt",
                "--onnxrt",
                "--trt-min-shapes",
                "X:[1,2,1,1]",
                "--trt-opt-shapes",
                "X:[1,2,1,1]",
                "--trt-max-shapes",
                "X:[1,2,1,1]",
                "--trt-min-shapes",
                "X:[1,2,4,4]",
                "--trt-opt-shapes",
                "X:[1,2,4,4]",
                "--trt-max-shapes",
                "X:[1,2,4,4]",
            ]
        )

    def test_int8_calibration_cache(self):
        """Runs an INT8 build with ``--calibration-cache`` and checks the cache file has content."""
        with tempfile.NamedTemporaryFile() as outpath:
            cmd = [ONNX_MODELS["identity"].path, "--trt", "--int8", "--calibration-cache", outpath.name]
            if mod.version(trt.__version__) >= mod.version("7.0"):
                cmd += ["--onnxrt"]
            run_polygraphy_run(cmd)
            assert is_file_non_empty(outpath.name)

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6")
    @pytest.mark.parametrize("base_class", ["IInt8LegacyCalibrator", "IInt8EntropyCalibrator2"])
    def test_int8_calibration_base_class(self, base_class):
        """Runs an INT8 build using the calibrator base class named by ``base_class``."""
        cmd = [ONNX_MODELS["identity"].path, "--trt", "--int8", "--calibration-base-class", base_class]
        if mod.version(trt.__version__) >= mod.version("7.0"):
            cmd += ["--onnxrt"]
        # The command list was built but never passed on; the call had no arguments.
        run_polygraphy_run(cmd)

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("8.0"), reason="Unsupported for TRT 7.2 and older")
    def test_timing_cache(self):
        """Checks how a shared timing cache file grows when a second model is built against it."""
        # ``tmpdir`` rather than ``dir`` so the builtin is not shadowed.
        with tempfile.TemporaryDirectory() as tmpdir:
            # Test with files that haven't already been created instead of using NamedTemporaryFile().
            total_cache = os.path.join(tmpdir, "total.cache")
            identity_cache = os.path.join(tmpdir, "identity.cache")

            run_polygraphy_run([ONNX_MODELS["const_foldable"].path, "--trt", "--timing-cache", total_cache])
            assert is_file_non_empty(total_cache)
            const_foldable_cache_size = get_file_size(total_cache)

            run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--timing-cache", identity_cache])
            identity_cache_size = get_file_size(identity_cache)

            run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--timing-cache", total_cache])
            total_cache_size = get_file_size(total_cache)

            # The total cache should be larger than either of the individual caches.
            assert total_cache_size > const_foldable_cache_size and total_cache_size > identity_cache_size
            # The total cache should also be smaller than or equal to the sum of the individual caches since
            # header information should not be duplicated.
            assert total_cache_size <= (const_foldable_cache_size + identity_cache_size)

    def test_save_load_engine(self):
        """Saves an engine to a file and runs again from that engine."""
        with tempfile.NamedTemporaryFile() as outpath:
            run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--save-engine", outpath.name])
            assert is_file_non_empty(outpath.name)
            run_polygraphy_run(["--trt", outpath.name, "--model-type=engine"])

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("8.0"), reason="Unsupported for TRT 7.2 and older")
    def test_tactic_replay(self):
        """Saves a tactic replay file and then builds again while loading it."""
        with tempfile.NamedTemporaryFile() as tactic_replay:
            run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--save-tactics", tactic_replay.name])
            assert is_file_non_empty(tactic_replay.name)
            run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--load-tactics", tactic_replay.name])

    @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.2"), reason="Unsupported before TRT 7.2")
    def test_tactic_sources(self):
        """Runs with an explicit list of tactic sources."""
        run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--tactic-sources", "CUBLAS", "CUBLAS_LT"])

    def test_data_loader_script_calibration(self):
        """Runs INT8 calibration with data supplied by a generated data loader script."""
        with tempfile.NamedTemporaryFile("w+", suffix=".py") as f:
            f.write(
                dedent(
                    """
                    import numpy as np

                    def load_data():
                        for _ in range(5):
                            yield {"x": np.ones((1, 1, 2, 2), dtype=np.float32) * 6.4341}
                    """
                )
            )
            # Flush so the subprocess sees the script contents on disk.
            f.flush()

            run_polygraphy_run([ONNX_MODELS["identity"].path, "--trt", "--int8", "--data-loader-script", f.name])
class TestOther(object):
    """Tests for runner-independent options of the ``run`` tool."""

    def test_0_iterations(self):
        """Runs with ``--iterations=0``."""
        args = [ONNX_MODELS["identity"].path, "--onnxrt", "--iterations=0"]
        run_polygraphy_run(args)

    def test_subprocess_sanity(self):
        """Runs with ``--use-subprocess``."""
        args = [ONNX_MODELS["identity"].path, "--onnxrt", "--use-subprocess"]
        run_polygraphy_run(args)

    def test_custom_tolerance(self):
        """Runs two ONNX-Runtime runners with explicit global tolerances."""
        args = [
            ONNX_MODELS["identity"].path,
            "--onnxrt",
            "--onnxrt",
            "--iterations=0",
            "--atol=1.0",
            "--rtol=1.0",
        ]
        run_polygraphy_run(args)

    def test_custom_per_output_tolerance(self):
        """Runs with per-output tolerances plus a default value."""
        tolerances = ["identity_out_0:1.0", "identity_out_2:3.0", "0.5"]
        args = [
            ONNX_MODELS["identity_identity"].path,
            "--onnxrt",
            "--onnxrt",
            "--onnx-outputs",
            "mark",
            "all",
            "--atol",
            *tolerances,
            "--rtol",
            *tolerances,
        ]
        run_polygraphy_run(args)

    def test_custom_input_ranges(self):
        """Runs with a per-input value range plus a default range."""
        args = [
            ONNX_MODELS["identity_identity"].path,
            "--onnxrt",
            "--val-range",
            "X:[1.0,2.0]",
            "[0.5,1.5]",
        ]
        run_polygraphy_run(args)

    def test_top_k(self):
        """Runs with ``--top-k=5``."""
        args = [ONNX_MODELS["identity"].path, "--onnxrt", "--top-k=5"]
        run_polygraphy_run(args)

    @pytest.mark.parametrize("check_error_stat", ["max", "median", "mean"])
    def test_check_error_stat(self, check_error_stat):
        """Runs with each supported ``--check-error-stat`` value."""
        args = [
            ONNX_MODELS["identity"].path,
            "--onnxrt",
            "--onnxrt",
            "--check-error-stat",
            check_error_stat,
        ]
        run_polygraphy_run(args)

    def test_save_load_outputs(self, tmp_path):
        """Saves outputs twice, then loads them back in several combinations."""
        model_path = ONNX_MODELS["identity"].path
        first_outputs = os.path.join(tmp_path, "outputs0.json")
        second_outputs = os.path.join(tmp_path, "outputs1.json")
        compared_msg = "Difference is within tolerance"

        run_polygraphy_run([model_path, "--onnxrt", "--save-outputs", first_outputs])
        run_polygraphy_run([model_path, "--onnxrt", "--save-outputs", second_outputs])

        status = run_polygraphy_run([model_path, "--onnxrt", "--load-outputs", first_outputs, second_outputs])
        combined = status.stdout + status.stderr
        assert compared_msg in combined  # Make sure it actually compared stuff.

        # Should work with only one file
        status = run_polygraphy_run([model_path, "--load-outputs", first_outputs])
        combined = status.stdout + status.stderr
        assert compared_msg not in combined  # Make sure it DIDN'T compare stuff.

        # Should work even with no runners specified
        status = run_polygraphy_run([model_path, "--load-outputs", first_outputs, second_outputs])
        combined = status.stdout + status.stderr
        assert compared_msg in combined  # Make sure it actually compared stuff.

        # Should work even when comparing a single runner to itself.
        status = run_polygraphy_run([model_path, "--load-outputs", first_outputs, first_outputs])
        combined = status.stdout + status.stderr
        assert compared_msg in combined  # Make sure it actually compared stuff.

    def test_save_load_inputs(self):
        """Saves input data, copies it through a load/save run, then loads both files."""
        with tempfile.NamedTemporaryFile() as infile0, tempfile.NamedTemporaryFile() as infile1:
            model_path = ONNX_MODELS["identity"].path

            run_polygraphy_run([model_path, "--onnxrt", "--save-input-data", infile0.name])

            # Copy
            run_polygraphy_run(
                [
                    model_path,
                    "--onnxrt",
                    "--load-input-data",
                    infile0.name,
                    "--save-input-data",
                    infile1.name,
                ]
            )

            run_polygraphy_run([model_path, "--onnxrt", "--load-input-data", infile0.name, infile1.name])

    @pytest.mark.skipif(
        mod.version(trt.__version__) < mod.version("7.0"),
        reason="Unsupported for TRT 6",
    )
    def test_runner_coexistence(self):
        """Runs TensorFlow, ONNX-Runtime and TensorRT together on a frozen TensorFlow model."""
        args = [
            TF_MODELS["identity"].path,
            "--model-type=frozen",
            "--tf",
            "--onnxrt",
            "--trt",
        ]
        run_polygraphy_run(args)