def test_cannot_use_calibrator_without_activation(self):
    """Check that ``get_batch`` returns ``None`` on a calibrator that was only constructed.

    The calibrator is built from a one-batch generator and ``get_batch`` is
    called on it straight away - it is never entered as a context manager
    and never handed to a builder first.
    """

    def single_batch():
        yield {"x": np.ones((1, 1, 2, 2), dtype=np.float32)}

    inactive_calibrator = Calibrator(single_batch())
    batch = inactive_calibrator.get_batch(["x"])
    assert batch is None
def test_calibrator_caches_without_explicit_cache(self, identity_builder_network):
    """Check that a calibrator created without a ``cache`` argument still has cache contents after a build.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    builder, network = identity_builder_network
    batches = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]
    calibrator = Calibrator(batches)

    # Build once so that the calibrator gets a chance to populate its cache.
    int8_config = CreateConfig(int8=True, calibrator=calibrator)
    build_engine = EngineFromNetwork((builder, network), int8_config)
    with build_engine():
        pass

    # No cache path or file was supplied, so whatever is read back here
    # has to come from the calibrator's own internal cache.
    cached = calibrator.read_calibration_cache()
    assert cached
def test_calibrator_rechecks_cache_on_reset(self, identity_builder_network):
    """Check that, after ``reset()``, the calibrator reads its cache back from the cache file.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    builder, network = identity_builder_network
    batches = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

    with tempfile.NamedTemporaryFile(mode="wb+") as cache_file:
        cache_path = cache_file.name
        calibrator = Calibrator(batches, cache=cache_path)

        # Build once so that the cache gets populated.
        int8_config = CreateConfig(int8=True, calibrator=calibrator)
        build_engine = EngineFromNetwork((builder, network), int8_config)
        with build_engine():
            pass

        # After a reset the calibrator must no longer report cached scales, and the
        # cache it reads back must be exactly as large as the file on disk.
        calibrator.reset()
        assert not calibrator.has_cached_scales

        cache_contents = calibrator.read_calibration_cache()
        size_on_disk = os.stat(cache_path).st_size
        assert len(cache_contents) == size_on_disk
def test_calibrator_device_buffers_multiinput(self, multi_input_builder_network, mode):
    """Check calibration of a two-input network when one input is fed from device memory.

    Args:
        multi_input_builder_network: Fixture that unpacks into ``(builder, network)``.
        mode: How the device input "X0" is handed over - ``"array"`` (the
            ``DeviceArray`` itself), ``"view"`` (a ``DeviceView`` over it) or
            ``"pointer"`` (its raw ``ptr``).
    """

    def device_batches(count):
        # One device array is kept alive for the lifetime of the generator and
        # refilled with ones before each batch is yielded.
        with cuda.DeviceArray(shape=(1,), dtype=np.float32) as dev_arr:
            for _ in range(count):
                dev_arr.copy_from(np.ones((1,), dtype=np.float32))
                representations = {
                    "array": dev_arr,
                    "view": cuda.DeviceView(dev_arr.ptr, dev_arr.shape, dev_arr.dtype),
                    "pointer": dev_arr.ptr,
                }
                x0_input = representations[mode]
                # "Y0" stays a plain host array.
                yield {"X0": x0_input, "Y0": np.zeros((1,), dtype=np.float32)}

    builder, network = multi_input_builder_network
    NUM_BATCHES = 2

    calibrator = Calibrator(device_batches(NUM_BATCHES))
    int8_config = CreateConfig(int8=True, calibrator=calibrator)

    with engine_from_network((builder, network), int8_config):
        assert calibrator.num_batches == NUM_BATCHES

    self.check_calibrator_cleanup(calibrator)
def test_host_data_copied_to_device(self):
    """Check that the pointer returned by ``get_batch`` refers to device data holding all ones.

    The single pointer from ``get_batch`` is wrapped in a ``DeviceView`` and
    read back with ``numpy()`` so its shape and contents can be compared.
    """
    expected_shape = (1, 1, 2, 2)

    with Calibrator(generate_data(1)) as calibrator:
        # Exactly one name is requested, so exactly one pointer must come back.
        (device_ptr,) = calibrator.get_batch(names=["x"])

        view = cuda.DeviceView(device_ptr, shape=expected_shape, dtype=np.float32)
        host_copy = view.numpy()

        assert host_copy.shape == expected_shape
        assert np.all(host_copy == 1)
def main():
    """Calibrate, build and run an int8 TensorRT engine for ``identity.onnx``."""
    # Supplying a path (or a file-like object) as `cache` lets calibration data be cached,
    # which means calibration can be skipped the next time the engine is built.
    #
    # TIP: This calibrator also works with TensorRT APIs directly (e.g. config.int8_calibrator);
    # going through Polygraphy loaders is optional.
    calibrator = Calibrator(data_loader=calib_data(), cache="identity-calib.cache")

    # Providing a calibrator is not sufficient on its own - int8 mode must be enabled too.
    load_network = NetworkFromOnnxPath("identity.onnx")
    int8_config = CreateConfig(int8=True, calibrator=calibrator)
    build_engine = EngineFromNetwork(load_network, config=int8_config)

    # Activating the runner is what triggers calibration and the engine build. Logging
    # verbosity is raised temporarily so that TensorRT's output is visible meanwhile.
    with G_LOGGER.verbosity(G_LOGGER.VERBOSE):
        with TrtRunner(build_engine) as runner:
            # Try out the int8 engine with some dummy input data.
            inp_data = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

            # NOTE: The runner owns the output buffers and is free to reuse them between
            # `infer()` calls. To keep results from several inferences, `copy.deepcopy()` them.
            outputs = runner.infer({"x": inp_data})

            # It's an identity model, so the output has to equal the input.
            assert np.array_equal(outputs["y"], inp_data)
def test_calibrator_metadata_set(self, identity_builder_network):
    """Check that creating a config attaches the calibrator and fills in its data loader's input metadata.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    builder, network = identity_builder_network
    calibrator = Calibrator(DataLoader())
    create_config = CreateConfig(int8=True, calibrator=calibrator)

    with create_config(builder, network) as config:
        assert config.int8_calibrator

        # The data loader was constructed without metadata, so an "x" entry
        # can only be present if it was set while the config was created.
        metadata = calibrator.data_loader.input_metadata
        assert "x" in metadata
def test_calibrator_with_path_name_cache(self, identity_builder_network):
    """Check that a cache given as a file path is non-empty once the engine has been built.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    builder, network = identity_builder_network
    batches = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

    with tempfile.NamedTemporaryFile() as cache_file:
        # Only the *name* of the temporary file is passed to the calibrator.
        calibrator = Calibrator(batches, cache=cache_file.name)
        int8_config = CreateConfig(int8=True, calibrator=calibrator)
        build_engine = EngineFromNetwork((builder, network), int8_config)

        with build_engine():
            check_file_non_empty(cache_file.name)
def test_can_build_with_calibrator(self, identity_builder_network):
    """Check that an int8 engine builds with a calibrator and that the calibrator releases its buffers.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    builder, network = identity_builder_network
    calibrator = Calibrator(DataLoader())
    create_config = CreateConfig(int8=True, calibrator=calibrator)
    loader = EngineFromNetwork((builder, network), create_config)

    # The build itself is the behavior under test; nothing is done with the engine.
    with loader():
        pass

    # Calibrator buffers should be freed after the build. A generator expression
    # is used so that no throwaway list is built just to feed `all()`.
    assert all(buf.allocated_nbytes == 0 for buf in calibrator.device_buffers.values())
def test_calibrator_generator_data(self, identity_builder_network):
    """Check that a calibrator fed by a generator consumes every batch during the build.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    NUM_BATCHES = 2
    builder, network = identity_builder_network

    calibrator = Calibrator(generate_data(NUM_BATCHES))
    int8_config = CreateConfig(int8=True, calibrator=calibrator)

    with engine_from_network((builder, network), int8_config):
        # By the time the engine exists, all batches must have been counted.
        assert calibrator.num_batches == NUM_BATCHES

    self.check_calibrator_cleanup(calibrator)
def test_calibrator_iterable_data(self, identity_builder_network):
    """Check that a calibrator fed by a plain list consumes every batch during the build.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    NUM_BATCHES = 2
    builder, network = identity_builder_network

    # The same feed_dict object is repeated NUM_BATCHES times.
    one_batch = {"x": np.ones((1, 1, 2, 2), dtype=np.float32)}
    calibrator = Calibrator([one_batch] * NUM_BATCHES)

    int8_config = CreateConfig(int8=True, calibrator=calibrator)
    build_engine = EngineFromNetwork((builder, network), int8_config)

    with build_engine():
        assert calibrator.num_batches == NUM_BATCHES
def test_calibrator_with_file_object_cache(self, identity_builder_network, mode):
    """Check that the calibrator accepts an open file object as its cache.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
        mode: Mode the temporary cache file is opened with. For ``"rb"`` the
            file is not checked for content.
    """
    builder, network = identity_builder_network
    batches = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

    with tempfile.NamedTemporaryFile(mode=mode) as cache_file:
        # The file object itself - not its name - is handed to the calibrator.
        calibrator = Calibrator(batches, cache=cache_file)
        int8_config = CreateConfig(int8=True, calibrator=calibrator)

        with engine_from_network((builder, network), int8_config):
            file_is_writable = mode != "rb"
            if file_is_writable:
                check_file_non_empty(cache_file.name)

    self.check_calibrator_cleanup(calibrator)
def test_calibrator_generator_data(self, identity_builder_network):
    """Check that a calibrator fed by a locally defined generator consumes every batch.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    NUM_BATCHES = 2
    builder, network = identity_builder_network

    def batches():
        # One array is shared by all batches; each batch gets its own dict.
        shared_input = np.ones((1, 1, 2, 2), dtype=np.float32)
        for _ in range(NUM_BATCHES):
            yield {"x": shared_input}

    calibrator = Calibrator(batches())
    int8_config = CreateConfig(int8=True, calibrator=calibrator)
    build_engine = EngineFromNetwork((builder, network), int8_config)

    with build_engine():
        assert calibrator.num_batches == NUM_BATCHES
def test_calibrator_outside_polygraphy(self, identity_builder_network):
    """Check that the calibrator works when attached to a TensorRT builder config by hand.

    No Polygraphy loaders are involved: the config is created from the builder,
    the INT8 flag is set, and the calibrator is assigned to ``int8_calibrator``.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    NUM_BATCHES = 2
    builder, network = identity_builder_network

    def batches():
        # One array is shared by all batches; each batch gets its own dict.
        shared_input = np.ones((1, 1, 2, 2), dtype=np.float32)
        for _ in range(NUM_BATCHES):
            yield {"x": shared_input}

    calibrator = Calibrator(batches())

    trt_config = builder.create_builder_config()
    trt_config.set_flag(trt.BuilderFlag.INT8)
    trt_config.int8_calibrator = calibrator

    with builder.build_engine(network, trt_config) as engine:
        assert engine
def test_calibrator_invalid_input_fails(self, identity_builder_network, names):
    """Check that building fails with ``PolygraphyException`` when the feed_dict uses the given names.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
        names: Input names used as the keys of the single calibration batch.
    """
    builder, network = identity_builder_network

    bad_batch = {}
    for name in names:
        bad_batch[name] = np.ones((1, 1, 2, 2), dtype=np.float32)

    calibrator = Calibrator([bad_batch])
    int8_config = CreateConfig(int8=True, calibrator=calibrator)

    # Both the engine build and entering the engine happen inside the `raises` scope.
    expect_failure = pytest.raises(PolygraphyException)
    with expect_failure, engine_from_network((builder, network), int8_config):
        pass
def test_calibrator_basic(self, identity_builder_network, BaseClass):
    """Check basic calibration with the given calibrator base class.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
        BaseClass: TensorRT calibrator class forwarded to ``Calibrator``.
    """
    trt_older_than_7 = mod.version(trt.__version__) < mod.version("7.0")
    if trt_older_than_7 and BaseClass == trt.IInt8LegacyCalibrator:
        pytest.skip("Bug in TRT 6 causes NaNs with legacy calibrator")

    NUM_BATCHES = 2
    builder, network = identity_builder_network

    # The same feed_dict object is repeated NUM_BATCHES times.
    one_batch = {"x": np.ones((1, 1, 2, 2), dtype=np.float32)}
    calibrator = Calibrator([one_batch] * NUM_BATCHES, BaseClass=BaseClass)
    int8_config = CreateConfig(int8=True, calibrator=calibrator)

    with engine_from_network((builder, network), int8_config):
        assert calibrator.num_batches == NUM_BATCHES

    self.check_calibrator_cleanup(calibrator)
def test_calibrator_outside_polygraphy(self, identity_builder_network):
    """Check that the calibrator works when attached to a TensorRT builder config by hand.

    The engine is built with ``build_engine`` on TensorRT versions below 8.0, and
    via ``build_serialized_network`` plus a ``trt.Runtime`` otherwise.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    NUM_BATCHES = 2
    builder, network = identity_builder_network

    trt_config = builder.create_builder_config()
    trt_config.set_flag(trt.BuilderFlag.INT8)

    with Calibrator(generate_data(NUM_BATCHES)) as calibrator:
        trt_config.int8_calibrator = calibrator

        has_serialized_build = not (mod.version(trt.__version__) < mod.version("8.0"))
        if has_serialized_build:
            with trt.Runtime(get_trt_logger()) as runtime:
                engine = runtime.deserialize_cuda_engine(builder.build_serialized_network(network, trt_config))
        else:
            engine = builder.build_engine(network, trt_config)

        with engine:
            assert engine

    self.check_calibrator_cleanup(calibrator)
def test_calibrator_data_and_ordering_correct(self):
    """Check that ``get_batch`` returns pointers in the order requested, each holding the right data.

    Three inputs are supplied per batch in three different forms - a host
    array of zeros ("x0"), a ``DeviceArray`` of ones ("x1") and the raw
    pointer of a ``DeviceArray`` of twos ("x2") - so that the value behind
    the pointer at position ``i`` must equal ``i``.
    """

    def generate_multidata(num_batches):
        # The device arrays are held in a `with` block so that they stay alive while
        # their pointers are in use and are released once the generator is finalized,
        # instead of allocating fresh, never-released arrays for every batch.
        with cuda.DeviceArray(dtype=np.float32) as ones_arr, cuda.DeviceArray(dtype=np.float32) as twos_arr:
            for _ in range(num_batches):
                ones_arr.copy_from(np.ones((4, 5), dtype=np.float32))
                twos_arr.copy_from(np.ones((4, 5), dtype=np.float32) * 2)
                yield {
                    "x0": np.zeros((4, 5), dtype=np.float32),
                    "x1": ones_arr,
                    # Read `ptr` only after `copy_from`, which is what sizes the array.
                    "x2": twos_arr.ptr,
                }

    NUM_BATCHES = 2
    with Calibrator(generate_multidata(NUM_BATCHES)) as calibrator:
        for _ in range(NUM_BATCHES):
            ptrs = calibrator.get_batch(names=["x0", "x1", "x2"])
            for index, ptr in enumerate(ptrs):
                v = cuda.DeviceView(ptr, shape=(4, 5), dtype=np.float32)
                assert np.all(v.numpy() == index)
def test_calibrator_outside_polygraphy(self, identity_builder_network):
    """Check that the calibrator works when attached to a TensorRT builder config by hand.

    The engine is built with ``build_engine`` on TensorRT versions below 7.3, and
    from the bytes produced by ``build_serialized_network`` otherwise.

    Args:
        identity_builder_network: Fixture that unpacks into ``(builder, network)``.
    """
    NUM_BATCHES = 2
    builder, network = identity_builder_network

    def batches():
        # One array is shared by all batches; each batch gets its own dict.
        shared_input = np.ones((1, 1, 2, 2), dtype=np.float32)
        for _ in range(NUM_BATCHES):
            yield {"x": shared_input}

    calibrator = Calibrator(batches())

    trt_config = builder.create_builder_config()
    trt_config.set_flag(trt.BuilderFlag.INT8)
    trt_config.int8_calibrator = calibrator

    has_serialized_build = not (misc.version(trt.__version__) < misc.version("7.3"))
    if has_serialized_build:
        engine = func.invoke(EngineFromBytes(builder.build_serialized_network(network, trt_config)))
    else:
        engine = builder.build_engine(network, trt_config)

    with engine:
        assert engine
"""
from polygraphy.backend.trt import NetworkFromOnnxPath, CreateConfig, EngineFromNetwork, Calibrator, TrtRunner
from polygraphy.logger import G_LOGGER

import numpy as np
import os

# Path to the ONNX model, resolved relative to this file: ../../models/identity.onnx
MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, "models", "identity.onnx")
# Shape used both for the calibration batches and for the inference input below.
INPUT_SHAPE = (1, 1, 2, 2)

# The data loader argument to Calibrator can be any iterable or generator that yields `feed_dict`s.
# A feed_dict is just a mapping of input names to corresponding inputs (as NumPy arrays).
# Calibration will continue until our data loader runs out of data (4 batches in this example).
def calib_data():
    """Yield four feed_dicts, each mapping input "x" to an all-ones float32 array of INPUT_SHAPE."""
    for _ in range(4):
        yield {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)} # Totally real data

# We can provide a path or file-like object if we want to cache calibration data.
# This lets us avoid running calibration the next time we build the engine.
calibrator = Calibrator(data_loader=calib_data(), cache="identity-calib.cache")
# int8 mode is enabled in the config alongside the calibrator.
build_engine = EngineFromNetwork(NetworkFromOnnxPath(MODEL), config=CreateConfig(int8=True, calibrator=calibrator))

# When we activate our runner, it will calibrate and build the engine. If we want to
# see the logging output from TensorRT, we can temporarily increase logging verbosity:
with G_LOGGER.verbosity(G_LOGGER.VERBOSE):
    with TrtRunner(build_engine) as runner:
        feed_dict = {"x": np.ones(shape=INPUT_SHAPE, dtype=np.float32)}
        outputs = runner.infer(feed_dict=feed_dict)
        # The output "y" is expected to equal the input "x"
        # (presumably because the model is an identity - inferred from its file name).
        assert np.all(outputs["y"] == feed_dict["x"])