def test_timing_cache(self):
    """Check that `--timing-cache` writes a cache file and grows an existing one.

    Runs "const_foldable" and then "identity" against one shared cache file, and
    "identity" alone against a second file, then compares the resulting file sizes.
    """
    # Named `tmp_dir` rather than `dir` so the builtin `dir()` is not shadowed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Test with files that haven't already been created instead of using NamedTemporaryFile().
        total_cache = os.path.join(tmp_dir, "total.cache")
        identity_cache = os.path.join(tmp_dir, "identity.cache")

        run_polygraphy_run(
            [ONNX_MODELS["const_foldable"].path, "--trt", "--timing-cache", total_cache]
        )
        assert is_file_non_empty(total_cache)
        const_foldable_cache_size = get_file_size(total_cache)

        run_polygraphy_run(
            [ONNX_MODELS["identity"].path, "--trt", "--timing-cache", identity_cache]
        )
        identity_cache_size = get_file_size(identity_cache)

        # Second run against the already-populated shared cache.
        run_polygraphy_run(
            [ONNX_MODELS["identity"].path, "--trt", "--timing-cache", total_cache]
        )
        total_cache_size = get_file_size(total_cache)

        # The total cache should be larger than either of the individual caches.
        # Checked separately so a failure shows which comparison broke.
        assert total_cache_size > const_foldable_cache_size
        assert total_cache_size > identity_cache_size
        # The total cache should also be smaller than or equal to the sum of the individual caches since
        # header information should not be duplicated.
        assert total_cache_size <= (const_foldable_cache_size + identity_cache_size)
def test_timing_cache_generate_and_append(self, path_mode):
    """Build two models against one timing cache and compare the cache file sizes.

    When `path_mode` is truthy the cache is saved through its file name;
    otherwise the open file object itself is passed as `save_timing_cache`.
    """
    with tempfile.NamedTemporaryFile() as shared_cache, tempfile.NamedTemporaryFile() as solo_cache:

        def rewind(cache_file):
            # The file position is only reset when the file object itself is in use.
            if not path_mode:
                cache_file.seek(0)

        def build_with_cache(model_name, cache_file):
            rewind(cache_file)
            parse_network = NetworkFromOnnxBytes(ONNX_MODELS[model_name].loader)
            # Must load the cache with CreateConfig so that new data is appended
            # instead of overwriting the previous cache.
            config = CreateConfig(load_timing_cache=cache_file.name)
            if path_mode:
                save_target = cache_file.name
            else:
                # In non-path_mode, use the file-like object directly.
                save_target = cache_file
            build = EngineFromNetwork(
                parse_network,
                config,
                save_timing_cache=save_target,
            )
            with build():
                pass
            rewind(cache_file)

        assert not shared_cache.read()

        build_with_cache("const_foldable", shared_cache)
        const_foldable_size = get_file_size(shared_cache.name)

        # Build this network twice. Once with a fresh cache so we can determine its size.
        assert get_file_size(solo_cache.name) == 0
        build_with_cache("identity", solo_cache)
        identity_size = get_file_size(solo_cache.name)

        build_with_cache("identity", shared_cache)
        combined_size = get_file_size(shared_cache.name)

        # The total cache should be larger than either of the individual caches.
        assert combined_size > max(const_foldable_size, identity_size)
        # The total cache should also be smaller than or equal to the sum of the individual caches since
        # header information should not be duplicated.
        assert combined_size <= (const_foldable_size + identity_size)
def test_calibrator_rechecks_cache_on_reset(self, identity_builder_network):
    """Check that a reset calibrator reads the calibration cache file again.

    An int8 engine is built first with a calibrator bound to a temporary cache
    file; after `reset()`, the cache returned by the calibrator is compared in
    length against the size of that file.
    """
    builder, network = identity_builder_network
    batches = [{"x": np.ones((1, 1, 2, 2), dtype=np.float32)}]

    with tempfile.NamedTemporaryFile(mode="wb+") as cache_file:
        calibrator = Calibrator(batches, cache=cache_file.name)

        # First, populate the cache
        int8_config = CreateConfig(int8=True, calibrator=calibrator)
        with engine_from_network((builder, network), int8_config):
            pass

        # Ensure that now the calibrator will read from the cache when reset
        calibrator.reset()
        assert not calibrator.has_cached_scales
        cached = calibrator.read_calibration_cache()
        assert len(cached) == get_file_size(cache_file.name)

    self.check_calibrator_cleanup(calibrator)