def mirror_dirs():
    """Setup for mirror tests:
    - a temporary cache dir,
    - temporary settings,
    - two temporary directories
    """
    with temp_directory() as cachedir:
        with settings.temporary():
            settings.set("cache-directory", cachedir)
            with temp_directory() as mirrordir:
                with temp_directory() as mirrordir2:
                    _reset_mirrors(use_env_var=False)
                    yield mirrordir, mirrordir2
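# A minimal usage sketch (not part of the original module): it assumes mirror_dirs
# is registered as a pytest fixture (e.g. decorated with @pytest.fixture upstream),
# so a test can take it as an argument and unpack the two mirror directories.
def test_mirror_dirs_fixture_sketch(mirror_dirs):
    mirrordir, mirrordir2 = mirror_dirs
    assert os.path.isdir(mirrordir)
    assert os.path.isdir(mirrordir2)
    assert mirrordir != mirrordir2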
def test_multi_graph_2():
    with temp_directory() as tmpdir:
        os.mkdir(os.path.join(tmpdir, "a1"))
        a11 = load_source("dummy-source", kind="grib", date=20000101)
        a11.save(os.path.join(tmpdir, "a1", "a11.grib"))
        a12 = load_source("dummy-source", kind="grib", date=20000102)
        a12.save(os.path.join(tmpdir, "a1", "a12.grib"))

        os.mkdir(os.path.join(tmpdir, "b1"))
        b11 = load_source("dummy-source", kind="grib", date=20000103)
        b11.save(os.path.join(tmpdir, "b1", "b11.grib"))
        b12 = load_source("dummy-source", kind="grib", date=20000104)
        b12.save(os.path.join(tmpdir, "b1", "b12.grib"))

        os.mkdir(os.path.join(tmpdir, "a2"))
        a21 = load_source("dummy-source", kind="grib", date=20000105)
        a21.save(os.path.join(tmpdir, "a2", "a21.grib"))
        a22 = load_source("dummy-source", kind="grib", date=20000106)
        a22.save(os.path.join(tmpdir, "a2", "a22.grib"))

        os.mkdir(os.path.join(tmpdir, "b2"))
        b21 = load_source("dummy-source", kind="grib", date=20000107)
        b21.save(os.path.join(tmpdir, "b2", "b21.grib"))
        b22 = load_source("dummy-source", kind="grib", date=20000108)
        b22.save(os.path.join(tmpdir, "b2", "b22.grib"))

        def filter(path_or_url):
            return path_or_url.endswith("2.grib")

        ds = load_source("file", tmpdir, filter=filter)
        ds.graph()

        assert len(ds) == 4
def test_unpack_zip():
    TEST_URL = "https://get.ecmwf.int/test-data/climetlab/input/grib.zip"

    # Make sure we fail if not cached
    with pytest.raises(OfflineError), network_off():
        ds = load_source("url", f"{TEST_URL}?time={time.time()}")

    with temp_directory() as tmpdir:
        with settings.temporary("cache-directory", tmpdir):
            ds = load_source("url", TEST_URL)
            assert len(ds) == 6, len(ds)

            with network_off():
                # Check cache is used
                ds = load_source("url", TEST_URL)
                assert len(ds) == 6, len(ds)

            with pytest.raises(OfflineError), network_off():
                # check force
                ds = load_source("url", TEST_URL, force=True)
                assert len(ds) == 6, len(ds)

            ds = load_source("url", TEST_URL, force=True)
            assert len(ds) == 6, len(ds)

            with network_off():
                ds = load_source("url", TEST_URL)
                assert len(ds) == 6, len(ds)

            # Again
            ds = load_source("url", TEST_URL)
            assert len(ds) == 6, len(ds)
def test_glob():
    s = load_source("file", climetlab_file("docs/examples/test.grib"))
    with temp_directory() as tmpdir:
        s.save(os.path.join(tmpdir, "a.grib"))
        s.save(os.path.join(tmpdir, "b.grib"))

        s = load_source("file", os.path.join(tmpdir, "*.grib"))
        assert len(s) == 4, len(s)

        s = load_source("file", tmpdir)
        assert len(s) == 4, len(s)
def large_multi_1(b, func):
    with temp_directory() as tmpdir:
        ilist = list(range(200))
        pattern = os.path.join(tmpdir, "test-{i}.nc")
        for i in ilist:
            source = load_source(
                "dummy-source",
                kind="netcdf",
                dims=["lat", "lon", "time"],
                coord_values=dict(time=[i + 0.0, i + 0.5]),
            )
            filename = pattern.format(i=i)
            source.save(filename)
        return b(func, pattern, ilist)
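# A hedged sketch of how large_multi_1 might be driven (the names below are
# illustrative, not from the original module): `b` is expected to be a
# pytest-benchmark callable and `func` a loader taking the filename pattern and
# the list of substitution values.
def _open_with_xarray(pattern, ilist):
    import xarray as xr

    # Expand the pattern into the 200 NetCDF paths written above and merge them.
    paths = [pattern.format(i=i) for i in ilist]
    return xr.open_mfdataset(paths, combine="by_coords")


def test_large_multi_1_xarray_sketch(benchmark):
    large_multi_1(benchmark, _open_with_xarray)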
def test_cache_2():
    with temp_directory() as tmpdir:
        with settings.temporary():
            settings.set("cache-directory", tmpdir)
            settings.set("maximum-cache-size", "5MB")
            settings.set("number-of-download-threads", 5)

            assert cache_size() == 0

            load_source(
                "url-pattern",
                "https://get.ecmwf.int/test-data/climetlab/1mb-{n}.bin",
                {
                    "n": [0, 1, 2, 3, 4],
                },
            )

            assert cache_size() == 5 * 1024 * 1024, cache_size()

            cnt = 0
            for i, f in enumerate(cache_entries()):
                print("FILE", i, f)
                cnt += 1
            assert cnt == 5, f"Files in cache database: {cnt}"

            load_source(
                "url-pattern",
                "https://get.ecmwf.int/test-data/climetlab/1mb-{n}.bin",
                {
                    "n": [5, 6, 7, 8, 9],
                },
            )

            assert cache_size() == 5 * 1024 * 1024, cache_size() / 1024.0 / 1024.0

            cnt = 0
            for i, f in enumerate(cache_entries()):
                print("FILE", i, f)
                cnt += 1
            assert cnt == 5, f"Files in cache database: {cnt}"

            cnt = 0
            for n in os.listdir(tmpdir):
                if n.startswith("cache-") and n.endswith(".db"):
                    continue
                cnt += 1
            assert cnt == 5, f"Files in cache directory: {cnt}"
def test_multi_directory_1():
    with temp_directory() as directory:
        for date in (20000101, 20000102):
            ds = load_source("dummy-source", kind="grib", date=date)
            ds.save(os.path.join(directory, f"{date}.grib"))

        ds = load_source("file", directory)
        print(ds)
        assert len(ds) == 2
        ds.graph()

        with temp_file() as filename:
            ds.save(filename)
            ds = load_source("file", filename)
            assert len(ds) == 2
def test_url_source_check_out_of_date():
    def load():
        load_source(
            "url",
            "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr",
        )

    with temp_directory() as tmpdir:
        with settings.temporary():
            settings.set("cache-directory", tmpdir)
            load()

            settings.set("check-out-of-date-urls", False)
            with network_off():
                load()
def generate_zip(target, sources=None, names=None, directory=None, **kwargs):
    if sources is not None:
        assert directory is None
        assert names is not None
        assert len(sources) == len(names)
        with temp_directory() as tmpdir:
            for s, n in zip(sources, names):
                s.save(os.path.join(tmpdir, n))
            zip_dir(target, tmpdir)
        return

    if directory is not None:
        assert sources is None
        assert names is None
        zip_dir(target, directory)
        return

    assert False
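# A hedged usage sketch for generate_zip, exercising both calling modes; the output
# file names are illustrative and the dummy GRIB sources mirror the ones used above.
def _example_generate_zip(outdir):
    sources = [
        load_source("dummy-source", kind="grib", date=d) for d in (20000101, 20000102)
    ]
    # Mode 1: explicit sources plus the names they get inside the archive.
    generate_zip(
        os.path.join(outdir, "fields.zip"), sources=sources, names=["a.grib", "b.grib"]
    )
    # Mode 2: zip the contents of an existing directory.
    with temp_directory() as tmpdir:
        for name, s in zip(["a.grib", "b.grib"], sources):
            s.save(os.path.join(tmpdir, name))
        generate_zip(os.path.join(outdir, "directory.zip"), directory=tmpdir)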
def test_numbers():
    with temp_directory() as tmpdir:
        with settings.temporary("cache-directory", tmpdir):
            settings.set("url-download-timeout", 30)
            assert settings.get("url-download-timeout") == 30

            settings.set("url-download-timeout", "30")
            assert settings.get("url-download-timeout") == 30

            settings.set("url-download-timeout", "30s")
            assert settings.get("url-download-timeout") == 30

            settings.set("url-download-timeout", "2m")
            assert settings.get("url-download-timeout") == 120

            settings.set("url-download-timeout", "10h")
            assert settings.get("url-download-timeout") == 36000

            settings.set("url-download-timeout", "7d")
            assert settings.get("url-download-timeout") == 7 * 24 * 3600

            with pytest.raises(ValueError):
                settings.set("url-download-timeout", "1x")

            settings.set("maximum-cache-size", "1")
            assert settings.get("maximum-cache-size") == 1

            settings.set("maximum-cache-size", "1k")
            assert settings.get("maximum-cache-size") == 1024

            settings.set("maximum-cache-size", "1kb")
            assert settings.get("maximum-cache-size") == 1024

            settings.set("maximum-cache-size", "1k")
            assert settings.get("maximum-cache-size") == 1024

            settings.set("maximum-cache-size", "1kb")
            assert settings.get("maximum-cache-size") == 1024

            settings.set("maximum-cache-size", "1K")
            assert settings.get("maximum-cache-size") == 1024

            settings.set("maximum-cache-size", "1M")
            assert settings.get("maximum-cache-size") == 1024 * 1024

            settings.set("maximum-cache-size", "1G")
            assert settings.get("maximum-cache-size") == 1024 * 1024 * 1024

            settings.set("maximum-cache-size", "1T")
            assert settings.get("maximum-cache-size") == 1024 * 1024 * 1024 * 1024

            settings.set("maximum-cache-size", "1P")
            assert (
                settings.get("maximum-cache-size") == 1024 * 1024 * 1024 * 1024 * 1024
            )

            settings.set("maximum-cache-size", None)
            assert settings.get("maximum-cache-size") is None

            settings.set("maximum-cache-disk-usage", "2%")
            assert settings.get("maximum-cache-disk-usage") == 2
def generate_zarr_zip(target, **kwargs):
    ds = make_xarray(**kwargs)
    with temp_directory() as tmpdir:
        ds.to_zarr(tmpdir)
        zip_dir(target, tmpdir)
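# A hedged sketch of how generate_zarr_zip might be exercised: make_xarray(**kwargs)
# is assumed (defined elsewhere in the original module) to build the xarray.Dataset
# written to zarr, so this only checks that a non-empty zip archive is produced.
def _example_generate_zarr_zip(outdir):
    import zipfile

    target = os.path.join(outdir, "data.zarr.zip")
    generate_zarr_zip(target)  # extra kwargs would be forwarded to make_xarray
    with zipfile.ZipFile(target) as zf:
        assert len(zf.namelist()) > 0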
def test_large_multi_2_xarray(benchmark):
    with temp_directory():
        paths = large_multi_2_climetlab()
        benchmark(large_multi_2_xarray, paths)
def test_large_multi_2_climetlab(benchmark):
    with temp_directory():
        large_multi_2_climetlab()
        benchmark(large_multi_2_climetlab)
def test_cache_2():
    with temp_directory() as tmpdir:
        with settings.temporary():
            settings.set("cache-directory", tmpdir)
            settings.set("maximum-cache-size", "5MB")
            settings.set("number-of-download-threads", 5)

            assert cache_size() == 0

            load_source(
                "url-pattern",
                f"{TEST_DATA_URL}/input/" + "1mb-{n}.bin",
                {
                    "n": [0, 1, 2, 3, 4],
                },
            )

            cachesize = cache_size()
            expected = 5 * 1024 * 1024
            if cachesize != expected:
                print(json.dumps(dump_cache_database(), indent=4))
            assert cachesize == expected, ("before", cachesize / 1024.0 / 1024.0)

            cnt = 0
            for i, f in enumerate(cache_entries()):
                # print("FILE", i, f)
                cnt += 1
            if cnt != 5:
                print(json.dumps(dump_cache_database(), indent=4))
            assert cnt == 5, f"Files in cache database (before): {cnt}"

            load_source(
                "url-pattern",
                f"{TEST_DATA_URL}/input/" + "1mb-{n}.bin",
                {
                    "n": [5, 6, 7, 8, 9],
                },
            )

            cachesize = cache_size()
            expected = 5 * 1024 * 1024
            if cachesize != expected:
                print(json.dumps(dump_cache_database(), indent=4))
            assert cachesize == expected, ("after", cachesize / 1024.0 / 1024.0)

            cnt = 0
            for i, f in enumerate(cache_entries()):
                LOG.debug("FILE %s %s", i, f)
                cnt += 1
            if cnt != 5:
                print(json.dumps(dump_cache_database(), indent=4))
            assert cnt == 5, f"Files in cache database (after): {cnt}"

            cnt = 0
            for n in os.listdir(tmpdir):
                if n.startswith("cache-") and n.endswith(".db"):
                    continue
                cnt += 1
            if cnt != 5:
                print(json.dumps(dump_cache_database(), indent=4))
            assert cnt == 5, f"Files in cache directory: {cnt}"