Example #1
0
def mirror_dirs():
    """Provide an isolated environment for mirror tests.

    While the generator is suspended the following are active:

    - a temporary directory configured as ``cache-directory``,
    - a temporary settings scope,
    - two further temporary directories, yielded as a pair.

    ``_reset_mirrors(use_env_var=False)`` is called just before yielding.
    """
    with temp_directory() as cachedir, settings.temporary():
        # The cache directory is configured before the mirror directories
        # are created.
        settings.set("cache-directory", cachedir)
        with temp_directory() as mirrordir, temp_directory() as mirrordir2:
            _reset_mirrors(use_env_var=False)
            yield mirrordir, mirrordir2
Example #2
0
def test_multi_graph_2():
    """Load a two-level directory tree through a file-name filter.

    Four sub-directories are created, each holding two GRIB files whose
    names end in ``1.grib`` and ``2.grib``. Only the files accepted by the
    filter (those ending in ``2.grib``) are loaded, and the resulting source
    is expected to have length 4.
    """
    # Sub-directory name -> dates of the two dummy GRIB files saved in it.
    layout = {
        "a1": (20000101, 20000102),
        "b1": (20000103, 20000104),
        "a2": (20000105, 20000106),
        "b2": (20000107, 20000108),
    }

    with temp_directory() as tmpdir:
        for subdir, dates in layout.items():
            os.mkdir(os.path.join(tmpdir, subdir))
            for number, date in enumerate(dates, start=1):
                source = load_source("dummy-source", kind="grib", date=date)
                source.save(os.path.join(tmpdir, subdir, f"{subdir}{number}.grib"))

        def filter(path_or_url):
            # Keep only the second file of each sub-directory.
            return path_or_url.endswith("2.grib")

        ds = load_source("file", tmpdir, filter=filter)
        ds.graph()

        assert len(ds) == 4
Example #3
0
def test_unpack_zip():
    """Check loading a zipped GRIB archive from a URL, online and offline.

    The test expects that:

    - loading a URL that is not cached raises ``OfflineError`` when the
      network is off,
    - once loaded, the same URL can be loaded again with the network off,
    - ``force=True`` raises ``OfflineError`` when the network is off, even
      though the URL has just been loaded,
    - every successful load yields a source of length 6.
    """
    TEST_URL = "https://get.ecmwf.int/test-data/climetlab/input/grib.zip"

    # Make sure we fail if not cached. The timestamp makes the URL unique.
    # Only the raising call lives inside ``pytest.raises``: anything after
    # it in the same block would never run.
    with pytest.raises(OfflineError), network_off():
        load_source("url", f"{TEST_URL}?time={time.time()}")

    with temp_directory() as tmpdir:
        with settings.temporary("cache-directory", tmpdir):
            ds = load_source("url", TEST_URL)
            assert len(ds) == 6, len(ds)

            with network_off():
                # Check cache is used
                ds = load_source("url", TEST_URL)
                assert len(ds) == 6, len(ds)

            with pytest.raises(OfflineError), network_off():
                # Check force: the load must fail while offline.
                load_source("url", TEST_URL, force=True)

            ds = load_source("url", TEST_URL, force=True)
            assert len(ds) == 6, len(ds)

            with network_off():
                ds = load_source("url", TEST_URL)
                assert len(ds) == 6, len(ds)

                # Again
                ds = load_source("url", TEST_URL)
                assert len(ds) == 6, len(ds)
Example #4
0
def test_glob():
    """Load saved GRIB copies through a glob pattern and a directory.

    The example file is saved twice into a temporary directory. Loading
    either ``<dir>/*.grib`` or the directory itself is expected to give a
    source of length 4.
    """
    example = load_source("file", climetlab_file("docs/examples/test.grib"))
    with temp_directory() as tmpdir:
        for name in ("a.grib", "b.grib"):
            example.save(os.path.join(tmpdir, name))

        # First the glob pattern, then the bare directory.
        for target in (os.path.join(tmpdir, "*.grib"), tmpdir):
            loaded = load_source("file", target)
            assert len(loaded) == 4, len(loaded)
Example #5
0
def large_multi_1(b, func):
    """Write 200 dummy NetCDF files and run ``b`` over them.

    Each file ``test-{i}.nc`` is saved in a temporary directory with time
    coordinate values ``[i, i + 0.5]``.

    Parameters
    ----------
    b : callable
        Called as ``b(func, pattern, indices)`` while the temporary
        directory still exists.
    func : object
        Passed through to ``b`` unchanged.

    Returns
    -------
    Whatever ``b`` returns.
    """
    with temp_directory() as tmpdir:
        pattern = os.path.join(tmpdir, "test-{i}.nc")
        indices = list(range(200))

        for index in indices:
            load_source(
                "dummy-source",
                kind="netcdf",
                dims=["lat", "lon", "time"],
                coord_values={"time": [index + 0.0, index + 0.5]},
            ).save(pattern.format(i=index))

        # Must run inside the ``with`` block: the files are gone afterwards.
        return b(func, pattern, indices)
Example #6
0
def test_cache_2():
    """Check that the cache stays at the configured ``5MB`` limit.

    Two batches of five ``1mb-{n}.bin`` files are loaded. After each batch
    the cache is expected to report a size of 5 * 1024 * 1024 and to hold
    five entries. At the end the cache directory is expected to contain
    five files besides the ``cache-*.db`` database file(s).
    """
    url_pattern = "https://get.ecmwf.int/test-data/climetlab/1mb-{n}.bin"
    five_mb = 5 * 1024 * 1024

    def count_entries():
        # Print every cache entry and return how many there are.
        total = 0
        for index, entry in enumerate(cache_entries()):
            print("FILE", index, entry)
            total += 1
        return total

    with temp_directory() as tmpdir, settings.temporary():
        settings.set("cache-directory", tmpdir)
        settings.set("maximum-cache-size", "5MB")
        settings.set("number-of-download-threads", 5)

        assert cache_size() == 0

        # First batch.
        load_source("url-pattern", url_pattern, {"n": [0, 1, 2, 3, 4]})

        assert cache_size() == five_mb, cache_size()

        entries = count_entries()
        assert entries == 5, f"Files in cache database: {entries}"

        # Second batch: the size and entry count are expected to be unchanged.
        load_source("url-pattern", url_pattern, {"n": [5, 6, 7, 8, 9]})

        assert cache_size() == five_mb, cache_size() / 1024.0 / 1024.0

        entries = count_entries()
        assert entries == 5, f"Files in cache database: {entries}"

        # Everything in the directory except the cache database itself.
        on_disk = [
            name
            for name in os.listdir(tmpdir)
            if not (name.startswith("cache-") and name.endswith(".db"))
        ]
        assert len(on_disk) == 5, f"Files in cache directory: {len(on_disk)}"
Example #7
0
def test_multi_directory_1():
    """Load a directory of GRIB files, then round-trip it through one file.

    Two dummy GRIB files are saved in a temporary directory. Loading the
    directory is expected to give a source of length 2, and so is loading
    the single file that source is saved to.
    """
    with temp_directory() as directory:
        for date in (20000101, 20000102):
            load_source("dummy-source", kind="grib", date=date).save(
                os.path.join(directory, f"{date}.grib")
            )

        combined = load_source("file", directory)
        print(combined)
        assert len(combined) == 2
        combined.graph()

        with temp_file() as filename:
            combined.save(filename)
            reloaded = load_source("file", filename)
            assert len(reloaded) == 2
Example #8
0
def test_url_source_check_out_of_date():
    """Load a URL twice: online, then offline with out-of-date checks off.

    The second load runs with ``check-out-of-date-urls`` set to ``False``
    and the network off. It is expected to complete without raising.
    """
    url = "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr"

    with temp_directory() as tmpdir, settings.temporary():
        settings.set("cache-directory", tmpdir)
        load_source("url", url)

        settings.set("check-out-of-date-urls", False)
        with network_off():
            load_source("url", url)
Example #9
0
def generate_zip(target, sources=None, names=None, directory=None, **kwargs):
    """Create the zip archive ``target`` from sources or from a directory.

    Exactly one of the two input modes must be used:

    - ``sources`` with ``names``: each source is saved under the matching
      name in a temporary directory, which is then zipped;
    - ``directory``: the given directory is zipped as it is.

    Parameters
    ----------
    target :
        Passed to ``zip_dir`` as the archive to create.
    sources : sequence, optional
        Objects with a ``save(path)`` method.
    names : sequence, optional
        File names, one per source. Required with ``sources``.
    directory : optional
        Directory to zip. Must not be combined with ``sources`` or ``names``.
    **kwargs :
        Accepted and ignored.

    Raises
    ------
    AssertionError
        If the arguments do not describe exactly one input mode. The error
        is raised explicitly rather than with ``assert`` so that the checks
        still run under ``python -O``.
    """
    if sources is not None:
        if directory is not None:
            raise AssertionError("'directory' cannot be combined with 'sources'")
        if names is None:
            raise AssertionError("'names' is required when 'sources' is given")
        if len(sources) != len(names):
            raise AssertionError(
                f"got {len(sources)} sources but {len(names)} names"
            )
        with temp_directory() as tmpdir:
            for s, n in zip(sources, names):
                s.save(os.path.join(tmpdir, n))
            zip_dir(target, tmpdir)
            return

    if directory is not None:
        # ``sources`` is necessarily None here, so only ``names`` is checked.
        if names is not None:
            raise AssertionError("'names' cannot be combined with 'directory'")
        zip_dir(target, directory)
        return

    raise AssertionError("either 'sources' or 'directory' must be provided")
Example #10
0
def test_numbers():
    """Check how settings convert durations, sizes and percentages.

    Each value is stored with ``settings.set`` and read back with
    ``settings.get``. The value read back is compared with the expected
    number (or ``None``).
    """
    timeouts = (
        (30, 30),
        ("30", 30),
        ("30s", 30),
        ("2m", 120),
        ("10h", 36000),
        ("7d", 7 * 24 * 3600),
    )

    # NOTE(review): the "1k"/"1kb" pair appears twice in this sequence;
    # possibly other spellings were intended -- confirm.
    cache_sizes = (
        ("1", 1),
        ("1k", 1024),
        ("1kb", 1024),
        ("1k", 1024),
        ("1kb", 1024),
        ("1K", 1024),
        ("1M", 1024 * 1024),
        ("1G", 1024 * 1024 * 1024),
        ("1T", 1024 * 1024 * 1024 * 1024),
        ("1P", 1024 * 1024 * 1024 * 1024 * 1024),
    )

    with temp_directory() as tmpdir:
        with settings.temporary("cache-directory", tmpdir):
            for given, expected in timeouts:
                settings.set("url-download-timeout", given)
                assert settings.get("url-download-timeout") == expected

            # An unknown unit suffix is rejected.
            with pytest.raises(ValueError):
                settings.set("url-download-timeout", "1x")

            for given, expected in cache_sizes:
                settings.set("maximum-cache-size", given)
                assert settings.get("maximum-cache-size") == expected

            settings.set("maximum-cache-size", None)
            assert settings.get("maximum-cache-size") is None

            settings.set("maximum-cache-disk-usage", "2%")
            assert settings.get("maximum-cache-disk-usage") == 2
Example #11
0
def generate_zarr_zip(target, **kwargs):
    """Write a dataset as zarr into a temporary directory and zip it.

    The dataset comes from ``make_xarray(**kwargs)``. It is written with
    ``to_zarr`` into a temporary directory, which is then passed to
    ``zip_dir`` together with ``target``.
    """
    dataset = make_xarray(**kwargs)
    with temp_directory() as store_dir:
        dataset.to_zarr(store_dir)
        zip_dir(target, store_dir)
Example #12
0
def test_large_multi_2_xarray(benchmark):
    """Benchmark ``large_multi_2_xarray`` on the climetlab helper's output.

    ``large_multi_2_climetlab()`` is called once, outside the benchmark,
    and its return value is passed as the argument to the benchmarked call.
    """
    # NOTE(review): the temporary directory is not referenced here;
    # presumably the helpers rely on it indirectly -- confirm.
    with temp_directory():
        benchmark(large_multi_2_xarray, large_multi_2_climetlab())
Example #13
0
def test_large_multi_2_climetlab(benchmark):
    """Benchmark ``large_multi_2_climetlab`` after one un-timed call."""
    workload = large_multi_2_climetlab
    with temp_directory():
        # One call outside the benchmark, then the benchmarked call(s).
        workload()
        benchmark(workload)
Example #14
0
def test_cache_2():
    """Check that the cache stays at the configured ``5MB`` limit.

    Two batches of five ``1mb-{n}.bin`` files are loaded. After each batch
    the cache is expected to report a size of 5 * 1024 * 1024 and to hold
    five entries. At the end the cache directory is expected to contain
    five files besides the ``cache-*.db`` database file(s). Before any
    failing assertion the cache database is dumped to stdout as JSON.
    """
    url_pattern = f"{TEST_DATA_URL}/input/" + "1mb-{n}.bin"
    expected = 5 * 1024 * 1024

    def dump_database_unless(ok):
        # Print the cache database when the check about to be asserted fails.
        if not ok:
            print(json.dumps(dump_cache_database(), indent=4))

    with temp_directory() as tmpdir, settings.temporary():
        settings.set("cache-directory", tmpdir)
        settings.set("maximum-cache-size", "5MB")
        settings.set("number-of-download-threads", 5)

        assert cache_size() == 0

        # First batch.
        load_source("url-pattern", url_pattern, {"n": [0, 1, 2, 3, 4]})

        cachesize = cache_size()
        dump_database_unless(cachesize == expected)
        assert cachesize == expected, ("before", cachesize / 1024.0 / 1024.0)

        in_database = sum(1 for _ in cache_entries())
        dump_database_unless(in_database == 5)
        assert in_database == 5, f"Files in cache database (before): {in_database}"

        # Second batch: the size and entry count are expected to be unchanged.
        load_source("url-pattern", url_pattern, {"n": [5, 6, 7, 8, 9]})

        cachesize = cache_size()
        dump_database_unless(cachesize == expected)
        assert cachesize == expected, ("after", cachesize / 1024.0 / 1024.0)

        in_database = 0
        for index, entry in enumerate(cache_entries()):
            LOG.debug("FILE %s %s", index, entry)
            in_database += 1
        dump_database_unless(in_database == 5)
        assert in_database == 5, f"Files in cache database (after): {in_database}"

        # Everything in the directory except the cache database itself.
        on_disk = sum(
            1
            for name in os.listdir(tmpdir)
            if not (name.startswith("cache-") and name.endswith(".db"))
        )
        dump_database_unless(on_disk == 5)
        assert on_disk == 5, f"Files in cache directory: {on_disk}"