Example #1
def test_nc_wrong_concat_var():
    s1 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "lon", "time"],
        variables=["a", "b"],
        coord_values=dict(time=[1, 2]),
    )
    ds1 = s1.to_xarray()

    s2 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "time"],
        variables=["a", "b"],
        coord_values=dict(time=[8, 9]),
    )
    ds2 = s2.to_xarray()

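    # build the expected result directly with xarray to compare against the merger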
    target = xr.concat([ds1, ds2], dim="time")
    print(target)
    ds = load_source("multi", [s1, s2], merger="concat(concat_dim=time)")

    ds.graph()
    merged = ds.to_xarray()

    assert target.identical(merged)
Example #2
def test_bbox():

    area = [30.0, 2.0, 3.0, 4.0]
    bbox = BoundingBox(north=30, west=2, south=3, east=4)

    assert bbox_list(None, area) == bbox
    assert bbox_list(area=area, ignore=None) == bbox

    assert bbox_tuple(area) == tuple(area)
    assert bbox_tuple(area=area) == tuple(area)

    assert bbox_bbox(area) == area

    assert bbox_dict(area) == dict(north=30, west=2, south=3, east=4)

    assert bbox_defaults(area) == bbox

    source = load_source("file", climetlab_file("docs/examples/test.grib"))
    assert bbox_tuple(source[0]) == (73.0, -27.0, 33.0, 45.0)

    source = load_source("file", climetlab_file("docs/examples/test.nc"))
    assert bbox_tuple(source[0]) == (73.0, -27.0, 33.0, 45.0)

    assert bbox_tuple("france") == (54.5, -6.0, 39.0, 9.5)
    assert bbox_tuple("verification.france") == (51.5, -5.0, 42.0, 8.5)
Example #3
def test_nc_merge_var():

    s1 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "lon", "time"],
        variables=["a", "b"],
    )
    ds1 = s1.to_xarray()

    s2 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "lon", "time"],
        variables=["c", "d"],
    )
    ds2 = s2.to_xarray()

    target = xr.merge([ds1, ds2])
    ds = load_source("multi", [s1, s2])
    ds.graph()
    merged = ds.to_xarray()

    assert target.identical(merged)

    target2 = xr.open_mfdataset([s1.path, s2.path])
    assert target2.identical(merged)
Example #4
def test_zenodo_error_2():
    with pytest.raises(ValueError, match=r"Invalid zenodo key.*"):
        cml.load_source(
            "zenodo",
            record_id=4707154,
            file_key="unknown_",
        )
Example #5
    def _load(self, variable, period, domain=None, time=None, grid=None):
        self.variable = variable

        request = dict(
            variable=self.variable,
            product_type="reanalysis",
        )

        if domain is not None:
            request["area"] = domain_to_area(domain)

        if time is not None:
            request["time"] = time
        else:
            request["time"] = list(range(0, 24))

        if grid is not None:
            if isinstance(grid, (int, float)):
                request["grid"] = [grid, grid]
            else:
                request["grid"] = grid

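        # a single year, or a one-element tuple/list, is treated as a one-year period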
        if isinstance(period, int):
            period = (period, period)

        if isinstance(period, (tuple, list)) and len(period) == 1:
            period = (period[0], period[0])

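        # issue one CDS request per year and combine the results with the "multi" source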
        sources = []
        for year in range(period[0], period[1] + 1):
            request["year"] = year
            sources.append(
                load_source("cds", "reanalysis-era5-single-levels", **request))

        self.source = load_source("multi", sources)
Example #6
def test_multi():
    if not os.path.exists(os.path.expanduser("~/.cdsapirc")):
        pytest.skip("No ~/.cdsapirc")
    s1 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-01",
        format="netcdf",
    )
    s1.to_xarray()
    s2 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-02",
        format="netcdf",
    )
    s2.to_xarray()

    source = load_source("multi", s1, s2)
    for s in source:
        print(s)

    source.to_xarray()
Example #7
def xtest_multi_grib():
    ds = load_source(
        "multi",
        load_source("dummy-source", kind="grib", date=20000101),
        load_source("dummy-source", kind="grib", date=20000102),
    )
    assert len(ds) == 2
Example #8
def test_multi():
    s1 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-01",
        format="netcdf",
    )
    print(s1.to_xarray())
    s2 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-02",
        format="netcdf",
    )
    print(s2.to_xarray())

    source = load_source("multi", s1, s2)
    for s in source:
        print(s)

    print(source.to_xarray())
Example #9
def test_part_url():

    ds = load_source(
        "url",
        "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr",
    )

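    # byte-range download: fetch only the first 4 bytes of the file (the "BUFR" magic number)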
    ds = load_source(
        "url",
        "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr",
        parts=((0, 4),),
    )

    assert os.path.getsize(ds.path) == 4

    with open(ds.path, "rb") as f:
        assert f.read() == b"BUFR"

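    # several byte ranges are fetched and concatenated into a single 30-byte local file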
    ds = load_source(
        "url",
        "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr",
        parts=((0, 10), (50, 10), (60, 10)),
    )

    print(ds.path)

    assert os.path.getsize(ds.path) == 30

    with open(ds.path, "rb") as f:
        assert f.read()[:4] == b"BUFR"
Example #10
def _concat_var_different_coords_1(kind1, kind2):
    s1 = load_source(
        "dummy-source",
        kind=kind1,
        variables=["a"],
        dims=["lat", "lon", "time"],
        coord_values=dict(time=[1, 3]),
    )
    ds1 = s1.to_xarray()

    s2 = load_source(
        "dummy-source",
        kind=kind2,
        variables=["a"],
        dims=["lat", "lon", "time"],
        coord_values=dict(time=[2, 4]),
    )
    ds2 = s2.to_xarray()

    target = xr.concat([ds1, ds2], dim="time")
    # print(target)

    ds = load_source("multi", [s1, s2], merger="concat(concat_dim=time)")
    ds.graph()
    merged = ds.to_xarray()

    assert target.identical(merged), f"Concat failed for {kind1}, {kind2}"
Example #11
def retrieve_and_check(index, request, range_method=None, **kwargs):
    print("--------")
    # parts = index.lookup_request(request)
    print("range_method", range_method)
    print("REQUEST", request)
    #    for url, p in parts:
    #        total = len(index.get_backend(url).entries)
    #        print(f"PARTS: {len(p)}/{total} parts in {url}")

    now = time.time()
    s = load_source("indexed-urls",
                    index,
                    request,
                    range_method=range_method,
                    **kwargs)
    elapsed = time.time() - now
    print("ELAPSED", elapsed)
    try:
        paths = [s.path]
    except AttributeError:
        paths = [p.path for p in s.sources]

    for path in paths:
        # check that the downloaded gribs match the request
        for grib in load_source("file", path):
            for k, v in request.items():
                if k == "param":
                    k = "shortName"
                assert check_grib_value(grib._get(k), v), (grib._get(k), v)
    return elapsed
Example #12
def test_url_ftp_source_anonymous():
    date = datetime.datetime.now() - datetime.timedelta(days=1)
    load_source(
        "url-pattern",
        ("ftp://ftp.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/"
         "gfs.{date:date(%Y%m%d)}/00/atmos/wafsgfs_P_t00z_intdsk84.grib2"),
        {"date": date},
    )
Example #13
def test_multi_grib_mixed():
    ds = load_source(
        "multi",
        load_source("dummy-source", kind="grib", date=20000101),
        load_source("dummy-source", kind="grib", date=20000102),
        load_source("dummy-source", kind="unknown", hello="world"),
    )
    assert len(ds) == 2
Example #14
def test_multi_grib():
    ds = load_source(
        "multi",
        load_source("dummy-source", kind="grib", date=20000101),
        load_source("dummy-source", kind="grib", date=20000102),
    )
    assert len(ds) == 2
    ds.to_xarray()
    ds.statistics()
Example #15
def test_a():
    s = cml.load_source("local", os.path.join(here, "gribs", "a"), param="t")
    s = cml.load_source("local",
                        os.path.join(here, "gribs", "a"),
                        param="t",
                        level="1000")
    print(s, len(s))
    ds = s.to_xarray()
    print(ds)
Example #16
def test_multi_missing_url():
    from requests.exceptions import HTTPError

    with pytest.raises(HTTPError, match=".*this-file-does-not-exists.*"):
        load_source(
            "url-pattern",
            "http://download.ecmwf.int/test-data/metview/gallery/{x}",
            x=["temp.bufr", "this-file-does-not-exists.bufr"],
        )
Example #17
def test_tfdataset_grib_4():
    s = cml.load_source(
        "multi",
        cml.load_source("file", climetlab_file("docs/examples/test.grib")),
        cml.load_source("file", climetlab_file("docs/examples/test.grib")),
    )
    dataset = s.to_tfdataset(label="paramId")
    for r in dataset:
        print(len(r), [type(x) for x in r])
Example #18
def test_multi_graph_2():
    with temp_directory() as tmpdir:
        os.mkdir(os.path.join(tmpdir, "a1"))
        a11 = load_source("dummy-source", kind="grib", date=20000101)
        a11.save(os.path.join(tmpdir, "a1", "a11.grib"))
        a12 = load_source("dummy-source", kind="grib", date=20000102)
        a12.save(os.path.join(tmpdir, "a1", "a12.grib"))

        os.mkdir(os.path.join(tmpdir, "b1"))
        b11 = load_source("dummy-source", kind="grib", date=20000103)
        b11.save(os.path.join(tmpdir, "b1", "b11.grib"))
        b12 = load_source("dummy-source", kind="grib", date=20000104)
        b12.save(os.path.join(tmpdir, "b1", "b12.grib"))

        os.mkdir(os.path.join(tmpdir, "a2"))
        a21 = load_source("dummy-source", kind="grib", date=20000105)
        a21.save(os.path.join(tmpdir, "a2", "a21.grib"))
        a22 = load_source("dummy-source", kind="grib", date=20000106)
        a22.save(os.path.join(tmpdir, "a2", "a22.grib"))

        os.mkdir(os.path.join(tmpdir, "b2"))
        b21 = load_source("dummy-source", kind="grib", date=20000107)
        b21.save(os.path.join(tmpdir, "b2", "b21.grib"))
        b22 = load_source("dummy-source", kind="grib", date=20000108)
        b22.save(os.path.join(tmpdir, "b2", "b22.grib"))

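        # keep only the files whose names end with "2.grib" (a12, b12, a22, b22)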
        def filter(path_or_url):
            return path_or_url.endswith("2.grib")

        ds = load_source("file", tmpdir, filter=filter)
        ds.graph()

        assert len(ds) == 4
Example #19
def test_glob():
    s = load_source("file", climetlab_file("docs/examples/test.grib"))
    with temp_directory() as tmpdir:
        s.save(os.path.join(tmpdir, "a.grib"))
        s.save(os.path.join(tmpdir, "b.grib"))

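        # the glob pattern matches both saved files, each containing two GRIB messages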
        s = load_source("file", os.path.join(tmpdir, "*.grib"))
        assert len(s) == 4, len(s)

        s = load_source("file", tmpdir)
        assert len(s) == 4, len(s)
Example #20
def test_dates():
    npdate = np.datetime64("2016-01-01")
    assert dates_1(date=npdate) == [datetime.datetime(2016, 1, 1)]

    source = load_source("file", "docs/examples/test.grib")
    assert dates_1(source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]

    source = load_source("file", "docs/examples/test.nc")

    #  For now
    with pytest.raises(NotImplementedError):
        assert dates_1(source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]
Example #21
def test_nc_merge_concat_var():
    target, a1, a2, b1, b2 = get_hierarchy()
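    # merge the variables within each pair, then concatenate the pairs along forecast_time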
    s = load_source(
        "multi",
        [
            load_source("multi", [a1, b1], merger="merge()"),
            load_source("multi", [a2, b2], merger="merge()"),
        ],
        merger="concat(dim=forecast_time)",
    )
    merged = s.to_xarray()
    assert target.identical(merged)
Example #22
def test_extension():

    s = load_source(
        "url",
        f"{TEST_DATA_URL}/fixtures/tfrecord/EWCTest0.0.tfrecord",
    )
    assert s.path.endswith(".0.tfrecord")
    s = load_source(
        "url",
        f"{TEST_DATA_URL}/fixtures/tfrecord/EWCTest0.1.tfrecord",
    )
    assert s.path.endswith(".1.tfrecord")
Example #23
def test_user_2():
    s = load_source("file", climetlab_file("docs/examples/test.grib"))
    home_file = os.path.expanduser("~/.climetlab/test.grib")
    try:
        s.save(home_file)
        # Test expand vars
        s = load_source("file", "$HOME/.climetlab/test.grib", expand_vars=True)
        assert len(s) == 2
    finally:
        try:
            os.unlink(home_file)
        except OSError:
            LOG.exception("unlink(%s)", home_file)
Example #24
def test_cache_2():

    with temp_directory() as tmpdir:
        with settings.temporary():
            settings.set("cache-directory", tmpdir)
            settings.set("maximum-cache-size", "5MB")
            settings.set("number-of-download-threads", 5)

            assert cache_size() == 0

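            # download five 1MB files, exactly filling the 5MB cache limit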
            load_source(
                "url-pattern",
                "https://get.ecmwf.int/test-data/climetlab/1mb-{n}.bin",
                {
                    "n": [0, 1, 2, 3, 4],
                },
            )

            assert cache_size() == 5 * 1024 * 1024, cache_size()

            cnt = 0
            for i, f in enumerate(cache_entries()):
                print("FILE", i, f)
                cnt += 1
            assert cnt == 5, f"Files in cache database: {cnt}"

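            # five more 1MB files evict the previous entries, keeping the cache at 5MB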
            load_source(
                "url-pattern",
                "https://get.ecmwf.int/test-data/climetlab/1mb-{n}.bin",
                {
                    "n": [5, 6, 7, 8, 9],
                },
            )

            assert cache_size() == 5 * 1024 * 1024, cache_size() / 1024.0 / 1024.0

            cnt = 0
            for i, f in enumerate(cache_entries()):
                print("FILE", i, f)
                cnt += 1
            assert cnt == 5, f"Files in cache database: {cnt}"

            cnt = 0
            for n in os.listdir(tmpdir):
                if n.startswith("cache-") and n.endswith(".db"):
                    continue
                cnt += 1
            assert cnt == 5, f"Files in cache directory: {cnt}"
Example #25
def test_multi_directory_1():
    with temp_directory() as directory:
        for date in (20000101, 20000102):
            ds = load_source("dummy-source", kind="grib", date=date)
            ds.save(os.path.join(directory, f"{date}.grib"))

        ds = load_source("file", directory)
        print(ds)
        assert len(ds) == 2
        ds.graph()

        with temp_file() as filename:
            ds.save(filename)
            ds = load_source("file", filename)
            assert len(ds) == 2
Example #26
def test_normalize_dates_from_source():

    dates_3 = normalize("d", "date")(f)
    dates_list_3 = normalize("d", "date", multiple=True)(f)

    source = load_source("file", climetlab_file("docs/examples/test.grib"))
    assert dates_3(source[0]) == datetime.datetime(2020, 5, 13, 12, 0)
    assert dates_list_3(source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]

    source = load_source("file", climetlab_file("docs/examples/test.nc"))

    #  For now
    with pytest.raises(NotImplementedError):
        assert dates_3(source[0]) == datetime.datetime(2020, 5, 13, 12, 0)
        assert dates_list_3(
            source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]
Example #27
def test_zenodo_1():

    ds = cml.load_source("zenodo", record_id=5020468, filter=only_csv)

    pd = ds.to_pandas()
    print(pd)
    assert len(pd) == 49
Example #28
def test_download_zip_1():
    ds = load_source(
        "url",
        f"{TEST_DATA_URL}/input/grib.zip",
    )

    assert len(ds) == 6, len(ds)
Example #29
def test_download_zip_1():
    ds = load_source(
        "url",
        "https://get.ecmwf.int/repository/test-data/climetlab/grib.zip",
    )

    assert len(ds) == 6, len(ds)
Example #30
def test_unknown_reader():
    s = cml.load_source(
        "file",
        os.path.join(os.path.dirname(__file__), "unknown_file.unknown_ext"),
    )
    print(s)
    assert isinstance(s._reader, cml.readers.unknown.Unknown)