Beispiel #1
0
    def test_make_filename():
        """Check the file name ``create_filename`` builds for the "kenya" subset."""
        raw_path = Path("reanalysis-era5-land/2m_temperature/1979_2019/01_12.nc")
        expected_name = "1979_2019_01_12_2m_temperature_kenya.nc"

        name = ERA5LandPreprocessor.create_filename(raw_path, "kenya")

        assert name == expected_name, f"{name} generated, expected {expected_name}"
Beispiel #2
0
    def test_init(self, tmp_path):
        """Constructing the preprocessor should create both of its working folders."""
        ERA5LandPreprocessor(tmp_path)

        interim_root = tmp_path / "interim"
        for folder_name in (
            "reanalysis-era5-land_interim",
            "reanalysis-era5-land_preprocessed",
        ):
            assert (interim_root / folder_name).exists()
Beispiel #3
0
    def test_preprocess(self, tmp_path):
        """Run ``preprocess`` on a synthetic file and inspect what it writes.

        A 100x100 dataset is saved as the raw 2m_temperature file and a 20x20
        grid spanning the Kenya bounding box is passed as the regrid
        reference. The output file is then checked for its location,
        dimensions, spatial extent and grid shape, and the interim folder
        is expected to be gone afterwards.
        """
        # Arrange: one raw netCDF file for the 2m_temperature variable.
        variable_dir = tmp_path / "raw/reanalysis-era5-land/2m_temperature/1979_2019"
        variable_dir.mkdir(parents=True)
        raw_file = variable_dir / "01_12.nc"
        self._make_era5_dataset(size=(100, 100)).to_netcdf(path=raw_file)

        # Arrange: the reference grid the output is regridded onto.
        kenya = get_kenya()
        reference_grid, _, _ = _make_dataset(
            size=(20, 20),
            latmin=kenya.latmin,
            latmax=kenya.latmax,
            lonmin=kenya.lonmin,
            lonmax=kenya.lonmax,
        )
        regrid_path = tmp_path / "regridder.nc"
        reference_grid.to_netcdf(regrid_path)

        # Act
        processor = ERA5LandPreprocessor(tmp_path)
        processor.preprocess(
            subset_str="kenya",
            regrid=regrid_path,
            parallel_processes=1,
            variable="2m_temperature",
        )

        # Assert: the processed file exists where expected.
        expected_out_path = (
            tmp_path
            / "interim/reanalysis-era5-land_preprocessed"
            / "reanalysis-era5-land_kenya.nc"
        )
        assert (
            expected_out_path.exists()
        ), f"Expected processed file to be saved to {expected_out_path}"

        # Assert: exactly the expected dimensions are present.
        out_data = xr.open_dataset(expected_out_path)
        expected_dims = ["lat", "lon", "time"]
        found_dims = list(out_data.dims)
        assert len(found_dims) == len(expected_dims)
        for dim in expected_dims:
            assert (
                dim in found_dims
            ), f"Expected {dim} to be in the processed dataset dims"

        # Assert: coordinates stay inside the Kenya bounding box.
        lons = out_data.lon.values
        lons_in_box = (lons.min() >= kenya.lonmin) and (lons.max() <= kenya.lonmax)
        assert lons_in_box, "Longitudes not correctly subset"

        lats = out_data.lat.values
        lats_in_box = (lats.min() >= kenya.latmin) and (lats.max() <= kenya.latmax)
        assert lats_in_box, "Latitudes not correctly subset"

        # Assert: the spatial axes match the 20x20 reference grid.
        assert out_data.t2m.values.shape[1:] == (20, 20)

        assert (
            not processor.interim.exists()
        ), "Interim era5 folder should have been deleted"
Beispiel #4
0
    def test_get_filenames(tmp_path):
        """``get_filepaths`` should return the netCDF file created under the raw folder."""
        variable_dir = tmp_path / "raw/reanalysis-era5-land/2m_temperature"
        (variable_dir / "1979_2019").mkdir(parents=True)

        # NOTE(review): the file is created next to the 1979_2019 folder
        # (name "1979_2019.01_12.nc"), not inside it - confirm this is intended.
        test_file = variable_dir / "1979_2019.01_12.nc"
        test_file.touch()

        files = ERA5LandPreprocessor(tmp_path).get_filepaths()

        assert files[0] == test_file, f"Expected {test_file} to be retrieved"
Beispiel #5
0
def process_era5_land(
    variables: Optional[Union[List, str]] = None,
    subset_str: str = "kenya",
    monmean: bool = True,
):
    """Preprocess raw ERA5-Land data, one variable at a time.

    Args:
        variables: Name, or list of names, of entries under
            ``raw/reanalysis-era5-land`` to preprocess. ``None`` selects
            every entry found in that folder.
        subset_str: Forwarded unchanged to ``preprocess`` as ``subset_str``.
        monmean: When true use ``ERA5LandMonthlyMeansPreprocessor``,
            otherwise ``ERA5LandPreprocessor``.

    Raises:
        AssertionError: If the raw folder is empty (``variables is None``)
            or a requested variable is not present in it.
    """
    data_path = get_data_path()
    raw_folder = data_path / "raw/reanalysis-era5-land"

    # List the raw folder once; every validation branch compares against it.
    available = [d.name for d in raw_folder.iterdir()]

    # Check all the provided variables exist
    if variables is None:
        variables = available
        assert variables != [], f"Expecting to find some variables in: {raw_folder}"
    elif isinstance(variables, str):
        variables = [variables]
        # Compare the name itself: testing the wrapping list for membership
        # in a list of strings is always False.
        assert (
            variables[0] in available
        ), f"Expect to find {variables} in {raw_folder}"
    else:
        assert all(
            name in available for name in variables
        ), f"Expected to find {variables}"

    if monmean:
        processor = ERA5LandMonthlyMeansPreprocessor(data_path)
    else:
        processor = ERA5LandPreprocessor(data_path)

    # Regridding is disabled here: no reference grid is passed on.
    for variable in variables:
        processor.preprocess(
            subset_str=subset_str,
            regrid=None,
            resample_time="M",
            upsampling=False,
            variable=variable,
        )
Beispiel #6
0
def process_era5_land(variable: str):
    """Preprocess one ERA5-Land variable for the Kenya subset.

    The data folder is ``data`` when the working directory is named
    ``ml_drought`` and ``../data`` otherwise. The CHIRPS Kenya file must
    exist under that folder, although no regrid reference is passed on.

    Args:
        variable: Forwarded to ``preprocess`` as ``variable``.

    Raises:
        AssertionError: If the CHIRPS Kenya file is not found.
    """
    in_repo_root = Path.cwd().name == "ml_drought"
    data_path = Path("data") if in_repo_root else Path("../data")

    regrid_path = data_path / "interim/chirps_preprocessed/chirps_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    preprocess_options = {
        "subset_str": "kenya",
        "regrid": None,
        "resample_time": "M",
        "upsampling": False,
        "variable": variable,
    }
    ERA5LandPreprocessor(data_path).preprocess(**preprocess_options)
from pathlib import Path
import xarray as xr
%load_ext autoreload
%autoreload 2

from src.preprocess import ERA5LandPreprocessor

# Root data folder handed to the preprocessor (absolute path on an external
# volume - machine specific, adjust before running elsewhere).
data_dir = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')

# Build the ERA5-Land preprocessor on top of that data folder.
e = ERA5LandPreprocessor(data_dir)

# Get the filepaths the preprocessor finds; the result is not stored, so it
# is only shown when this runs as the last expression of a notebook cell.
e.get_filepaths()