Example #1
0
    def test_alternative_region(self, tmp_path):
        """Preprocess with ``subset_str="ethiopia"`` and check the output file exists.

        A synthetic 100x100 era5POS file is written under
        ``raw/era5POS/global`` and a 20x20 grid built from the bounds returned
        by ``get_ethiopia()`` is saved as the regridding reference.
        """
        # synthetic raw input
        raw_dir = tmp_path / "raw/era5POS/global"
        raw_dir.mkdir(parents=True)
        raw_file = raw_dir / "testy_test.nc"
        self._make_era5POS_dataset(size=(100, 100)).to_netcdf(path=raw_file)

        # reference grid limited to the region's lat/lon bounds
        region = get_ethiopia()
        bounds = {
            "latmin": region.latmin,
            "latmax": region.latmax,
            "lonmin": region.lonmin,
            "lonmax": region.lonmax,
        }
        reference_grid, _, _ = _make_dataset(size=(20, 20), **bounds)
        reference_file = tmp_path / "regridder.nc"
        reference_grid.to_netcdf(reference_file)

        PlanetOSPreprocessor(tmp_path).preprocess(
            subset_str="ethiopia", regrid=reference_file, n_processes=1
        )

        output_file = tmp_path / "interim/era5POS_preprocessed/data_ethiopia.nc"
        assert (
            output_file.exists()
        ), f"Expected processed file to be saved to {output_file}"
Example #2
0
    def test_alternative_region(self, tmp_path):
        """Run the preprocessor for the 'ethiopia' subset and expect an output file.

        Writes a synthetic 100x100 era5POS dataset as raw input, saves a 20x20
        regridding reference built from ``get_ethiopia()``'s bounds, then runs
        ``preprocess`` with ``parallel=False``.
        """
        # raw input file
        global_dir = tmp_path / 'raw/era5POS/global'
        global_dir.mkdir(parents=True)
        raw_dataset = self._make_era5POS_dataset(size=(100, 100))
        raw_dataset.to_netcdf(path=global_dir / 'testy_test.nc')

        # regridding reference covering the region's bounding box
        box = get_ethiopia()
        target_grid, _, _ = _make_dataset(
            size=(20, 20),
            latmin=box.latmin,
            latmax=box.latmax,
            lonmin=box.lonmin,
            lonmax=box.lonmax,
        )
        target_grid_file = tmp_path / 'regridder.nc'
        target_grid.to_netcdf(target_grid_file)

        preprocess_kwargs = {
            'subset_str': 'ethiopia',
            'regrid': target_grid_file,
            'parallel': False,
        }
        PlanetOSPreprocessor(tmp_path).preprocess(**preprocess_kwargs)

        result_file = tmp_path / 'interim/era5POS_preprocessed/data_ethiopia.nc'
        assert result_file.exists(), \
            f'Expected processed file to be saved to {result_file}'
Example #3
0
    def test_get_filenames(tmp_path):
        """A file placed under ``raw/era5POS`` is the first path from ``get_filepaths``."""
        raw_dir = tmp_path / 'raw' / 'era5POS'
        raw_dir.mkdir(parents=True)

        # an empty placeholder is enough; only the path is compared
        expected_file = raw_dir / 'testy_test.nc'
        expected_file.touch()

        found = PlanetOSPreprocessor(tmp_path).get_filepaths()
        first_found = found[0]
        assert first_found == expected_file, f'Expected {expected_file} to be retrieved'
Example #4
0
    def test_preprocess(self, tmp_path):
        """Run the preprocessor for the 'kenya' subset and verify its output.

        A synthetic 100x100 era5POS dataset is written as raw input and a
        20x20 grid built from ``get_kenya()``'s bounds is used as the
        regridding reference. The test then checks that:

        * the processed file is written to the expected location,
        * its dimensions are exactly ``lat``, ``lon`` and ``time``,
        * all coordinates lie inside the region's bounds,
        * ``VHI`` and ``precip`` have a 20x20 spatial shape,
        * the processor's ``interim`` folder no longer exists.
        """
        (tmp_path / "raw/era5POS/global").mkdir(parents=True)
        data_path = tmp_path / "raw/era5POS/global/testy_test.nc"
        dataset = self._make_era5POS_dataset(size=(100, 100))
        dataset.to_netcdf(path=data_path)

        kenya = get_kenya()
        regrid_dataset, _, _ = _make_dataset(
            size=(20, 20),
            latmin=kenya.latmin,
            latmax=kenya.latmax,
            lonmin=kenya.lonmin,
            lonmax=kenya.lonmax,
        )

        regrid_path = tmp_path / "regridder.nc"
        regrid_dataset.to_netcdf(regrid_path)

        processor = PlanetOSPreprocessor(tmp_path)
        processor.preprocess(subset_str="kenya", regrid=regrid_path, parallel=False)

        expected_out_path = tmp_path / "interim/era5POS_preprocessed/data_kenya.nc"
        assert (
            expected_out_path.exists()
        ), f"Expected processed file to be saved to {expected_out_path}"

        # check the subsetting happened correctly; the context manager makes
        # sure the file handle is released even when an assertion fails
        with xr.open_dataset(expected_out_path) as out_data:
            expected_dims = ["lat", "lon", "time"]
            assert len(list(out_data.dims)) == len(expected_dims)
            for dim in expected_dims:
                assert dim in list(
                    out_data.dims
                ), f"Expected {dim} to be in the processed dataset dims"

            lons = out_data.lon.values
            assert (lons.min() >= kenya.lonmin) and (
                lons.max() <= kenya.lonmax
            ), "Longitudes not correctly subset"

            lats = out_data.lat.values
            assert (lats.min() >= kenya.latmin) and (
                lats.max() <= kenya.latmax
            ), "Latitudes not correctly subset"

            # the first axis is skipped; only the two trailing axes are compared
            assert out_data.VHI.values.shape[1:] == (20, 20)
            assert out_data.precip.values.shape[1:] == (20, 20)

        assert (
            not processor.interim.exists()
        ), "Interim era5 folder should have been deleted"
Example #5
0
def process_era5POS_2018():
    """Run ``PlanetOSPreprocessor.preprocess`` for the 'kenya' subset.

    The preprocessed VCI file under the data folder is passed as the ``regrid``
    argument; an ``AssertionError`` is raised if that file does not exist.
    """
    data_root = get_data_path()
    regrid_path = data_root / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    preprocess_options = {
        "subset_str": "kenya",
        "regrid": regrid_path,
        "parallel": False,
        "resample_time": "M",
        "upsampling": False,
    }
    PlanetOSPreprocessor(data_root).preprocess(**preprocess_options)
Example #6
0
def process_era5POS_2018(subset_str: str = "kenya"):
    """Run ``PlanetOSPreprocessor.preprocess`` for the given subset.

    Args:
        subset_str: Subset name; it is forwarded to ``preprocess`` and also
            selects which preprocessed ERA5-Land file is passed as ``regrid``.

    Raises:
        AssertionError: If the regrid reference file does not exist.
    """
    data_root = get_data_path()
    reference_name = f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    regrid_path = data_root / reference_name
    assert regrid_path.exists(), f"{regrid_path} not available"

    preprocess_options = {
        "subset_str": subset_str,
        "regrid": regrid_path,
        "parallel": False,
        "resample_time": "M",
        "upsampling": False,
    }
    PlanetOSPreprocessor(data_root).preprocess(**preprocess_options)
def process_era5POS_2018():
    """Run ``PlanetOSPreprocessor.preprocess`` for the 'kenya' subset.

    The data folder is ``data`` when the current working directory is named
    ``ml_drought`` and ``../data`` otherwise. An ``AssertionError`` is raised
    if the preprocessed VCI file used as the ``regrid`` argument is missing.
    """
    # if the working directory is already ml_drought we don't need ../data
    in_repo_root = Path.cwd().name == 'ml_drought'
    data_path = Path('data') if in_repo_root else Path('../data')

    regrid_path = data_path / 'interim/VCI_preprocessed/data_kenya.nc'
    assert regrid_path.exists(), f'{regrid_path} not available'

    preprocess_options = {
        'subset_str': 'kenya',
        'regrid': regrid_path,
        'parallel': False,
        'resample_time': 'M',
        'upsampling': False,
    }
    PlanetOSPreprocessor(data_path).preprocess(**preprocess_options)
Example #8
0
    def test_rotate_and_filter(self):
        """Longitudes returned by ``_rotate_and_filter`` lie strictly in (-180, 180)."""
        # the synthetic dataset's 'time1' dimension is renamed to 'time'
        # before it is handed to _rotate_and_filter
        dataset = self._make_era5POS_dataset(size=(100, 100)).rename(
            {'time1': 'time'})
        rotated_ds = PlanetOSPreprocessor._rotate_and_filter(dataset)

        # plain string: the message has no placeholders, so no f-prefix
        assert (rotated_ds.lon.min() > -180) & (rotated_ds.lon.max() < 180), \
            'Longitudes not properly rotated!'
Example #9
0
    def test_make_filename():
        """``create_filename`` turns '2008/01/vhi.nc' + 'kenya' into '2008_01_vhi_kenya.nc'."""
        expected_name = '2008_01_vhi_kenya.nc'
        source_path = Path('2008/01/vhi.nc')

        name = PlanetOSPreprocessor.create_filename(source_path, 'kenya')

        assert name == expected_name, f'{name} generated, expected {expected_name}'
Example #10
0
    def test_init(self, tmp_path):
        """Constructing the preprocessor creates its two ``interim`` sub-folders."""
        PlanetOSPreprocessor(tmp_path)

        interim_root = tmp_path / 'interim'
        assert (interim_root / 'era5POS_preprocessed').exists()
        assert (interim_root / 'era5POS_interim').exists()
Example #11
0
from pathlib import Path

from src.preprocess import PlanetOSPreprocessor

# Data folder, given relative to the current working directory.
data_path = Path("data")

# Preprocessed CHIRPS file passed to the preprocessor as the `regrid` argument.
regrid_path = data_path / "interim" / "chirps_preprocessed" / "chirps_kenya.nc"
assert regrid_path.exists(), f"{regrid_path} not available"

processor = PlanetOSPreprocessor(data_path)
processor.preprocess(
    regrid=regrid_path,
    subset_kenya=True,
    resample_time="M",
    upsampling=False,
    parallel=False,
)
Example #12
0
    def test_make_filename():
        """Check the filename ``create_filename`` builds from a path and a subset name."""
        generated = PlanetOSPreprocessor.create_filename(Path("2008/01/vhi.nc"), "kenya")
        wanted = "2008_01_vhi_kenya.nc"

        assert generated == wanted, f"{generated} generated, expected {wanted}"