Example #1
    def test_alternative_region(self, tmp_path):
        # make the dataset
        (tmp_path / "raw/gleam/monthly").mkdir(parents=True)
        data_path = tmp_path / "raw/gleam/monthly/testy_test.nc"
        dataset = self._make_gleam_dataset(size=(100, 100))
        dataset.to_netcdf(path=data_path)
        ethiopia = get_ethiopia()

        # regrid the datasets
        regrid_dataset, _, _ = _make_dataset(
            size=(20, 20),
            latmin=ethiopia.latmin,
            latmax=ethiopia.latmax,
            lonmin=ethiopia.lonmin,
            lonmax=ethiopia.lonmax,
        )
        regrid_path = tmp_path / "regridder.nc"
        regrid_dataset.to_netcdf(regrid_path)

        # build the Preprocessor object and subset with a different subset_str
        processor = GLEAMPreprocessor(tmp_path)
        processor.preprocess(subset_str="ethiopia", regrid=regrid_path)

        expected_out_path = tmp_path / "interim/gleam_preprocessed/data_ethiopia.nc"
        assert (
            expected_out_path.exists()
        ), f"Expected processed file to be saved to {expected_out_path}"
Example #2
    def test_preprocess(self, tmp_path):

        (tmp_path / "raw/gleam/monthly").mkdir(parents=True)
        data_path = tmp_path / "raw/gleam/monthly/testy_test.nc"
        dataset = self._make_gleam_dataset(size=(100, 100))
        dataset.to_netcdf(path=data_path)

        kenya = get_kenya()
        regrid_dataset, _, _ = _make_dataset(
            size=(20, 20),
            latmin=kenya.latmin,
            latmax=kenya.latmax,
            lonmin=kenya.lonmin,
            lonmax=kenya.lonmax,
        )

        regrid_path = tmp_path / "regridder.nc"
        regrid_dataset.to_netcdf(regrid_path)

        processor = GLEAMPreprocessor(tmp_path)
        processor.preprocess(subset_str="kenya", regrid=regrid_path)

        expected_out_path = tmp_path / "interim/gleam_preprocessed/data_kenya.nc"
        assert (
            expected_out_path.exists()
        ), f"Expected processed file to be saved to {expected_out_path}"

        # check the subsetting happened correctly
        out_data = xr.open_dataset(expected_out_path)
        expected_dims = ["lat", "lon", "time"]
        assert len(list(out_data.dims)) == len(expected_dims)
        for dim in expected_dims:
            assert dim in list(
                out_data.dims
            ), f"Expected {dim} to be in the processed dataset dims"

        lons = out_data.lon.values
        assert (lons.min() >= kenya.lonmin) and (
            lons.max() <= kenya.lonmax
        ), "Longitudes not correctly subset"

        lats = out_data.lat.values
        assert (lats.min() >= kenya.latmin) and (
            lats.max() <= kenya.latmax
        ), "Latitudes not correctly subset"

        assert set(out_data.data_vars) == {"E"}, "Got unexpected variables!"

        assert (
            not processor.interim.exists()
        ), "Interim gleam folder should have been deleted"
Example #3
def process_gleam():
    # if the working directory is already ml_drought, we don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')
    regrid_path = data_path / 'interim/VCI_preprocessed/data_kenya.nc'
    assert regrid_path.exists(), f'{regrid_path} not available'

    processor = GLEAMPreprocessor(data_path)

    processor.preprocess(subset_str='kenya',
                         regrid=regrid_path,
                         resample_time='M',
                         upsampling=False)
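The resample_time='M' and upsampling=False arguments presumably ask the preprocessor to downsample the data to monthly values. Conceptually this maps onto xarray's resample API, roughly as in this sketch; it illustrates the idea only and is not the preprocessor's actual code:

import xarray as xr

def resample_monthly(ds: xr.Dataset) -> xr.Dataset:
    # Downsample to one value per calendar month by taking the monthly mean.
    # This only illustrates the concept behind resample_time='M'; the
    # GLEAMPreprocessor's real implementation may aggregate differently.
    return ds.resample(time="M").mean()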
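Across these examples, subset_str names a region whose bounding box the preprocessor clips the data to, which is what the lat/lon assertions in the test above check. One standard way to express that clip with xarray, shown purely as a conceptual sketch (subset_to_region is an illustrative name, not the preprocessor's actual method):

import xarray as xr

def subset_to_region(ds: xr.Dataset, latmin, latmax, lonmin, lonmax) -> xr.Dataset:
    # Clip the dataset to a rectangular lat/lon bounding box. If the latitude
    # coordinate is stored in descending order, the slice bounds would need to
    # be reversed, i.e. slice(latmax, latmin).
    return ds.sel(lat=slice(latmin, latmax), lon=slice(lonmin, lonmax))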