Example #1
0
    def test_make_filename():
        """The date folders and variable name are flattened into one filename."""
        source_path = Path(
            "reanalysis-era5-single-levels-monthly-means"
            "/2m_temperature/1979_2019/01_12.nc"
        )
        expected_name = "1979_2019_01_12_2m_temperature_kenya.nc"

        name = ERA5MonthlyMeanPreprocessor.create_filename(source_path, "kenya")
        assert name == expected_name, f"{name} generated, expected {expected_name}"
    def test_make_filename():
        """create_filename builds <years>_<months>_<variable>_<subset>.nc."""
        raw_file = Path(
            'reanalysis-era5-single-levels-monthly-means'
            '/2m_temperature/1979_2019/01_12.nc'
        )

        expected_name = '1979_2019_01_12_2m_temperature_kenya.nc'
        name = ERA5MonthlyMeanPreprocessor.create_filename(raw_file, 'kenya')
        assert name == expected_name, f'{name} generated, expected {expected_name}'
Example #3
0
def preprocess_era5():
    """Preprocess ERA5 monthly-mean data for Kenya, regridded onto the VCI grid.

    The VCI preprocessor must have been run first: its output file defines
    the target grid the ERA5 data is regridded onto.

    Raises:
        FileNotFoundError: if the preprocessed VCI reference grid is missing.
    """
    data_path = get_data_path()

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    # Raise a real exception instead of `assert`, which is stripped under
    # `python -O` and would let preprocessing fail later with a worse error.
    if not regrid_path.exists():
        raise FileNotFoundError(f"{regrid_path} not available")

    processor = ERA5MonthlyMeanPreprocessor(data_path)
    processor.preprocess(subset_str="kenya", regrid=regrid_path)
    def test_init(self, tmp_path):
        """Constructing the preprocessor creates its interim/output folders."""
        ERA5MonthlyMeanPreprocessor(tmp_path)

        base = ('interim/reanalysis-era5-single-levels-'
                'monthly-means')
        for suffix in ('_preprocessed', '_interim'):
            assert (tmp_path / (base + suffix)).exists()
Example #5
0
def preprocess_era5(subset_str: str = "kenya"):
    """Preprocess ERA5 monthly means for ``subset_str`` on the native grid.

    Regridding is currently disabled (``regrid=None``); an earlier revision
    used the preprocessed ERA5-land grid as the regrid target.

    Args:
        subset_str: name of the region to subset to (default ``"kenya"``).
    """
    data_path = get_data_path()

    # Previously:
    #   regrid_path = data_path / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    regrid_path = None  # keep the native ERA5 grid

    processor = ERA5MonthlyMeanPreprocessor(data_path)
    processor.preprocess(subset_str=subset_str, regrid=regrid_path)
def preprocess_era5():
    """Preprocess ERA5 monthly means for Kenya, regridded onto the VCI grid.

    Works both from the repository root (``ml_drought``) and from a
    directory one level below it (e.g. ``notebooks/``).

    Raises:
        FileNotFoundError: if the preprocessed VCI reference grid is missing.
    """
    # Path.cwd().name is the idiomatic form of
    # Path('.').absolute().as_posix().split('/')[-1]
    if Path.cwd().name == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    regrid_path = data_path / 'interim/VCI_preprocessed/data_kenya.nc'
    # raise instead of `assert` so the check survives `python -O`
    if not regrid_path.exists():
        raise FileNotFoundError(f'{regrid_path} not available')

    processor = ERA5MonthlyMeanPreprocessor(data_path)
    processor.preprocess(subset_str='kenya', regrid=regrid_path)
Example #7
0
    def test_preprocess(self, tmp_path):
        """End-to-end check: preprocessing subsets to Kenya, regrids to the
        20x20 reference grid, and deletes the interim folder afterwards."""
        # lay out a raw netCDF file where the downloader would have put it
        (tmp_path / "raw/reanalysis-era5-single-levels-monthly-means/"
         "2m_temperature/1979_2019").mkdir(parents=True)
        data_path = (tmp_path /
                     "raw/reanalysis-era5-single-levels-monthly-means/"
                     "2m_temperature/1979_2019/01_12.nc")
        dataset = self._make_era5_dataset(size=(100, 100))
        dataset.to_netcdf(path=data_path)

        # reference dataset on a coarser (20x20) Kenya grid to regrid onto
        kenya = get_kenya()
        regrid_dataset, _, _ = _make_dataset(
            size=(20, 20),
            latmin=kenya.latmin,
            latmax=kenya.latmax,
            lonmin=kenya.lonmin,
            lonmax=kenya.lonmax,
        )

        regrid_path = tmp_path / "regridder.nc"
        regrid_dataset.to_netcdf(regrid_path)

        processor = ERA5MonthlyMeanPreprocessor(tmp_path)
        processor.preprocess(subset_str="kenya",
                             regrid=regrid_path,
                             parallel=False)

        expected_out_path = (tmp_path /
                             "interim/reanalysis-era5-single-levels-monthly-"
                             "means_preprocessed/data_kenya.nc")
        assert expected_out_path.exists(), \
            f"Expected processed file to be saved to {expected_out_path}"

        # check the subsetting happened correctly
        out_data = xr.open_dataset(expected_out_path)
        expected_dims = ["lat", "lon", "time"]
        assert len(list(out_data.dims)) == len(expected_dims)
        for dim in expected_dims:
            assert dim in list(
                out_data.dims
            ), f"Expected {dim} to be in the processed dataset dims"

        lons = out_data.lon.values
        assert (lons.min() >= kenya.lonmin) and (
            lons.max() <= kenya.lonmax), "Longitudes not correctly subset"

        lats = out_data.lat.values
        assert (lats.min() >= kenya.latmin) and (
            lats.max() <= kenya.latmax), "Latitudes not correctly subset"

        # regridding should have coarsened the data onto the 20x20 target grid
        assert out_data.t2m.values.shape[1:] == (20, 20)

        # interim files are cleaned up once preprocessing succeeds
        # (original message had a pointless f-string prefix, removed)
        assert not processor.interim.exists(), \
            "Interim era5 folder should have been deleted"
    def test_get_filenames(tmp_path):
        """get_filepaths finds the raw .nc files under the dataset folder."""
        raw_dir = (tmp_path / 'raw/reanalysis-era5-single-levels-monthly-means'
                   / '2m_temperature/1979_2019')
        raw_dir.mkdir(parents=True)

        test_file = (tmp_path / 'raw/reanalysis-era5-single-levels-'
                                'monthly-means/2m_temperature/1979_2019.01_12.nc')
        test_file.touch()

        processor = ERA5MonthlyMeanPreprocessor(tmp_path)
        files = processor.get_filepaths()

        assert files[0] == test_file, f'Expected {test_file} to be retrieved'
Example #9
0
    def test_make_filename(tmp_path, granularity):
        """create_filename flattens the raw folder layout into a single file
        name, for both the monthly-mean and the hourly preprocessor."""
        if granularity == "monthly":
            basename = "reanalysis-era5-single-levels-monthly-means"
            processor = ERA5MonthlyMeanPreprocessor(tmp_path)
        elif granularity == "hourly":
            basename = "reanalysis-era5-single-levels"
            processor = ERA5HourlyPreprocessor(tmp_path)
        else:
            # fail loudly instead of a confusing NameError on `basename`
            raise ValueError(f"Unknown granularity: {granularity}")

        path = Path(basename + "/2m_temperature/1979_2019/01_12.nc")

        name = processor.create_filename(path, "kenya")
        expected_name = "1979_2019_01_12_2m_temperature_kenya.nc"
        assert name == expected_name, f"{name} generated, expected {expected_name}"
Example #10
0
    def test_get_filenames(tmp_path, granularity):
        """get_filepaths retrieves the raw .nc file for either granularity."""
        if granularity == "monthly":
            basename = "reanalysis-era5-single-levels-monthly-means"
            processor = ERA5MonthlyMeanPreprocessor(tmp_path)
        elif granularity == "hourly":
            basename = "reanalysis-era5-single-levels"
            processor = ERA5HourlyPreprocessor(tmp_path)
        else:
            # fail loudly instead of a confusing NameError on `basename`
            raise ValueError(f"Unknown granularity: {granularity}")

        (tmp_path / f"raw/{basename}/" "2m_temperature/1979_2019").mkdir(parents=True)

        # NOTE: the joined string contains a double slash, which pathlib
        # collapses, so the comparison below still holds
        test_file = tmp_path / f"raw/{basename}/" "/2m_temperature/1979_2019.01_12.nc"
        test_file.touch()

        files = processor.get_filepaths()
        assert files[0] == test_file, f"Expected {test_file} to be retrieved"
Example #11
0
# Paths on the soge-home cluster: ERA5 v-wind hourly files in, Africa subset
# and daily-resampled output out.
data_dir = Path('/soge-home/projects/crop_yield/ml_drought/data/')
base_out_dir = Path('/soge-home/projects/crop_yield/hackathon/')
final_out_dir = Path('/soge-home/projects/crop_yield/hackathon/v_wind')
v_wind_dir = Path(
    '/soge-home/data/analysis/era5/0.28125x0.28125/hourly/v_component_of_wind/nc'
)

# make directories (`exist_ok=True` makes a prior .exists() check redundant)
out_dir = base_out_dir / 'africa' / 'v_component_of_wind_hourly'
out_dir.mkdir(exist_ok=True, parents=True)
final_out_dir.mkdir(exist_ok=True, parents=True)

# only used for its chop_roi / resample_time helpers below
processor = ERA5MonthlyMeanPreprocessor(data_dir)

# # SUBSET AFRICA from hourly files
# nc_files = [f for f in v_wind_dir.glob('*.nc')]

# for netcdf_filepath in nc_files:
#     ds = xr.open_dataset(netcdf_filepath).rename(
#         {'longitude': 'lon', 'latitude': 'lat'}
#     )
#     ds = processor.chop_roi(ds, subset_str='africa', inverse_lat=True)
#     # NOTE(review): original used undefined `file`; should be `netcdf_filepath`
#     ds.to_netcdf(out_dir / netcdf_filepath.name)
#     print(f'Done for {netcdf_filepath.name}')

# JOIN ALL FILES AND MAKE DAILY
ds = xr.open_mfdataset(str(out_dir / '*.nc'), chunks={'time': 1})
ds = processor.resample_time(ds, resample_length='D', upsampling=False)