def test_init_hourly(self, tmp_path):
    """Constructing the hourly preprocessor creates its interim folders."""
    ERA5HourlyPreprocessor(tmp_path)

    # Both output directories should exist immediately after __init__.
    for sub_dir in (
        "interim/reanalysis-era5-single-levels_preprocessed",
        "interim/reanalysis-era5-single-levels_interim",
    ):
        assert (tmp_path / sub_dir).exists()
def preprocess_era5_hourly():
    # NOTE(review): this module later defines another function with the same
    # name (taking a subset_str parameter); at import time only the last
    # definition survives, so this one is dead code -- confirm which is
    # intended to be kept.
    data_path = get_data_path()
    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    # Fail early if the VCI reference grid is missing. NOTE(review): the path
    # is asserted but never handed to preprocess() -- presumably the
    # preprocessor locates the regrid target itself; verify against
    # ERA5HourlyPreprocessor.preprocess.
    assert regrid_path.exists(), f"{regrid_path} not available"
    processor = ERA5HourlyPreprocessor(data_path)
    # W-MON is weekly each monday (the same as the NDVI data from Atzberger)
    processor.preprocess(subset_str="kenya", resample_time="W-MON")
def preprocess_era5_hourly(subset_str: str = "kenya"):
    """Run the hourly ERA5 preprocessing pipeline for the given subset.

    Requires the ERA5-Land preprocessed reference grid for *subset_str*
    to exist already; fails fast otherwise.
    """
    data_path = get_data_path()

    regrid_path = (
        data_path
        / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    )
    assert regrid_path.exists(), f"{regrid_path} not available"

    # W-MON is weekly each monday (the same as the NDVI data from Atzberger)
    ERA5HourlyPreprocessor(data_path).preprocess(
        subset_str=subset_str, resample_time="W-MON"
    )
def test_make_filename(tmp_path, granularity):
    """create_filename flattens the raw directory layout into a single name.

    ``granularity`` is expected to be "monthly" or "hourly" (presumably a
    pytest parametrized fixture).
    """
    if granularity == "monthly":
        basename = "reanalysis-era5-single-levels-monthly-means"
        processor = ERA5MonthlyMeanPreprocessor(tmp_path)
    elif granularity == "hourly":
        basename = "reanalysis-era5-single-levels"
        processor = ERA5HourlyPreprocessor(tmp_path)
    else:
        # Fail loudly on an unexpected parameter instead of hitting a
        # confusing NameError on `processor` below.
        raise ValueError(f"Unknown granularity: {granularity}")

    path = Path(basename + "/2m_temperature/1979_2019/01_12.nc")
    name = processor.create_filename(path, "kenya")
    expected_name = "1979_2019_01_12_2m_temperature_kenya.nc"
    assert name == expected_name, f"{name} generated, expected {expected_name}"
def test_get_filenames(tmp_path, granularity):
    """get_filepaths discovers raw netCDF files under the raw data folder."""
    if granularity == "monthly":
        basename = "reanalysis-era5-single-levels-monthly-means"
        processor = ERA5MonthlyMeanPreprocessor(tmp_path)
    elif granularity == "hourly":
        basename = "reanalysis-era5-single-levels"
        processor = ERA5HourlyPreprocessor(tmp_path)
    else:
        # Fail loudly on an unexpected parameter instead of hitting a
        # confusing NameError on `processor` below.
        raise ValueError(f"Unknown granularity: {granularity}")

    # Place the dummy file *inside* the year directory, matching the layout
    # used by the other tests (raw/<dataset>/<variable>/<years>/<months>.nc).
    # The original touched "1979_2019.01_12.nc" beside the directory it had
    # just created -- a `.` vs `/` typo.
    (tmp_path / f"raw/{basename}/2m_temperature/1979_2019").mkdir(parents=True)
    test_file = tmp_path / f"raw/{basename}/2m_temperature/1979_2019/01_12.nc"
    test_file.touch()

    files = processor.get_filepaths()
    assert files[0] == test_file, f"Expected {test_file} to be retrieved"
def test_preprocess(self, tmp_path, granularity):
    """End-to-end check of preprocess(): writes a raw dummy file, runs the
    pipeline with a regrid target, then validates the subset/regridded output.
    """
    # Pick the processor matching the parametrized granularity.
    # NOTE(review): an unrecognised granularity would leave `basename` /
    # `processor` unbound and raise NameError below.
    if granularity == "monthly":
        basename = "reanalysis-era5-single-levels-monthly-means"
        processor = ERA5MonthlyMeanPreprocessor(tmp_path)
    elif granularity == "hourly":
        basename = "reanalysis-era5-single-levels"
        processor = ERA5HourlyPreprocessor(tmp_path)
    # Create the raw directory layout the processor expects and drop in a
    # synthetic 100x100 ERA5 dataset (hourly vs monthly time axis).
    (tmp_path / f"raw/{basename}/" "2m_temperature/1979_2019").mkdir(parents=True)
    data_path = tmp_path / f"raw/{basename}/" "2m_temperature/1979_2019/01_12.nc"
    if granularity == "hourly":
        dataset = self._make_era5_dataset(size=(100, 100), monthly=False)
    else:
        dataset = self._make_era5_dataset(size=(100, 100), monthly=True)
    dataset.to_netcdf(path=data_path)
    # Build a coarser (20x20) reference grid covering Kenya to regrid onto.
    kenya = get_kenya()
    regrid_dataset, _, _ = _make_dataset(
        size=(20, 20),
        latmin=kenya.latmin,
        latmax=kenya.latmax,
        lonmin=kenya.lonmin,
        lonmax=kenya.lonmax,
    )
    regrid_path = tmp_path / "regridder.nc"
    regrid_dataset.to_netcdf(regrid_path)
    # Run the pipeline serially so failures surface in this process.
    processor.preprocess(subset_str="kenya", regrid=regrid_path, parallel=False)
    expected_out_path = (
        tmp_path / f"interim/{basename}" "_preprocessed/data_kenya.nc"
    )
    assert (
        expected_out_path.exists()
    ), f"Expected processed file to be saved to {expected_out_path}"
    # check the subsetting happened correctly
    out_data = xr.open_dataset(expected_out_path)
    # Output must carry exactly the (lat, lon, time) dimensions.
    expected_dims = ["lat", "lon", "time"]
    assert len(list(out_data.dims)) == len(expected_dims)
    for dim in expected_dims:
        assert dim in list(
            out_data.dims
        ), f"Expected {dim} to be in the processed dataset dims"
    # Coordinates must fall within the Kenya bounding box.
    lons = out_data.lon.values
    assert (lons.min() >= kenya.lonmin) and (
        lons.max() <= kenya.lonmax
    ), "Longitudes not correctly subset"
    lats = out_data.lat.values
    assert (lats.min() >= kenya.latmin) and (
        lats.max() <= kenya.latmax
    ), "Latitudes not correctly subset"
    # Spatial dims should match the 20x20 regrid target (axis 0 is time).
    assert out_data.t2m.values.shape[1:] == (20, 20)
    # The intermediate working directory is cleaned up after a successful run.
    assert (
        not processor.interim.exists()
    ), f"Interim era5 folder should have been deleted"