def test_alternative_region(self, tmp_path):
    """Run the CHIRPS preprocessor with ``subset_str="ethiopia"``.

    A synthetic CHIRPS file is written under ``raw/chirps/global`` and a
    20x20 reference grid is built from the bounds returned by
    ``get_ethiopia()``. The test passes when ``preprocess`` leaves a file at
    ``interim/chirps_preprocessed/data_ethiopia.nc``.
    """
    # Raw input: synthetic dataset saved inside the raw CHIRPS folder.
    (tmp_path / "raw/chirps/global").mkdir(parents=True)
    raw_file = tmp_path / "raw/chirps/global/testy_test.nc"
    self._make_chirps_dataset(size=(100, 100)).to_netcdf(path=raw_file)

    # Reference grid: bounded by the Ethiopia region, saved for regridding.
    region = get_ethiopia()
    bounds = {
        "latmin": region.latmin,
        "latmax": region.latmax,
        "lonmin": region.lonmin,
        "lonmax": region.lonmax,
    }
    reference_grid, _, _ = _make_dataset(size=(20, 20), **bounds)
    reference_file = tmp_path / "regridder.nc"
    reference_grid.to_netcdf(reference_file)

    # Preprocess with a subset_str other than the one used in test_preprocess.
    chirps = CHIRPSPreprocessor(tmp_path)
    chirps.preprocess(subset_str="ethiopia", regrid=reference_file, parallel=False)

    result_file = tmp_path / "interim/chirps_preprocessed/data_ethiopia.nc"
    assert result_file.exists(), (
        f"Expected processed file to be saved to {result_file}"
    )
def process_precip_2018():
    """Preprocess CHIRPS data for Kenya against the VCI reference grid.

    The reference grid is read from
    ``<data path>/interim/VCI_preprocessed/data_kenya.nc``; an
    ``AssertionError`` is raised when that file does not exist.
    """
    root = get_data_path()
    reference_grid = root / "interim/VCI_preprocessed/data_kenya.nc"
    assert reference_grid.exists(), f"{reference_grid} not available"

    CHIRPSPreprocessor(root).preprocess(
        subset_str="kenya",
        regrid=reference_grid,
        parallel=False,
    )
def test_get_filenames(tmp_path):
    """Check that ``get_filepaths`` returns a file placed in ``raw/chirps``."""
    (tmp_path / "raw" / "chirps").mkdir(parents=True)
    expected = tmp_path / "raw/chirps/testy_test.nc"
    expected.touch()

    found = CHIRPSPreprocessor(tmp_path).get_filepaths()

    # Only the first returned entry is compared with the touched file.
    assert found[0] == expected, f"Expected {expected} to be retrieved"
def process_precip_2018(subset_str: str = "kenya"):
    """Preprocess CHIRPS data for ``subset_str`` against the ERA5-Land grid.

    Args:
        subset_str: Region name. It is used in the reference-grid filename
            (``data_<subset_str>.nc``) and passed to ``preprocess``.

    Raises:
        AssertionError: If the reference grid file does not exist.
    """
    root = get_data_path()
    reference_grid = (
        root / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    )
    assert reference_grid.exists(), f"{reference_grid} not available"

    CHIRPSPreprocessor(root).preprocess(
        subset_str=subset_str,
        regrid=reference_grid,
        parallel=False,
    )
def test_preprocess(self, tmp_path):
    """Run the CHIRPS preprocessor for Kenya and validate the output file.

    A synthetic CHIRPS file is written under ``raw/chirps/global`` and a
    20x20 reference grid is built from the bounds returned by ``get_kenya()``.
    After ``preprocess`` runs, the test checks that:

    * ``interim/chirps_preprocessed/data_kenya.nc`` exists,
    * its dimensions are exactly ``lat``, ``lon`` and ``time``,
    * its longitudes and latitudes lie within the Kenya bounds,
    * the trailing two axes of ``VHI`` have shape ``(20, 20)``,
    * ``processor.interim`` no longer exists.
    """
    (tmp_path / "raw/chirps/global").mkdir(parents=True)
    data_path = tmp_path / "raw/chirps/global/testy_test.nc"
    dataset = self._make_chirps_dataset(size=(100, 100))
    dataset.to_netcdf(path=data_path)

    kenya = get_kenya()
    regrid_dataset, _, _ = _make_dataset(
        size=(20, 20),
        latmin=kenya.latmin,
        latmax=kenya.latmax,
        lonmin=kenya.lonmin,
        lonmax=kenya.lonmax,
    )

    regrid_path = tmp_path / "regridder.nc"
    regrid_dataset.to_netcdf(regrid_path)

    processor = CHIRPSPreprocessor(tmp_path)
    processor.preprocess(subset_str="kenya", regrid=regrid_path, parallel=False)

    expected_out_path = tmp_path / "interim/chirps_preprocessed/data_kenya.nc"
    assert expected_out_path.exists(), (
        f"Expected processed file to be saved to {expected_out_path}"
    )

    # Check the subsetting happened correctly. The dataset is opened in a
    # context manager so its file handle is released when the checks finish,
    # rather than staying open on a file inside tmp_path.
    with xr.open_dataset(expected_out_path) as out_data:
        expected_dims = ["lat", "lon", "time"]
        assert len(list(out_data.dims)) == len(expected_dims)
        for dim in expected_dims:
            assert dim in list(
                out_data.dims
            ), f"Expected {dim} to be in the processed dataset dims"

        lons = out_data.lon.values
        assert (lons.min() >= kenya.lonmin) and (
            lons.max() <= kenya.lonmax
        ), "Longitudes not correctly subset"

        lats = out_data.lat.values
        assert (lats.min() >= kenya.latmin) and (
            lats.max() <= kenya.latmax
        ), "Latitudes not correctly subset"

        # Drop the leading axis and compare the rest with the 20x20 grid.
        assert out_data.VHI.values.shape[1:] == (20, 20)

    assert (
        not processor.interim.exists()
    ), "Interim chirps folder should have been deleted"
def test_make_filename():
    """Check ``create_filename`` maps ``testy_test.nc`` + ``kenya`` to ``testy_test_kenya.nc``."""
    source_name = "testy_test.nc"
    wanted = "testy_test_kenya.nc"

    produced = CHIRPSPreprocessor.create_filename(source_name, "kenya")

    assert produced == wanted, f"Expected output to be {wanted}, got {produced}"
def test_directories_created(tmp_path):
    """Check that constructing the preprocessor creates its two work folders."""
    processor = CHIRPSPreprocessor(tmp_path)

    preprocessed_dir = tmp_path / processor.preprocessed_folder / "chirps_preprocessed"
    assert preprocessed_dir.exists(), (
        "Should have created a directory tmp_path/interim/chirps_preprocessed"
    )

    interim_dir = tmp_path / processor.preprocessed_folder / "chirps_interim"
    assert interim_dir.exists(), (
        "Should have created a directory tmp_path/interim/chirps_interim"
    )
def preprocess_data(data_path):
    """Run every preprocessor for the Kenya subset, in a fixed order.

    VHI is processed first. The remaining datasets (CHIRPS, GLEAM, SRTM,
    ESA CCI) are then processed with ``regrid`` pointing at
    ``interim/vhi_preprocessed/vhi_kenya.nc``.

    Args:
        data_path: Root data directory handed to each preprocessor. It must
            support the ``/`` operator (e.g. a ``pathlib.Path``).
    """
    # preprocess VHI
    print("** Preprocessing VHI **")
    processor = VHIPreprocessor(data_path)
    # The original passed `regrid_path` here before it was assigned, which
    # raises UnboundLocalError. VHI runs first, so it is given no reference.
    # NOTE(review): assumes `regrid=None` means "do not regrid" and that
    # vhi_kenya.nc below is written by this call; confirm against
    # VHIPreprocessor.
    processor.preprocess(
        subset_str="kenya",
        regrid=None,
        n_parallel_processes=1,
        resample_time="M",
        upsampling=False,
    )

    # Reference grid shared by every preprocessor below.
    regrid_path = data_path / "interim" / "vhi_preprocessed" / "vhi_kenya.nc"

    # preprocess CHIRPS Rainfall
    print("** Preprocessing CHIRPS Precipitation **")
    processor = CHIRPSPreprocessor(data_path)
    processor.preprocess(
        subset_str="kenya", regrid=regrid_path, n_parallel_processes=1
    )

    # preprocess GLEAM evaporation
    print("** Preprocessing GLEAM Evaporation **")
    processor = GLEAMPreprocessor(data_path)
    processor.preprocess(
        subset_str="kenya",
        regrid=regrid_path,
        resample_time="M",
        upsampling=False,
    )

    # preprocess SRTM Topography
    print("** Preprocessing SRTM Topography **")
    processor = SRTMPreprocessor(data_path)
    processor.preprocess(subset_str="kenya", regrid=regrid_path)

    # preprocess ESA CCI Landcover
    print("** Preprocessing ESA CCI Landcover **")
    processor = ESACCIPreprocessor(data_path)
    processor.preprocess(
        subset_str="kenya",
        regrid=regrid_path,
        resample_time="M",
        upsampling=False,
    )
# ml_drought_dir = Path('/users/tommylees/github/ml_drought/data') ds = xr.open_dataset(data_dir / 'topo.nc') # ds['nlon'] = ds.latitude # ds['nlat'] = ds.longitude # ds = ds.rename({'nlon': 'lon', 'nlat': 'lat'}) # topo = ds.elevation # topo.to_netcdf(data_dir / 'topo_clean.nc') regrid_path = data_dir / 'temp/temp_doy.nc' # regrid_path = data_dir / 'precip/precip_africa.nc' assert regrid_path.exists() print("Reading in topo data for regridding") processor = CHIRPSPreprocessor() ds = xr.open_dataset(out_dir / 'topo.nc') regrid_da = processor.load_reference_grid(regrid_path) print("Chop Africa") # AFRICA bounding box lonmin = -31.6 lonmax = 51.8 latmin = -35.8 latmax = 37.2 inverse_lat = inverse_lon = False # processor.chop_roi( # ds=ds, # subset_str='africa',