def _select_region_da(da: xr.DataArray, region: Region) -> xr.DataArray:
    """Crop `da` to the bounding box of `region`.

    The array is converted to a Dataset (which requires `da.name` to be set),
    passed through `select_bounding_box`, and the variable is pulled back out
    under the array's own name.

    If `select_bounding_box` raises an AssertionError the call is retried once
    with `inverse_lat=True`; a second AssertionError propagates to the caller.
    """
    variable = da.name
    ds = da.to_dataset()
    try:
        subset = select_bounding_box(ds, region)
    except AssertionError:
        # the first attempt failed its internal assertion; retry with the
        # latitude bounds inverted (the original "may need to invert latlon")
        subset = select_bounding_box(ds, region, inverse_lat=True)
    return subset[variable]


def count_mappings_for_regions(reference_da: xr.DataArray,
                               comparison_da: xr.DataArray,
                               regions: List[Region]) -> pd.DataFrame:
    """Count matching pixels between two DataArrays, region by region.

    For every region both arrays are cropped to the region's bounding box,
    handed to `count_matching_pixels`, and the result is converted to a
    DataFrame by `convert_counts_dict_to_dataframe`. A `region` column holding
    `region.name` is added to each per-region frame.

    Each array is looked up under its *own* name after cropping, so the two
    inputs no longer need to share a variable name.

    Arguments:
    ---------
    reference_da: xr.DataArray
        named DataArray used as the reference
    comparison_da: xr.DataArray
        named DataArray compared against the reference
    regions: List[Region]
        regions to evaluate; each must expose a `name` attribute

    Returns:
    -------
    pd.DataFrame
        the per-region frames concatenated with `pd.concat` (row indices of
        the individual frames are kept as returned by the helper)

    Raises:
    ------
    ValueError
        if `regions` is empty (`pd.concat` has nothing to concatenate)
    """
    region_frames = []

    for region in regions:
        region_reference_da = _select_region_da(reference_da, region)
        region_comparison_da = _select_region_da(comparison_da, region)

        # count the pixels in each group
        counts = convert_counts_dict_to_dataframe(
            count_matching_pixels(region_reference_da, region_comparison_da)
        )
        # a scalar is broadcast to every row of the frame
        counts["region"] = region.name
        region_frames.append(counts)

    return pd.concat(region_frames)
def test_select_bounding_box_inversed(self):
    """Selecting with swapped longitude bounds and `inverse_lon=True`
    must return a subset with the full spatial size.
    """
    size = (100, 100)
    ds, lon_bounds, lat_bounds = _make_dataset(size)
    lonmin, lonmax = lon_bounds
    latmin, latmax = lat_bounds

    # the longitude limits are passed the wrong way round on purpose
    global_region = Region(
        name='global',
        lonmin=lonmax,
        lonmax=lonmin,
        latmin=latmin,
        latmax=latmax,
    )
    subset = select_bounding_box(ds, global_region, inverse_lon=True)

    # the leading axis (the added time dimension) is left out of the check
    spatial_shape_ok = subset.VHI.values.shape[1:] == size
    assert spatial_shape_ok, \
        f'Expected output subset to have size {size}, got {subset.VHI.values.shape[1:]}'
def test_selection(self):
    """Selecting the lower half of the longitude range must halve the
    last axis of the subset and leave the other spatial axis untouched.

    NOTE(review): the `< 0` longitude check presumes `_make_dataset` returns
    longitudes centred on 0 - confirm against the fixture.
    """
    size = (100, 100)
    ds, (lonmin, lonmax), (latmin, latmax) = _make_dataset(size)

    # midpoint of the longitude range: the region stops here
    mid = lonmin + ((lonmax - lonmin) / 2)
    half_region = Region(name='half', lonmin=lonmin, lonmax=mid,
                         latmin=latmin, latmax=latmax)
    subset = select_bounding_box(ds, half_region)

    # compare only the trailing two axes; the leading axis is skipped
    expected_shape = (100, 50)
    spatial_shape = subset.VHI.values.shape[1:]
    # the message reports exactly the two values being compared (it used to
    # state (50, 100) and print the full shape including the leading axis)
    assert spatial_shape == expected_shape, \
        f'Expected output subset to have size {expected_shape}, got {spatial_shape}'

    max_lon = subset.lon.values.max()
    assert max_lon < 0, \
        f'Got a longitude greater than 0, {max_lon}'
name='victoria', lonmin=cluster_ds.isel(lon=0).lon.values, lonmax=cluster_ds.isel(lon=7).lon.values, latmin=cluster_ds.isel(lat=-28).lat.values, latmax=cluster_ds.isel(lat=-18).lat.values, ) turkana = Region( name='turkana', lonmin=cluster_ds.isel(lon=5).lon.values, lonmax=cluster_ds.isel(lon=16).lon.values, latmin=cluster_ds.isel(lat=-16).lat.values, latmax=cluster_ds.isel(lat=-6).lat.values, ) southern_highlands = Region( name='southern_highlands', lonmin=cluster_ds.isel(lon=3).lon.values, lonmax=cluster_ds.isel(lon=13).lon.values, latmin=cluster_ds.isel(lat=-41).lat.values, latmax=cluster_ds.isel(lat=-31).lat.values, ) coastal = Region( name='coastal', lonmin=cluster_ds.isel(lon=15).lon.values, lonmax=cluster_ds.isel(lon=20).lon.values, latmin=cluster_ds.isel(lat=-44).lat.values, latmax=cluster_ds.isel(lat=-34).lat.values, ) fig, ax = plt.subplots() select_bounding_box(cluster_ds, southern_highlands).cluster_5.isel(time=0).plot()
"""!ls -U glofas_africa| head -4""" """NOTE: https://github.com/esowc/ml_drought for the `src` code""" from pathlib import Path import xarray as xr import pprint from src.preprocess.utils import select_bounding_box from src.utils import region_lookup region = region_lookup['africa'] base_data_dir = Path("/lustre/soge1/projects/crop_yield/hackathon") data_dir = base_data_dir / "glofas_africa" out_file = base_data_dir / "glofas/glofas_africa.nc" in_files = [f for f in data_dir.glob('*.nc')] # ds = xr.open_mfdataset( # (data_dir / '*.nc').as_posix(), chunks={'time': 1} # ) # ds.rename({'latitude': 'lat', 'longitude': 'lon'}) select_bounding_box()