def test_aggregate_with_some_nans():
    # Aggregating over a polygon should skip NaN pixels and average only
    # the valid ones (here: pixels valued 1 and 2 -> mean of 1.5).
    ds = xr.Dataset(
        {'test': (['lon', 'lat'], np.array([[np.nan, 1], [2, np.nan]])),
         'lat_bnds': (['lat', 'bnds'], np.array([[-0.5, 0.5], [0.5, 1.5]])),
         'lon_bnds': (['lon', 'bnds'], np.array([[-0.5, 0.5], [0.5, 1.5]]))},
        coords={'lat': (['lat'], np.array([0, 1])),
                'lon': (['lon'], np.array([0, 1])),
                'bnds': (['bnds'], np.array([0, 1]))})

    # Build the pixel -> polygon aggregation mapping
    pix_agg = create_raster_polygons(ds)

    # One polygon spanning all four pixels of the 2x2 grid
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                           crs="EPSG:4326")

    # Overlap weights, then the aggregate itself
    wm = get_pixel_overlaps(gdf, pix_agg)
    agg = aggregate(ds, wm)

    # Should be 1.5; one valid pixel valued 1, one valued 2.
    assert np.allclose([agg.agg.test[0]], 1.5, rtol=1e-4)
def test_get_pixel_overlaps_multiple_pixels_partial(pix_agg=pix_agg):
    # One polygon overlapping all four pixels of the test grid
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf_test = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                                crs="EPSG:4326")

    # Compute the overlaps; from here on only the tabular output
    # matters (no more geometry information needed)
    wm_out = get_pixel_overlaps(gdf_test, pix_agg)
    df0 = pd.DataFrame(wm_out.agg)

    # Expected output. rel_area is not a flat 0.25 everywhere because
    # pixel areas shrink slightly with latitude.
    df_compare = pd.DataFrame(
        {'name': ['test'],
         'poly_idx': 0,
         'rel_area': [[[0.250009, 0.250009, 0.249991, 0.249991]]],
         'pix_idxs': [[0, 1, 2, 3]],
         'coords': [[(0, 0), (0, 1), (1, 0), (1, 1)]]})

    # pd.testing.assert_frame_equal() fails on list-valued columns with
    # a ValueError ("the truth value of a Series is ambiguous..."), so
    # compare column by column via np.allclose. Not very robust -- maybe
    # a reason to rethink how the geodataframe is organized someday.
    assert np.allclose(list(df0.rel_area), list(df_compare.rel_area))
    assert np.allclose(list(df0.pix_idxs), list(df_compare.pix_idxs))
    assert np.allclose(list(df0.coords), list(df_compare.coords))
def test_aggregate_with_all_nans():
    # A field that is NaN everywhere should aggregate to NaN.
    ds = xr.Dataset(
        {'test': (['lon', 'lat'],
                  np.array([[np.nan, np.nan], [np.nan, np.nan]])),
         'lat_bnds': (['lat', 'bnds'], np.array([[-0.5, 0.5], [0.5, 1.5]])),
         'lon_bnds': (['lon', 'bnds'], np.array([[-0.5, 0.5], [0.5, 1.5]]))},
        coords={'lat': (['lat'], np.array([0, 1])),
                'lon': (['lon'], np.array([0, 1])),
                'bnds': (['bnds'], np.array([0, 1]))})

    # Aggregation mapping for the all-NaN grid
    pix_agg = create_raster_polygons(ds)

    # One polygon spanning all four pixels
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                           crs="EPSG:4326")

    wm = get_pixel_overlaps(gdf, pix_agg)
    agg = aggregate(ds, wm)

    # Should only return nan.
    # (Not a great assert -- agg.agg.test[0] comes out as [array(nan)],
    # which quacks like a single nan, but it's unclear how to reproduce
    # that exact object for a direct comparison.)
    assert np.all([np.isnan(k) for k in agg.agg.test])
def test_aggregate_with_weights(ds=ds):
    # One polygon spanning all four pixels
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                           crs="EPSG:4326")

    # Simple weight grid: the equatorial pixels get weight 1, the
    # pixels at 1N get weight 0
    weights = xr.DataArray(data=np.array([[1, 1], [0, 0]]),
                           dims=['lat', 'lon'],
                           coords=[ds.lat, ds.lon])

    # Build the aggregation mapping with the weights folded in, then
    # compute overlaps and the aggregate
    pix_agg = create_raster_polygons(ds, weights=weights)
    wm = get_pixel_overlaps(gdf, pix_agg)
    agg = aggregate(ds, wm)

    # The input ds carries [0, 2] on the two equatorial pixels, so the
    # weighted average is exactly 1.0
    assert np.allclose(list(agg.agg.test.values), 1.0)
def test_get_pixel_overlaps_fraction_of_pixel(pix_agg=pix_agg):
    # A polygon covering only part of a single pixel
    poly = Polygon([(-0.5, -0.5), (-0.5, 0), (0, 0), (0, -0.5),
                    (-0.5, -0.5)])
    gdf_test = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                                crs="EPSG:4326")

    # Compute overlaps; only the tabular output is relevant from here
    # (no more geometry info needed)
    wm_out = get_pixel_overlaps(gdf_test, pix_agg)
    df0 = pd.DataFrame(wm_out.agg)

    # The single overlapped pixel should carry all the relative area
    df_compare = pd.DataFrame({'name': ['test'],
                               'poly_idx': 0,
                               'rel_area': [[[1.0]]],
                               'pix_idxs': [[0]],
                               'coords': [[(0, 0)]]})

    # pd.testing.assert_frame_equal() fails on list-valued columns with
    # a ValueError ("the truth value of a Series is ambiguous..."), so
    # compare column by column via np.allclose. Not very robust -- maybe
    # a reason to rethink how the geodataframe is organized someday.
    assert np.allclose(list(df0.rel_area), list(df_compare.rel_area))
    assert np.allclose(list(df0.pix_idxs), list(df_compare.pix_idxs))
    assert np.allclose(list(df0.coords), list(df_compare.coords))
def test_get_pixel_overlaps_passthru_weights(pix_agg=pix_agg):
    # The weights attached to the pixel gdf should pass through
    # get_pixel_overlaps() untouched.
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf_test = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                                crs="EPSG:4326")

    wm_out = get_pixel_overlaps(gdf_test, pix_agg)

    pd.testing.assert_series_equal(wm_out.weights,
                                   pix_agg['gdf_pixels'].weights)
def test_get_pixel_overlaps_passthru_source_grid(pix_agg=pix_agg):
    # The source grid coordinates should pass through
    # get_pixel_overlaps() unchanged.
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf_test = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                                crs="EPSG:4326")

    wm_out = get_pixel_overlaps(gdf_test, pix_agg)

    for dim in ('lat', 'lon'):
        xr.testing.assert_equal(wm_out.source_grid[dim],
                                pix_agg['source_grid'][dim])
def test_get_pixel_overlaps_gdf_wpreexisting_index(pix_agg=pix_agg):
    # Regression check: a gdf whose index does not start at 0 must
    # still work end-to-end.
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf_test = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                                crs="EPSG:4326",
                                index=np.arange(10, 11))

    wm_out = get_pixel_overlaps(gdf_test, pix_agg)

    # The index error for a mishandled gdf index surfaces in aggregate()
    agg = aggregate(ds, wm_out)

    # 2.1666 reflects the weighting baked into the pix_agg variable
    # used throughout this section. The exact value doesn't really
    # matter -- the point is that aggregate() above didn't raise an
    # index error.
    assert np.allclose(list(agg.agg.test.values), 2.1666, rtol=1e-4)
def test_aggregate_basic_wdotproduct(ds=ds):
    # Same multi-pixel aggregation as above, but exercising the
    # dot-product implementation path.
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                           crs="EPSG:4326")

    # Rebuild the aggregation mapping used by the tests below
    pix_agg = create_raster_polygons(ds)

    # Overlaps and aggregate, both via the dot_product implementation
    wm = get_pixel_overlaps(gdf, pix_agg, impl='dot_product')
    agg = aggregate(ds, wm, impl='dot_product')

    # rtol is loosened to 1e-4 for some reason: the result is actually
    # 1.499981, whereas multiplying out
    # np.sum(agg.agg.rel_area[0]*np.array([0,1,2,3])) gives 1.499963...
    # Possibly worth examining more closely later.
    assert np.allclose(list(agg.agg.test.values), 1.4999, rtol=1e-4)
def test_aggregate_with_mismatched_grid():
    # Exercises the subset_find path: the input grid (3x3, spanning
    # -1..1) is larger than the grid pix_agg was built from.
    ds = xr.Dataset(
        {'test': (['lon', 'lat'],
                  np.array([[30, 40, 50], [10, 0, 1], [20, 2, 3]])),
         'lat_bnds': (['lat', 'bnds'],
                      np.array([[-1.5, -0.5], [-0.5, 0.5], [0.5, 1.5]])),
         'lon_bnds': (['lon', 'bnds'],
                      np.array([[-1.5, -0.5], [-0.5, 0.5], [0.5, 1.5]]))},
        coords={'lat': (['lat'], np.array([-1, 0, 1])),
                'lon': (['lon'], np.array([-1, 0, 1])),
                'bnds': (['bnds'], np.array([0, 1]))})

    # One polygon spanning the upper-right 2x2 pixels
    poly = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    gdf = gpd.GeoDataFrame({'name': ['test'], 'geometry': [poly]},
                           crs="EPSG:4326")

    # Aggregation mapping, overlaps, and the aggregate itself
    pix_agg = create_raster_polygons(ds)
    wm = get_pixel_overlaps(gdf, pix_agg)
    agg = aggregate(ds, wm)

    # On the change in rtol, see note in test_aggregate_basic
    assert np.allclose(list(agg.agg.test.values), 1.4999, rtol=1e-4)