def test_get_average(self):
    """Test averaging bucket resampling.

    Checks the default result (which contains NaN), a custom ``fill_value``
    and the handling of input that is entirely NaN.  Every result is
    obtained through ``self._get_average_result`` so that building the lazy
    result is verified to need no dask computation before it is computed.
    """
    data = da.from_array(np.array([[2., 4.], [2., 2.]]))

    # Without pre-calculated indices
    result = self._get_average_result(data)
    self.assertEqual(np.nanmax(result), 3.)
    self.assertTrue(np.any(np.isnan(result)))

    # Use a fill value other than np.nan
    result = self._get_average_result(data, fill_value=-1)
    self.assertEqual(np.max(result), 3.)
    self.assertEqual(np.min(result), -1)
    self.assertFalse(np.any(np.isnan(result)))

    # Test masking all-NaN bins
    data = da.from_array(np.array([[np.nan, np.nan], [np.nan, np.nan]]))
    result = self._get_average_result(data, mask_all_nan=True)
    self.assertTrue(np.all(np.isnan(result)))

    # By default all-NaN bins have a value of 0.0
    result = self._get_average_result(data)
    self.assertEqual(np.nanmax(result), 0.0)
def test_get_bucket_indices(self):
    """Check the x/y bucket indices calculated for the source locations."""
    # NOTE(review): this file holds a second ``test_get_bucket_indices`` with
    # different expected values; if both sit in the same class the later
    # definition replaces this one - confirm which is intended.

    # Setting up the indices must not trigger any dask computation
    no_compute = CustomScheduler(max_computes=0)
    with dask.config.set(scheduler=no_compute):
        self.resampler._get_indices()
    x_idxs, y_idxs = da.compute(self.resampler.x_idxs,
                                self.resampler.y_idxs)
    np.testing.assert_equal(x_idxs, np.array([1710, 1710, 1707, 1705]))
    np.testing.assert_equal(y_idxs, np.array([465, 465, 459, 455]))

    # Additional small test case: a 2 x 2 lat/lon target area
    adef = create_area_def(area_id='test',
                           projection={'proj': 'latlong'},
                           width=2, height=2,
                           center=(0, 0),
                           resolution=10)
    lon_values = np.array(
        [-10.0, -9.9, -0.1, 0, 0.1, 9.9, 10.0, -10.1, 0])
    lat_values = np.array(
        [-10.0, -9.9, -0.1, 0, 0.1, 9.9, 10.0, 0, 10.1])
    lons = da.from_array(lon_values, chunks=2)
    lats = da.from_array(lat_values, chunks=2)
    small_resampler = bucket.BucketResampler(source_lats=lats,
                                             source_lons=lons,
                                             target_area=adef)
    small_resampler._get_indices()

    expected_x = np.array([-1, 0, 0, 1, 1, 1, -1, -1, -1])
    expected_y = np.array([-1, 1, 1, 1, 0, 0, -1, -1, -1])
    np.testing.assert_equal(small_resampler.x_idxs, expected_x)
    np.testing.assert_equal(small_resampler.y_idxs, expected_y)
def test_get_bucket_indices(self):
    """Test calculation of array indices.

    The index arrays are set up under a scheduler that allows zero computes
    and are only computed afterwards for comparison.
    """
    # NOTE(review): an earlier ``test_get_bucket_indices`` with different
    # expected values exists in this file; if both sit in the same class this
    # definition replaces the earlier one - confirm which is intended.

    # Ensure nothing is calculated
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        self.resampler._get_indices()
    x_idxs, y_idxs = da.compute(self.resampler.x_idxs,
                                self.resampler.y_idxs)
    # assert_equal reports the mismatching elements and also checks shape,
    # unlike assertTrue(np.all(a == b))
    np.testing.assert_equal(x_idxs, np.array([1709, 1709, 1706, 1705]))
    np.testing.assert_equal(y_idxs, np.array([465, 465, 458, 455]))
def test_get_count(self):
    """Test drop-in-a-bucket count.

    The lazy count array is built under a scheduler that allows zero
    computes and is computed afterwards for the checks.
    """
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_count()
    result = result.compute()
    # The fullest bin has two hits
    self.assertEqual(np.max(result), 2)
    # Two bins with a single hit
    self.assertEqual(np.sum(result == 1), 2)
    # One bin with two hits
    self.assertEqual(np.sum(result == 2), 1)
    # The resampler should expose the counts afterwards
    self.assertIsNotNone(self.resampler.counts)
def test_get_sum(self):
    """Test drop-in-a-bucket sum.

    Covers dask array input, the same data wrapped in an
    ``xarray.DataArray`` and input that is entirely NaN (with and without
    ``mask_all_nan``).  Each lazy result is built under a scheduler that
    allows zero computes and is computed explicitly afterwards.
    """
    data = da.from_array(np.array([[2., 2.], [2., 2.]]),
                         chunks=self.chunks)
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data)
    result = result.compute()
    # One bin with two hits of 2.0, so max value is 4.0
    self.assertEqual(np.max(result), 4.)
    # Two bins with the same value
    self.assertEqual(np.sum(result == 2.), 2)
    # One bin with double the value
    self.assertEqual(np.sum(result == 4.), 1)
    self.assertEqual(result.shape, self.adef.shape)

    # Test that also Xarray.DataArrays work
    data = xr.DataArray(data)
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data)
    result = result.compute()
    # One bin with two hits of 2.0, so max value is 4.0
    self.assertEqual(np.max(result), 4.)
    # Two bins with the same value
    self.assertEqual(np.sum(result == 2.), 2)
    # One bin with double the value
    self.assertEqual(np.sum(result == 4.), 1)
    self.assertEqual(result.shape, self.adef.shape)

    # Test masking all-NaN bins
    data = da.from_array(np.array([[np.nan, np.nan], [np.nan, np.nan]]),
                         chunks=self.chunks)
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data, mask_all_nan=True)
    result = result.compute()
    self.assertTrue(np.all(np.isnan(result)))

    # By default all-NaN bins have a value of 0.0
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data)
    result = result.compute()
    self.assertEqual(np.nanmax(result), 0.0)
def test_resample_bucket_fractions(self):
    """Test fraction calculations for categorical data.

    Checks the per-category fractions with the default NaN fill, with a
    custom ``fill_value``, and that omitting ``categories`` needs at most
    one dask computation.
    """
    data = da.from_array(np.array([[2, 4], [2, 2]]))
    categories = [1, 2, 3, 4]
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_fractions(data, categories=categories)
    self.assertEqual(set(categories), set(result.keys()))

    # Category 1 does not occur in the data
    res = result[1].compute()
    self.assertEqual(np.nanmax(res), 0.)
    res = result[2].compute()
    self.assertEqual(np.nanmax(res), 1.)
    self.assertEqual(np.nanmin(res), 0.5)
    # Category 3 does not occur in the data
    res = result[3].compute()
    self.assertEqual(np.nanmax(res), 0.)
    res = result[4].compute()
    self.assertEqual(np.nanmax(res), 0.5)
    self.assertEqual(np.nanmin(res), 0.)
    # There should be NaN values
    self.assertTrue(np.any(np.isnan(res)))

    # Use a fill value
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_fractions(data, categories=categories,
                                              fill_value=-1)

    # There should not be any NaN values
    for i in categories:
        res = result[i].compute()
        self.assertFalse(np.any(np.isnan(res)))
        self.assertEqual(np.min(res), -1)

    # No categories given, need to compute the data once to get
    # the categories
    with dask.config.set(scheduler=CustomScheduler(max_computes=1)):
        self.resampler.get_fractions(data, categories=None)
def _get_average_result(self, data, **kwargs):
    """Return the computed bucket average of *data*.

    The lazy result is created with ``kwargs`` passed on to
    ``get_average`` while a scheduler allowing zero computes is active;
    the actual computation happens only after that context is left.
    """
    no_compute = CustomScheduler(max_computes=0)
    with dask.config.set(scheduler=no_compute):
        lazy_average = self.resampler.get_average(data, **kwargs)
    computed = lazy_average.compute()
    return computed