def test_nearest_area_2d_to_area_1n_3d_data(self):
    """Test 2D area definition to 2D area definition; 1 neighbor, 3d data."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    data = self.data_3d
    resampler = XArrayResamplerNN(self.src_area_2d, self.area_def,
                                  radius_of_influence=50000,
                                  neighbours=1)
    ninfo = resampler.get_neighbour_info()
    for val in ninfo[:3]:  # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    self.assertRaises(AssertionError,
                      resampler.get_sample_from_neighbour_info, data)

    # rename data dimensions to match the expected area dimensions
    data = data.rename({'my_dim_y': 'y', 'my_dim_x': 'x'})
    res = resampler.get_sample_from_neighbour_info(data)
    self.assertIsInstance(res, xr.DataArray)
    self.assertIsInstance(res.data, da.Array)
    six.assertCountEqual(self, res.coords['bands'], ['r', 'g', 'b'])
    res = res.values
    cross_sum = np.nansum(res)
    expected = 83120259.0
    self.assertEqual(cross_sum, expected)
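# A minimal sketch (shapes, chunking and values are assumptions, not the real
# test setUp) of what a 3-D, band-dimensioned fixture such as self.data_3d
# could look like for the test above: three bands named 'r', 'g', 'b' on
# dask-backed xarray data with 'my_dim_y'/'my_dim_x' spatial dimensions.
import numpy as np
import xarray as xr
import dask.array as da

data_3d = xr.DataArray(
    da.zeros((3, 50, 10), chunks=5, dtype=np.float64),
    dims=('bands', 'my_dim_y', 'my_dim_x'),
    coords={'bands': ['r', 'g', 'b']})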
def test_nearest_type_preserve(self):
    """Test 1D swath definition to 2D grid definition; 1 neighbor."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                  radius_of_influence=100000,
                                  neighbours=1)
    data = self.tdata_1d
    data = xr.DataArray(da.from_array(np.array([1, 2, 3]), chunks=5),
                        dims=('my_dim1',))
    ninfo = resampler.get_neighbour_info()
    for val in ninfo[:3]:  # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    res = resampler.get_sample_from_neighbour_info(data, fill_value=255)
    self.assertIsInstance(res, xr.DataArray)
    self.assertIsInstance(res.data, da.Array)
    actual = res.values
    expected = np.array([
        [1, 2, 2],
        [1, 2, 2],
        [1, 255, 2],
        [1, 2, 2],
    ])
    np.testing.assert_equal(actual, expected)
def test_nearest_swath_1d_mask_to_grid_8n(self):
    """Test 1D swath definition to 2D grid definition; 8 neighbors."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                  radius_of_influence=100000,
                                  neighbours=8)
    data = self.tdata_1d
    ninfo = resampler.get_neighbour_info(mask=data.isnull())
    for val in ninfo[:3]:  # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    res = resampler.get_sample_from_neighbour_info(data)
    self.assertIsInstance(res, xr.DataArray)
    self.assertIsInstance(res.data, da.Array)
def test_nearest_swath_2d_mask_to_area_1n(self):
    """Test 2D swath definition to 2D area definition; 1 neighbor."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    swath_def = self.swath_def_2d
    data = self.data_2d
    resampler = XArrayResamplerNN(swath_def, self.area_def,
                                  radius_of_influence=50000,
                                  neighbours=1)
    ninfo = resampler.get_neighbour_info(mask=data.isnull())
    for val in ninfo[:3]:  # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    res = resampler.get_sample_from_neighbour_info(data)
    self.assertIsInstance(res, xr.DataArray)
    self.assertIsInstance(res.data, da.Array)
    res = res.values
    cross_sum = np.nansum(res)
    expected = 15874591.0
    self.assertEqual(cross_sum, expected)
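# An illustrative sketch (values assumed, not taken from the real setUp) of a
# 2-D swath fixture like self.swath_def_2d / self.data_2d, with a few NaN
# pixels so that `data.isnull()` can be passed to get_neighbour_info() as the
# mask, as in the test above.
import numpy as np
import xarray as xr
import dask.array as da
from pyresample.geometry import SwathDefinition

lon_vals, lat_vals = np.meshgrid(np.linspace(5., 15., 10),
                                 np.linspace(45., 55., 8))
lons_2d = xr.DataArray(da.from_array(lon_vals, chunks=4), dims=('y', 'x'))
lats_2d = xr.DataArray(da.from_array(lat_vals, chunks=4), dims=('y', 'x'))
swath_def_2d = SwathDefinition(lons_2d, lats_2d)

values = np.arange(80, dtype=np.float64).reshape(8, 10)
values[0, :3] = np.nan  # a few invalid pixels to exercise the mask
data_2d = xr.DataArray(da.from_array(values, chunks=4), dims=('y', 'x'))
# data_2d.isnull() is then a boolean DataArray marking the invalid source pixels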
def test_nearest_swath_1d_mask_to_grid_1n(self):
    """Test 1D swath definition to 2D grid definition; 1 neighbor."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                  radius_of_influence=100000,
                                  neighbours=1)
    data = self.tdata_1d
    ninfo = resampler.get_neighbour_info(mask=data.isnull())
    for val in ninfo[:3]:  # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    res = resampler.get_sample_from_neighbour_info(data)
    self.assertIsInstance(res, xr.DataArray)
    self.assertIsInstance(res.data, da.Array)
    actual = res.values
    expected = np.array([
        [1., 2., 2.],
        [1., 2., 2.],
        [1., np.nan, 2.],
        [1., 2., 2.],
    ])
    np.testing.assert_allclose(actual, expected)
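# A hedged sketch of how the 1-D swath fixtures used by the tests above
# (self.tswath_1d, self.tdata_1d) and a small lon/lat target grid like
# self.tgrid might be built.  The coordinates, extents and chunk sizes are
# illustrative assumptions, not the values used by the real setUp.
import numpy as np
import xarray as xr
import dask.array as da
from pyresample.geometry import SwathDefinition, AreaDefinition

tlons = xr.DataArray(da.from_array(np.array([11.5, 12.5, 12.5]), chunks=5),
                     dims=('my_dim1',))
tlats = xr.DataArray(da.from_array(np.array([55.5, 55.5, 55.4]), chunks=5),
                     dims=('my_dim1',))
tswath_1d = SwathDefinition(tlons, tlats)

tdata_1d = xr.DataArray(da.from_array(np.array([1., 2., 3.]), chunks=5),
                        dims=('my_dim1',))

# 3x4 geographic (lon/lat) target grid roughly covering the swath
tgrid = AreaDefinition('test_grid', 'test grid', 'test_grid',
                       {'proj': 'longlat', 'datum': 'WGS84'},
                       3, 4, (11.0, 55.0, 13.0, 56.0))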
class KDTreeResampler(BaseResampler):
    """Resample using nearest neighbour."""

    def __init__(self, source_geo_def, target_geo_def):
        super(KDTreeResampler, self).__init__(source_geo_def, target_geo_def)
        self.resampler = None

    def precompute(self, mask=None, radius_of_influence=None, epsilon=0,
                   reduce_data=True, cache_dir=None, **kwargs):
        """Create a KDTree structure and store it for later use.

        Note: The `mask` keyword should be provided if geolocation may be
              valid where data points are invalid. This defaults to the
              `mask` attribute of the `data` numpy masked array passed to
              the `resample` method.
        """
        del kwargs
        source_geo_def = mask_source_lonlats(self.source_geo_def, mask)

        if radius_of_influence is None:
            try:
                radius_of_influence = source_geo_def.lons.resolution * 3
            except (AttributeError, TypeError):
                radius_of_influence = 10000

        if self.resampler is None:
            kwargs = dict(source_geo_def=source_geo_def,
                          target_geo_def=self.target_geo_def,
                          radius_of_influence=radius_of_influence,
                          neighbours=1,
                          epsilon=epsilon,
                          reduce_data=reduce_data)
            self.resampler = XArrayResamplerNN(**kwargs)
            try:
                self.load_neighbour_info(cache_dir, **kwargs)
                LOG.debug("Read pre-computed kd-tree parameters")
            except IOError:
                LOG.debug("Computing kd-tree parameters")
                self.resampler.get_neighbour_info()
                self.save_neighbour_info(cache_dir, **kwargs)

    def load_neighbour_info(self, cache_dir, **kwargs):
        """Read index arrays from the on-disk cache, raising IOError if there is none."""
        if cache_dir:
            filename = self._create_cache_filename(cache_dir, **kwargs)
            cache = np.load(filename)
            for elt in ['valid_input_index', 'valid_output_index',
                        'index_array', 'distance_array']:
                if isinstance(cache[elt], tuple):
                    setattr(self.resampler, elt, cache[elt][0])
                else:
                    setattr(self.resampler, elt, cache[elt])
            cache.close()
        else:
            raise IOError

    def save_neighbour_info(self, cache_dir, **kwargs):
        """Save the resampler's index arrays to disk if a cache dir is given."""
        if cache_dir:
            filename = self._create_cache_filename(cache_dir, **kwargs)
            LOG.info('Saving kd_tree neighbour info to %s', filename)
            cache = {'valid_input_index': self.resampler.valid_input_index,
                     'valid_output_index': self.resampler.valid_output_index,
                     'index_array': self.resampler.index_array,
                     'distance_array': self.resampler.distance_array}
            np.savez(filename, **cache)

    def compute(self, data, weight_funcs=None, fill_value=None,
                with_uncert=False, **kwargs):
        """Resample ``data`` using the precomputed neighbour info."""
        del kwargs
        LOG.debug("Resampling " + str(data.name))
        if fill_value is None:
            fill_value = data.attrs.get('_FillValue', np.nan)
        res = self.resampler.get_sample_from_neighbour_info(data, fill_value)
        return res
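# A hedged usage sketch for the class above.  It assumes KDTreeResampler and
# its BaseResampler machinery are importable from the module where they are
# defined; the swath, target area and data values below are made up for
# illustration only.
import numpy as np
import xarray as xr
import dask.array as da
from pyresample.geometry import SwathDefinition, AreaDefinition
from satpy.resample import KDTreeResampler  # assumed import path

lon_vals, lat_vals = np.meshgrid(np.linspace(25., 35., 50),
                                 np.linspace(55., 65., 10))
lons = xr.DataArray(da.from_array(lon_vals, chunks=10), dims=('y', 'x'))
lats = xr.DataArray(da.from_array(lat_vals, chunks=10), dims=('y', 'x'))
source_def = SwathDefinition(lons, lats)
target_def = AreaDefinition('target', 'target grid', 'target',
                            {'proj': 'longlat', 'datum': 'WGS84'},
                            50, 10, (25.0, 55.0, 35.0, 65.0))
data = xr.DataArray(da.random.random((10, 50), chunks=10), dims=('y', 'x'))

resampler = KDTreeResampler(source_def, target_def)
# precompute() builds the kd-tree neighbour info (no cache_dir, so nothing is
# written to disk); compute() applies the precomputed indexes to one array.
resampler.precompute(radius_of_influence=50000)
result = resampler.compute(data, fill_value=np.nan)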
class KDTreeResampler(BaseResampler):
    """Resample using a KDTree-based nearest neighbor algorithm.

    This resampler implements on-disk caching when the `cache_dir` argument
    is provided to the `resample` method. This should provide significant
    performance improvements on consecutive resampling of geostationary data.
    It is not recommended to provide `cache_dir` when the `mask` keyword
    argument is provided to `precompute` which occurs by default for
    `SwathDefinition` source areas.

    Args:
        cache_dir (str): Long term storage directory for intermediate
                         results. By default only 10 different source/target
                         combinations are cached to save space.
        mask_area (bool): Force resampled data's invalid pixel mask to be
                          used when searching for nearest neighbor pixels. By
                          default this is True for SwathDefinition source
                          areas and False for all other area definition types.
        radius_of_influence (float): Search radius cut off distance in meters
        epsilon (float): Allowed uncertainty in meters. Increasing uncertainty
                         reduces execution time.

    """

    def __init__(self, source_geo_def, target_geo_def):
        super(KDTreeResampler, self).__init__(source_geo_def, target_geo_def)
        self.resampler = None
        self._index_caches = {}

    def precompute(self, mask=None, radius_of_influence=None, epsilon=0,
                   cache_dir=None, **kwargs):
        """Create a KDTree structure and store it for later use.

        Note: The `mask` keyword should be provided if geolocation may be
              valid where data points are invalid.
        """
        del kwargs
        source_geo_def = self.source_geo_def

        if mask is not None and cache_dir is not None:
            LOG.warning("Mask and cache_dir both provided to nearest "
                        "resampler. Cached parameters are affected by "
                        "masked pixels. Will not cache results.")
            cache_dir = None

        if radius_of_influence is None:
            try:
                radius_of_influence = source_geo_def.lons.resolution * 3
            except (AttributeError, TypeError):
                radius_of_influence = 10000

        kwargs = dict(source_geo_def=source_geo_def,
                      target_geo_def=self.target_geo_def,
                      radius_of_influence=radius_of_influence,
                      neighbours=1,
                      epsilon=epsilon)

        if self.resampler is None:
            # FIXME: We need to move all of this caching logic to pyresample
            self.resampler = XArrayResamplerNN(**kwargs)

        try:
            self.load_neighbour_info(cache_dir, mask=mask, **kwargs)
            LOG.debug("Read pre-computed kd-tree parameters")
        except IOError:
            LOG.debug("Computing kd-tree parameters")
            self.resampler.get_neighbour_info(mask=mask)
            self.save_neighbour_info(cache_dir, mask=mask, **kwargs)

    def _apply_cached_indexes(self, cached_indexes, persist=False):
        """Reassign various resampler index attributes."""
        for elt in ['valid_input_index', 'valid_output_index',
                    'index_array', 'distance_array']:
            val = cached_indexes[elt]
            if isinstance(val, tuple):
                val = cached_indexes[elt][0]
            elif isinstance(val, np.ndarray):
                val = da.from_array(val, chunks=CHUNK_SIZE)
            elif persist and isinstance(val, da.Array):
                cached_indexes[elt] = val = val.persist()
            setattr(self.resampler, elt, val)

    def load_neighbour_info(self, cache_dir, mask=None, **kwargs):
        """Read index arrays from either the in-memory or on-disk cache."""
        mask_name = getattr(mask, 'name', None)
        filename = self._create_cache_filename(cache_dir,
                                               mask=mask_name, **kwargs)
        if mask_name in self._index_caches:
            self._apply_cached_indexes(self._index_caches[mask_name])
        elif cache_dir:
            cache = np.load(filename, mmap_mode='r')
            # copy the dict so we can modify its keys
            new_cache = dict(cache.items())
            cache.close()
            self._apply_cached_indexes(new_cache)  # modifies cache dict in-place
            self._index_caches[mask_name] = new_cache
        else:
            raise IOError

    def save_neighbour_info(self, cache_dir, mask=None, **kwargs):
        """Cache the resampler's index arrays if there is a cache dir."""
        if cache_dir:
            mask_name = getattr(mask, 'name', None)
            filename = self._create_cache_filename(
                cache_dir, mask=mask_name, **kwargs)
            LOG.info('Saving kd_tree neighbour info to %s', filename)
            cache = self._read_resampler_attrs()
            # update the cache in place with persisted dask arrays
            self._apply_cached_indexes(cache, persist=True)
            self._index_caches[mask_name] = cache
            np.savez(filename, **cache)

    def _read_resampler_attrs(self):
        """Read certain attributes from the resampler for caching."""
        return {attr_name: getattr(self.resampler, attr_name)
                for attr_name in [
                    'valid_input_index', 'valid_output_index',
                    'index_array', 'distance_array']}

    def compute(self, data, weight_funcs=None, fill_value=np.nan,
                with_uncert=False, **kwargs):
        """Resample ``data`` using the precomputed neighbour info."""
        del kwargs
        LOG.debug("Resampling " + str(data.name))
        res = self.resampler.get_sample_from_neighbour_info(data, fill_value)
        return res
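# A hedged sketch of the caching behaviour of the class above, reusing the
# source_def/target_def placeholders from the previous sketch.  The first
# precompute() call computes the neighbour indexes and saves them as an .npz
# file under cache_dir; repeated calls hit the in-memory index cache, and a
# fresh resampler for the same source/target pair reloads the .npz instead of
# rebuilding the kd-tree.  The cache filename logic lives in
# BaseResampler._create_cache_filename and is assumed, not shown here.
import tempfile

cache_dir = tempfile.mkdtemp()

resampler = KDTreeResampler(source_def, target_def)
resampler.precompute(radius_of_influence=50000, cache_dir=cache_dir)  # compute and save
resampler.precompute(radius_of_influence=50000, cache_dir=cache_dir)  # in-memory cache hit

resampler2 = KDTreeResampler(source_def, target_def)
resampler2.precompute(radius_of_influence=50000, cache_dir=cache_dir)  # loaded from disk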