예제 #1
0
    def test_nearest_area_2d_to_area_1n_3d_data(self):
        """Resample 3D data from a 2D area to a 2D area with one neighbour.

        The data is first passed with its original dimension names, which
        must be rejected with an ``AssertionError``; after renaming the
        dimensions to ``y``/``x`` the resampling must succeed and yield a
        dask-backed ``DataArray`` that keeps the ``bands`` coordinate.
        """
        import dask.array as da
        import xarray as xr
        from pyresample.kd_tree import XArrayResamplerNN

        nn_resampler = XArrayResamplerNN(self.src_area_2d, self.area_def,
                                         radius_of_influence=50000,
                                         neighbours=1)
        source_data = self.data_3d
        neighbour_info = nn_resampler.get_neighbour_info()
        # vii, ia, voi
        for index_arr in neighbour_info[:3]:
            self.assertIsInstance(index_arr, da.Array)

        # data whose dimensions are not named after the area dimensions
        # has to be refused
        with self.assertRaises(AssertionError):
            nn_resampler.get_sample_from_neighbour_info(source_data)

        # rename data dimensions to match the expected area dimensions
        renamed = source_data.rename({'my_dim_y': 'y', 'my_dim_x': 'x'})
        result = nn_resampler.get_sample_from_neighbour_info(renamed)
        self.assertIsInstance(result, xr.DataArray)
        self.assertIsInstance(result.data, da.Array)
        six.assertCountEqual(self, result.coords['bands'], ['r', 'g', 'b'])

        # compare the NaN-ignoring sum of all resampled values with the
        # known reference value
        self.assertEqual(np.nansum(result.values), 83120259.0)
예제 #2
0
 def test_nearest_type_preserve(self):
     """Test nearest neighbour resampling of integer data with a fill value.

     A dask-backed integer ``DataArray`` is resampled from the 1D swath to
     the 2D grid using one neighbour and ``fill_value=255``. The result
     must be a dask-backed ``DataArray`` whose values equal the expected
     integer array, including ``255`` in the one cell expected to be
     filled.
     """
     from pyresample.kd_tree import XArrayResamplerNN
     import xarray as xr
     import dask.array as da
     resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                   radius_of_influence=100000,
                                   neighbours=1)
     # Integer input built directly here; the fixture data is not used by
     # this test.
     data = xr.DataArray(da.from_array(np.array([1, 2, 3]),
                                       chunks=5),
                         dims=('my_dim1',))
     ninfo = resampler.get_neighbour_info()
     for val in ninfo[:3]:
         # vii, ia, voi
         self.assertIsInstance(val, da.Array)
     res = resampler.get_sample_from_neighbour_info(data, fill_value=255)
     self.assertIsInstance(res, xr.DataArray)
     self.assertIsInstance(res.data, da.Array)
     actual = res.values
     expected = np.array([
         [1, 2, 2],
         [1, 2, 2],
         [1, 255, 2],
         [1, 2, 2],
     ])
     np.testing.assert_equal(actual, expected)
예제 #3
0
    def precompute(self, mask=None, radius_of_influence=None, epsilon=0,
                   reduce_data=True, cache_dir=None, **kwargs):
        """Prepare the nearest-neighbour lookup and keep it on the instance.

        The neighbour information is only produced the first time this is
        called, i.e. while ``self.resampler`` is still ``None``. It is read
        through ``load_neighbour_info`` when that succeeds and computed
        (then handed to ``save_neighbour_info``) when it raises ``IOError``.

        Note: The `mask` keyword should be provided if geolocation may be valid
        where data points are invalid. This defaults to the `mask` attribute of
        the `data` numpy masked array passed to the `resample` method.

        Args:
            mask: Passed to ``mask_source_lonlats`` together with the source
                geolocation definition.
            radius_of_influence: Search radius handed to the resampler. When
                ``None`` it is three times ``source_geo_def.lons.resolution``
                or, if that is unavailable, ``10000``.
            epsilon: Forwarded unchanged to ``XArrayResamplerNN``.
            reduce_data: Forwarded unchanged to ``XArrayResamplerNN``.
            cache_dir: Directory passed to the load/save helpers.
            **kwargs: Accepted and discarded.
        """
        # extra keyword arguments are accepted for API compatibility only
        del kwargs
        source_geo_def = mask_source_lonlats(self.source_geo_def, mask)

        if radius_of_influence is None:
            try:
                radius_of_influence = source_geo_def.lons.resolution * 3
            except (AttributeError, TypeError):
                radius_of_influence = 10000

        # a resampler that already exists is reused as it is
        if self.resampler is not None:
            return

        nn_params = {
            'source_geo_def': source_geo_def,
            'target_geo_def': self.target_geo_def,
            'radius_of_influence': radius_of_influence,
            'neighbours': 1,
            'epsilon': epsilon,
            'reduce_data': reduce_data,
        }
        self.resampler = XArrayResamplerNN(**nn_params)

        try:
            self.load_neighbour_info(cache_dir, **nn_params)
        except IOError:
            LOG.debug("Computing kd-tree parameters")
            self.resampler.get_neighbour_info()
            self.save_neighbour_info(cache_dir, **nn_params)
        else:
            LOG.debug("Read pre-computed kd-tree parameters")
예제 #4
0
 def test_nearest_swath_1d_mask_to_grid_8n(self):
     """Resample a masked 1D swath onto the 2D grid using 8 neighbours.

     Only the types of the neighbour info and of the resampled result are
     checked here, not the resampled values.
     """
     import dask.array as da
     import xarray as xr
     from pyresample.kd_tree import XArrayResamplerNN

     nn_resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                      radius_of_influence=100000,
                                      neighbours=8)
     swath_data = self.tdata_1d
     # null data points are handed over as the mask
     neighbour_info = nn_resampler.get_neighbour_info(
         mask=swath_data.isnull())
     # vii, ia, voi
     for index_arr in neighbour_info[:3]:
         self.assertIsInstance(index_arr, da.Array)

     result = nn_resampler.get_sample_from_neighbour_info(swath_data)
     self.assertIsInstance(result, xr.DataArray)
     self.assertIsInstance(result.data, da.Array)
예제 #5
0
 def test_nearest_swath_2d_mask_to_area_1n(self):
     """Resample a masked 2D swath onto the 2D area using one neighbour.

     Besides the result types, the NaN-ignoring sum of the resampled
     values is compared with a known reference value.
     """
     import dask.array as da
     import xarray as xr
     from pyresample.kd_tree import XArrayResamplerNN

     swath_data = self.data_2d
     nn_resampler = XArrayResamplerNN(self.swath_def_2d, self.area_def,
                                      radius_of_influence=50000,
                                      neighbours=1)
     # null data points are handed over as the mask
     neighbour_info = nn_resampler.get_neighbour_info(
         mask=swath_data.isnull())
     # vii, ia, voi
     for index_arr in neighbour_info[:3]:
         self.assertIsInstance(index_arr, da.Array)

     result = nn_resampler.get_sample_from_neighbour_info(swath_data)
     self.assertIsInstance(result, xr.DataArray)
     self.assertIsInstance(result.data, da.Array)
     self.assertEqual(np.nansum(result.values), 15874591.0)
예제 #6
0
 def test_nearest_swath_1d_mask_to_grid_1n(self):
     """Resample a masked 1D swath onto the 2D grid using one neighbour.

     The resampled values are compared with the expected grid, which
     holds NaN in the one cell expected to stay unfilled.
     """
     import dask.array as da
     import xarray as xr
     from pyresample.kd_tree import XArrayResamplerNN

     swath_data = self.tdata_1d
     nn_resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                      radius_of_influence=100000,
                                      neighbours=1)
     # null data points are handed over as the mask
     neighbour_info = nn_resampler.get_neighbour_info(
         mask=swath_data.isnull())
     # vii, ia, voi
     for index_arr in neighbour_info[:3]:
         self.assertIsInstance(index_arr, da.Array)

     result = nn_resampler.get_sample_from_neighbour_info(swath_data)
     self.assertIsInstance(result, xr.DataArray)
     self.assertIsInstance(result.data, da.Array)

     wanted = np.array([
         [1., 2., 2.],
         [1., 2., 2.],
         [1., np.nan, 2.],
         [1., 2., 2.],
     ])
     np.testing.assert_allclose(result.values, wanted)
예제 #7
0
class KDTreeResampler(BaseResampler):
    """Resample using nearest neighbour.

    The neighbour search is delegated to an ``XArrayResamplerNN`` instance
    that is created on the first call to :meth:`precompute`. Its index
    arrays can be stored in and restored from ``.npz`` files when a cache
    directory is supplied.
    """

    def __init__(self, source_geo_def, target_geo_def):
        """Store the geometry definitions; the resampler is created lazily."""
        super(KDTreeResampler, self).__init__(source_geo_def, target_geo_def)
        self.resampler = None

    def precompute(self, mask=None, radius_of_influence=None, epsilon=0,
                   reduce_data=True, cache_dir=None, **kwargs):
        """Create a KDTree structure and store it for later use.

        Note: The `mask` keyword should be provided if geolocation may be valid
        where data points are invalid. This defaults to the `mask` attribute of
        the `data` numpy masked array passed to the `resample` method.

        Nothing is (re)computed when ``self.resampler`` already exists.

        Args:
            mask: Passed to ``mask_source_lonlats`` together with the source
                geolocation definition.
            radius_of_influence: Search radius handed to the resampler. When
                ``None`` it is three times ``source_geo_def.lons.resolution``
                or, if that is unavailable, ``10000``.
            epsilon: Forwarded unchanged to ``XArrayResamplerNN``.
            reduce_data: Forwarded unchanged to ``XArrayResamplerNN``.
            cache_dir: Directory used by the load/save helpers; a falsy
                value disables on-disk caching.
            **kwargs: Accepted and discarded.
        """
        # extra keyword arguments are accepted for API compatibility only
        del kwargs
        source_geo_def = mask_source_lonlats(self.source_geo_def, mask)

        if radius_of_influence is None:
            try:
                radius_of_influence = source_geo_def.lons.resolution * 3
            except (AttributeError, TypeError):
                radius_of_influence = 10000
        if self.resampler is None:
            kwargs = dict(source_geo_def=source_geo_def,
                          target_geo_def=self.target_geo_def,
                          radius_of_influence=radius_of_influence,
                          neighbours=1,
                          epsilon=epsilon,
                          reduce_data=reduce_data)

            self.resampler = XArrayResamplerNN(**kwargs)
            try:
                self.load_neighbour_info(cache_dir, **kwargs)
                LOG.debug("Read pre-computed kd-tree parameters")
            except IOError:
                # no usable cache: compute the indexes and try to store them
                LOG.debug("Computing kd-tree parameters")
                self.resampler.get_neighbour_info()
                self.save_neighbour_info(cache_dir, **kwargs)

    def load_neighbour_info(self, cache_dir, **kwargs):
        """Restore the resampler's index arrays from the on-disk cache.

        Args:
            cache_dir: Directory holding the cache file.
            **kwargs: Parameters identifying the cache file; forwarded to
                ``_create_cache_filename``.

        Raises:
            IOError: If ``cache_dir`` is falsy. Errors raised by ``np.load``
                for the cache file propagate unchanged.
        """
        if not cache_dir:
            raise IOError("No cache directory provided")

        filename = self._create_cache_filename(cache_dir, **kwargs)
        # the context manager closes the archive even if an entry is missing
        with np.load(filename) as cache:
            for elt in ['valid_input_index', 'valid_output_index',
                        'index_array', 'distance_array']:
                # read each entry only once; every lookup loads from disk
                val = cache[elt]
                if isinstance(val, tuple):
                    val = val[0]
                setattr(self.resampler, elt, val)

    def save_neighbour_info(self, cache_dir, **kwargs):
        """Write the resampler's index arrays to the on-disk cache.

        Does nothing when ``cache_dir`` is falsy.

        Args:
            cache_dir: Directory that receives the cache file.
            **kwargs: Parameters identifying the cache file; forwarded to
                ``_create_cache_filename``.
        """
        if cache_dir:
            filename = self._create_cache_filename(cache_dir, **kwargs)
            LOG.info('Saving kd_tree neighbour info to %s', filename)
            cache = {'valid_input_index': self.resampler.valid_input_index,
                     'valid_output_index': self.resampler.valid_output_index,
                     'index_array': self.resampler.index_array,
                     'distance_array': self.resampler.distance_array}

            np.savez(filename, **cache)

    def compute(self, data, weight_funcs=None, fill_value=None,
                with_uncert=False, **kwargs):
        """Resample ``data`` with the precomputed neighbour information.

        Args:
            data: Object to resample; its ``name`` is logged and its
                ``attrs`` supply the default fill value.
            weight_funcs: Unused by this resampler.
            fill_value: Value for output pixels without a neighbour. When
                ``None``, ``data.attrs['_FillValue']`` is used, or NaN if
                that attribute is absent.
            with_uncert: Unused by this resampler.
            **kwargs: Accepted and discarded.

        Returns:
            The result of ``get_sample_from_neighbour_info`` for ``data``.
        """
        del kwargs
        LOG.debug("Resampling %s", data.name)
        if fill_value is None:
            fill_value = data.attrs.get('_FillValue', np.nan)
        return self.resampler.get_sample_from_neighbour_info(data, fill_value)
예제 #8
0
class KDTreeResampler(BaseResampler):
    """Resample using a KDTree-based nearest neighbor algorithm.

    This resampler implements on-disk caching when the `cache_dir` argument
    is provided to the `resample` method. This should provide significant
    performance improvements on consecutive resampling of geostationary data.
    It is not recommended to provide `cache_dir` when the `mask` keyword
    argument is provided to `precompute` which occurs by default for
    `SwathDefinition` source areas.

    Args:
        cache_dir (str): Long term storage directory for intermediate
                         results. By default only 10 different source/target
                         combinations are cached to save space.
        mask_area (bool): Force resampled data's invalid pixel mask to be used
                          when searching for nearest neighbor pixels. By
                          default this is True for SwathDefinition source
                          areas and False for all other area definition types.
        radius_of_influence (float): Search radius cut off distance in meters
        epsilon (float): Allowed uncertainty in meters. Increasing uncertainty
                         reduces execution time.

    """

    # Resampler attributes that together make up the cacheable neighbour info.
    _INDEX_ATTRS = ('valid_input_index', 'valid_output_index',
                    'index_array', 'distance_array')

    def __init__(self, source_geo_def, target_geo_def):
        """Store the geometry definitions and set up an empty index cache."""
        super(KDTreeResampler, self).__init__(source_geo_def, target_geo_def)
        self.resampler = None
        # in-memory cache of index arrays, keyed by the mask's name
        self._index_caches = {}

    def precompute(self, mask=None, radius_of_influence=None, epsilon=0,
                   cache_dir=None, **kwargs):
        """Create a KDTree structure and store it for later use.

        Note: The `mask` keyword should be provided if geolocation may be valid
        where data points are invalid.

        Args:
            mask: Passed on to ``get_neighbour_info`` and to the cache
                helpers. Providing it together with ``cache_dir`` disables
                caching for this call.
            radius_of_influence: Search radius. When ``None`` it is three
                times ``source_geo_def.lons.resolution`` or, if that is
                unavailable, ``10000``.
            epsilon: Forwarded unchanged to ``XArrayResamplerNN``.
            cache_dir: Directory used by the load/save helpers.
            **kwargs: Accepted and discarded.

        """
        # extra keyword arguments are accepted for API compatibility only
        del kwargs
        source_geo_def = self.source_geo_def

        if mask is not None and cache_dir is not None:
            LOG.warning("Mask and cache_dir both provided to nearest "
                        "resampler. Cached parameters are affected by "
                        "masked pixels. Will not cache results.")
            cache_dir = None

        if radius_of_influence is None:
            try:
                radius_of_influence = source_geo_def.lons.resolution * 3
            except (AttributeError, TypeError):
                radius_of_influence = 10000

        kwargs = dict(source_geo_def=source_geo_def,
                      target_geo_def=self.target_geo_def,
                      radius_of_influence=radius_of_influence,
                      neighbours=1,
                      epsilon=epsilon)

        if self.resampler is None:
            # FIXME: We need to move all of this caching logic to pyresample
            self.resampler = XArrayResamplerNN(**kwargs)

        try:
            self.load_neighbour_info(cache_dir, mask=mask, **kwargs)
            LOG.debug("Read pre-computed kd-tree parameters")
        except IOError:
            # nothing cached for this configuration: compute and try to store
            LOG.debug("Computing kd-tree parameters")
            self.resampler.get_neighbour_info(mask=mask)
            self.save_neighbour_info(cache_dir, mask=mask, **kwargs)

    def _apply_cached_indexes(self, cached_indexes, persist=False):
        """Reassign various resampler index attributes.

        Args:
            cached_indexes (dict): Maps each index attribute name to its
                value. Tuples contribute their first item and numpy arrays
                are wrapped in dask arrays before being assigned.
            persist (bool): When true, dask arrays are persisted and the
                persisted array is written back into ``cached_indexes``.
                This is the only case in which the dict is modified.

        """
        for elt in self._INDEX_ATTRS:
            val = cached_indexes[elt]
            if isinstance(val, tuple):
                val = val[0]
            elif isinstance(val, np.ndarray):
                val = da.from_array(val, chunks=CHUNK_SIZE)
            elif persist and isinstance(val, da.Array):
                cached_indexes[elt] = val = val.persist()
            setattr(self.resampler, elt, val)

    def load_neighbour_info(self, cache_dir, mask=None, **kwargs):
        """Read index arrays from either the in-memory or disk cache.

        Args:
            cache_dir: Directory holding the on-disk cache; only consulted
                when nothing is cached in memory for this mask.
            mask: Mask whose ``name`` attribute (``None`` if absent) selects
                the cache entry.
            **kwargs: Parameters identifying the cache file; forwarded to
                ``_create_cache_filename``.

        Raises:
            IOError: If nothing is cached in memory for the mask and no
                ``cache_dir`` is given. Errors raised by ``np.load`` for the
                cache file propagate unchanged.

        """
        # The in-memory cache is keyed by the mask name, so it has to be
        # looked up with that same key. ``mask`` is a named parameter and
        # can never show up in ``kwargs``.
        # NOTE(review): a mask without a ``name`` shares the ``None`` key
        # with the unmasked case - confirm this is intended.
        mask_name = getattr(mask, 'name', None)
        if mask_name in self._index_caches:
            self._apply_cached_indexes(self._index_caches[mask_name])
        elif cache_dir:
            # the filename is only needed when reading from disk
            filename = self._create_cache_filename(cache_dir,
                                                   mask=mask_name, **kwargs)
            # copy the arrays out so the archive can be closed right away,
            # even if reading one of them fails
            with np.load(filename, mmap_mode='r') as cache:
                new_cache = dict(cache.items())
            self._apply_cached_indexes(new_cache)
            self._index_caches[mask_name] = new_cache
        else:
            raise IOError

    def save_neighbour_info(self, cache_dir, mask=None, **kwargs):
        """Cache resampler's index arrays if there is a cache dir.

        The arrays are kept in the in-memory cache under the mask's name
        and written to an ``.npz`` file in ``cache_dir``. Does nothing when
        ``cache_dir`` is falsy.
        """
        if cache_dir:
            mask_name = getattr(mask, 'name', None)
            filename = self._create_cache_filename(
                cache_dir, mask=mask_name, **kwargs)
            LOG.info('Saving kd_tree neighbour info to %s', filename)
            cache = self._read_resampler_attrs()
            # update the cache in place with persisted dask arrays
            self._apply_cached_indexes(cache, persist=True)
            self._index_caches[mask_name] = cache
            np.savez(filename, **cache)

    def _read_resampler_attrs(self):
        """Read certain attributes from the resampler for caching."""
        return {attr_name: getattr(self.resampler, attr_name)
                for attr_name in self._INDEX_ATTRS}

    def compute(self, data, weight_funcs=None, fill_value=np.nan,
                with_uncert=False, **kwargs):
        """Resample ``data`` with the precomputed neighbour information.

        Args:
            data: Object to resample; its ``name`` is logged.
            weight_funcs: Unused by this resampler.
            fill_value: Value for output pixels without a neighbour.
            with_uncert: Unused by this resampler.
            **kwargs: Accepted and discarded.

        Returns:
            The result of ``get_sample_from_neighbour_info`` for ``data``.

        """
        del kwargs
        LOG.debug("Resampling %s", data.name)
        return self.resampler.get_sample_from_neighbour_info(data, fill_value)