Example #1
0
    def fetch(self, grouped: VirtualDatasetBox,
              **load_settings: Any) -> xarray.Dataset:
        """
        Convert grouped datasets to `xarray.Dataset`.

        :param grouped: grouped datasets to load; supplies the product
            definitions, the dataset box and either a target geobox or, for
            native loads, an optional geopolygon to crop to
        :param load_settings: load-time settings; those named in
            ``self._LOAD_KEYS`` (except ``measurements``) are combined with
            the settings stored on ``self`` via ``merge_search_terms``, while
            ``measurements`` restricts which measurements are loaded
        :return: the result of ``Datacube.load_data``
        """
        # NOTE: the annotation on ``**load_settings`` describes each keyword
        # value, hence ``Any`` rather than ``Dict[str, Any]``.

        # 'measurements' is handled separately below, so it is excluded here
        load_keys = self._LOAD_KEYS - {'measurements'}
        merged = merge_search_terms(select_keys(self, load_keys),
                                    select_keys(load_settings, load_keys))

        product = grouped.product_definitions[self._product]

        # every measurement requested at load time must be one this product
        # was configured with
        if 'measurements' in self and 'measurements' in load_settings:
            for measurement in load_settings['measurements']:
                self._assert(
                    measurement in self['measurements'],
                    '{} not found in {}'.format(measurement, self._product))

        measurement_dicts = self.output_measurements(
            grouped.product_definitions, load_settings.get('measurements'))

        if grouped.load_natively:
            canonical_names = [
                product.canonical_measurement(measurement)
                for measurement in measurement_dicts
            ]
            # union of the native geoboxes of all the grouped datasets
            dataset_geobox = geobox_union_conservative([
                native_geobox(ds,
                              measurements=canonical_names,
                              basis=merged.get('like'))
                for ds in grouped.box.sum().item()
            ])

            if grouped.geopolygon is not None:
                # crop the native geobox to the requested polygon; this must
                # be a plain sub-window of the native grid
                reproject_roi = compute_reproject_roi(
                    dataset_geobox,
                    GeoBox.from_geopolygon(
                        grouped.geopolygon,
                        crs=dataset_geobox.crs,
                        align=dataset_geobox.alignment,
                        resolution=dataset_geobox.resolution))

                self._assert(reproject_roi.is_st,
                             "native load is not axis-aligned")
                self._assert(numpy.isclose(reproject_roi.scale, 1.0),
                             "native load should not require scaling")

                geobox = dataset_geobox[reproject_roi.roi_src]
            else:
                geobox = dataset_geobox
        else:
            geobox = grouped.geobox

        result = Datacube.load_data(grouped.box,
                                    geobox,
                                    list(measurement_dicts.values()),
                                    fuse_func=merged.get('fuse_func'),
                                    dask_chunks=merged.get('dask_chunks'),
                                    resampling=merged.get(
                                        'resampling', 'nearest'))

        return result
Example #2
0
def reproject_band(band, geobox, resampling, dims, dask_chunks=None):
    """
    Reproject a single measurement to the geobox.

    When `band.data` has no `dask` attribute, or `dask_chunks` is None, the
    whole array is reprojected with a single `reproject_array` call.
    Otherwise a new dask graph layer is built with one task per output tile.
    """
    is_lazy = hasattr(band.data, 'dask')
    if not is_lazy or dask_chunks is None:
        warped = reproject_array(band.data, band.nodata, band.geobox, geobox,
                                 resampling)
        return wrap_in_dataarray(warped, band, geobox, dims)

    layer_name = 'warp_{name}-{token}'.format(name=band.name,
                                              token=uuid.uuid4().hex)

    # tile size per spatial dimension; a dimension missing from `dask_chunks`
    # gets a single tile spanning its full extent
    tile_shape = tuple(
        dask_chunks.get(dim, geobox.shape[axis])
        for axis, dim in enumerate(geobox.dims))
    tiles = GeoboxTiles(geobox, tile_shape)

    layer = {}
    deps = [band.data]

    for tile_index in numpy.ndindex(tiles.shape):
        task_key = (layer_name, ) + tile_index
        tile_geobox = tiles[tile_index]

        # region of the input that is needed to fill this output tile
        src_roi = compute_reproject_roi(band.geobox, tile_geobox,
                                        padding=1).roi_src

        # the matching piece of the input, collapsed into a single chunk
        src_tile = band[(..., ) + src_roi].chunk(-1)

        if min(src_tile.shape) == 0:
            # no input pixels for this tile: fill it with nodata
            layer[task_key] = (numpy.full, tile_geobox.shape, band.nodata,
                               band.dtype)
            continue

        deps.append(src_tile.data)
        # key of the (single) input chunk handed to `reproject_array`
        src_key = list(flatten(src_tile.data.__dask_keys__()))[0]
        layer[task_key] = (reproject_array, src_key, band.nodata,
                           src_tile.geobox, tile_geobox, resampling)

    # The graph is assembled by hand and then wrapped into a dask array:
    # the higher level dask.array interface only allows regular chunking,
    # so manipulating the graph directly is the easiest way to obtain an
    # array with irregular chunks after reprojection.
    graph = band.data.dask.from_collections(layer_name, layer,
                                            dependencies=deps)
    data = dask.array.Array(graph,
                            layer_name,
                            chunks=tile_shape,
                            dtype=band.dtype,
                            shape=tiles.base.shape)

    return wrap_in_dataarray(data, band, geobox, dims)
    def _read(gbox, resampling='nearest',
              fallback_nodata=-999,
              dst_nodata=-999,
              check_paste=False):
        """
        Read band 1 of `mm.path` into a new array shaped like `gbox`.

        `mm` comes from the enclosing scope, which is not visible here.
        Returns the filled array (pre-initialised with `dst_nodata`) together
        with the roi reported by `read_time_slice`.
        """
        source = RasterFileDataSource(mm.path, 1, nodata=fallback_nodata)
        with source.open() as rdr:
            if check_paste:
                # the caller expects this read to go through the paste path
                rr = compute_reproject_roi(rdr_geobox(rdr), gbox)
                paste_ok, reason = can_paste(rr)
                assert paste_ok is True, reason

            out = np.full(gbox.shape, dst_nodata, dtype=rdr.dtype)
            roi = read_time_slice(rdr, out, gbox, resampling, dst_nodata)
            return out, roi
Example #4
0
def test_compute_reproject_roi():
    """Check `compute_reproject_roi` across CRSs and for crops of one grid."""
    src = AlbersGS.tile_geobox((15, -40))
    whole_src = np.s_[0:src.height, 0:src.width]

    # destination in a different CRS, covering slightly more than the source
    dst_extent = src.extent.to_crs(epsg3857).buffer(10)
    dst = geometry.GeoBox.from_geopolygon(dst_extent,
                                          resolution=src.resolution)

    result = compute_reproject_roi(src, dst)

    assert result.roi_src == whole_src
    assert 0 < result.scale < 1
    assert result.is_st is False
    assert result.transform.linear is None
    assert result.scale in result.scale2

    # the same crop of the source grid is used by all remaining cases
    crop = np.s_[113:-100, 33:-10]

    # pure translation, default padding/alignment
    result = compute_reproject_roi(src, src[crop])
    assert result.roi_src == roi_normalise(crop, src.shape)
    assert result.scale == 1
    assert result.is_st is True

    # pure translation, padding and alignment switched off
    result = compute_reproject_roi(src, src[crop], padding=0, align=0)
    assert result.roi_src == roi_normalise(crop, src.shape)
    assert result.scale == 1
    assert result.scale2 == (1, 1)

    # aligning to 256 expands the source roi to the whole source here
    result = compute_reproject_roi(src, src[crop], align=256)

    assert result.roi_src == whole_src
    assert result.scale == 1

    # source and destination rois have the same shape as the crop itself
    result = compute_reproject_roi(src, src[crop])

    assert result.scale == 1
    assert roi_shape(result.roi_src) == roi_shape(result.roi_dst)
    assert roi_shape(result.roi_dst) == src[crop].shape
Example #5
0
def test_compute_reproject_roi_issue1047():
    """ Regression test for issue 1047.

    `compute_reproject_roi(geobox, geobox[roi])` could return a source roi
    different from `roi` when `geobox` has (1) tiny pixels and (2) oddly
    sized `alignment`.

    Verify that the roi now round-trips exactly.
    """
    pixel_size = 0.00027778
    transform = Affine(pixel_size, 0.0, 148.72673054908861,
                       0.0, -pixel_size, -34.98825802556622)
    geobox = GeoBox(3000, 3000, transform, "EPSG:4326")

    crop = np.s_[2800:2810, 10:30]
    result = compute_reproject_roi(geobox, geobox[crop])

    assert result.is_st is True
    assert result.roi_src == crop
    assert result.roi_dst == np.s_[0:10, 0:20]
    def _read(gbox,
              resampling='nearest',
              fallback_nodata=-999,
              dst_nodata=-999,
              check_paste=False):
        """
        Read `mm.path` via `read_time_slice_v2` into an array shaped like `gbox`.

        `mm` comes from the enclosing scope, which is not visible here.
        Returns the array (pre-filled with `dst_nodata`, with the pixels that
        were read pasted in at `roi`) together with `roi`.
        """
        reader = open_reader(mm.path, nodata=fallback_nodata)

        if check_paste:
            # the caller expects this read to go through the paste path
            rr = compute_reproject_roi(rdr_geobox(reader), gbox)
            paste_ok, reason = can_paste(rr)
            assert paste_ok is True, reason

        canvas = np.full(gbox.shape, dst_nodata, dtype=reader.dtype)
        pixels, roi = read_time_slice_v2(reader, gbox, resampling, dst_nodata)
        canvas[roi] = pixels
        return canvas, roi
Example #7
0
def test_compute_reproject_roi_issue647():
    """ Regression test for issue 647.

    For some non-overlapping geoboxes `roi_dst` used to come back non-empty
    even though `roi_src` was empty.

    Both rois must be empty for such a pair.
    """
    from datacube.utils.geometry import CRS

    src_transform = Affine(10, 0, 300000, 0, -10, 5900020)
    src = GeoBox(10980, 10980, src_transform, CRS('epsg:32756'))

    dst_transform = Affine(10, 0, 1730240, 0, -10, -4170240)
    dst = GeoBox(976, 976, dst_transform, CRS('EPSG:3577'))

    # precondition: the two footprints really do not overlap
    src_extent = src.extent
    dst_extent_in_src_crs = dst.extent.to_crs(src.crs)
    assert src_extent.overlaps(dst_extent_in_src_crs) is False

    result = compute_reproject_roi(src, dst)

    assert roi_is_empty(result.roi_src)
    assert roi_is_empty(result.roi_dst)
 def check_false(dst, **kwargs):
     """Fail if `can_paste` reports a truthy result for `src` -> `dst`."""
     # `src` comes from the enclosing scope, which is not visible here
     rr = compute_reproject_roi(src, dst)
     ok, _reason = can_paste(rr, **kwargs)
     assert not ok, "Expected can_paste to return False, but got True"
 def check_true(dst, **kwargs):
     """Fail, reporting `can_paste`'s reason, if the result is falsy."""
     # `src` comes from the enclosing scope, which is not visible here
     rr = compute_reproject_roi(src, dst)
     ok, reason = can_paste(rr, **kwargs)
     assert ok, reason
Example #10
0
def dask_reproject(
    src: da.Array,
    src_geobox: GeoBox,
    dst_geobox: GeoBox,
    resampling: str = "nearest",
    chunks: Optional[Tuple[int, int]] = None,
    src_nodata: Optional[NodataType] = None,
    dst_nodata: Optional[NodataType] = None,
    axis: int = 0,
    name: str = "reproject",
) -> da.Array:
    """
    Lazily reproject a dask array onto another GeoBox.

    One task is created per destination tile returned by
    ``GeoboxTiles.tiles(src_geobox.extent)``; every other output block is
    filled with ``dst_nodata`` (or ``0`` when that is ``None``). Dimensions
    after X are not supported (asserted).

    :param src       : Input src[(time,) y,x (, band)]
    :param src_geobox: GeoBox of the source array
    :param dst_geobox: GeoBox of the destination
    :param resampling: Resampling strategy as a string: nearest, bilinear, average, mode ...
    :param chunks    : Output chunking in Y,X only, defaults to the input chunk size
    :param src_nodata: nodata marker for source image
    :param dst_nodata: nodata marker for dst image, defaults to ``src_nodata``
    :param axis      : Index of Y axis (default is 0)
    :param name      : Dask graph name, "reproject" is the default
    """
    yx = slice(axis, axis + 2)

    if chunks is None:
        chunks = src.chunksize[yx]

    if dst_nodata is None:
        dst_nodata = src_nodata

    assert src.shape[yx] == src_geobox.shape
    yx_shape = dst_geobox.shape
    yx_chunks = unpack_chunks(chunks, yx_shape)

    # only the Y,X part of the chunking/shape changes
    dst_chunks = src.chunks[:axis] + yx_chunks + src.chunks[axis + 2:]
    dst_shape = src.shape[:axis] + yx_shape + src.shape[axis + 2:]

    # number of blocks along the dimensions before Y and after X
    leading_blocks = tuple(len(ch) for ch in dst_chunks[:axis])
    trailing_blocks = tuple(len(ch) for ch in dst_chunks[axis + 2:])
    assert trailing_blocks == ()
    deps = [src]

    first_tile_shape = (yx_chunks[0][0], yx_chunks[1][0])
    gbt = GeoboxTiles(dst_geobox, first_tile_shape)
    tiles_with_data = list(gbt.tiles(src_geobox.extent))

    layer_name = randomize(name)
    dsk: Dict[Any, Any] = {}

    if src.dtype == "bool":
        block_impl = _reproject_block_bool_impl
    else:
        block_impl = _reproject_block_impl

    for tile_idx in tiles_with_data:
        tile_geobox = gbt[tile_idx]
        src_roi = compute_reproject_roi(src_geobox, tile_geobox).roi_src
        src_crop = crop_2d_dense(src, src_roi, axis=axis)
        src_crop_geobox = src_geobox[src_roi]

        deps.append(src_crop)

        for lead_idx in np.ndindex(leading_blocks):
            # TODO: band dims
            src_key = (src_crop.name, *lead_idx, 0, 0)
            dsk[(layer_name, *lead_idx, *tile_idx)] = (
                block_impl,
                src_key,
                src_crop_geobox,
                tile_geobox,
                resampling,
                src_nodata,
                dst_nodata,
                axis,
            )

    fill_value = dst_nodata if dst_nodata is not None else 0
    shape_in_blocks = tuple(len(ch) for ch in dst_chunks)

    mk_empty = empty_maker(fill_value, src.dtype, dsk)

    # every output block without a reproject task becomes an empty block
    for block_idx in np.ndindex(shape_in_blocks):
        # TODO: other dims
        key = (layer_name, *block_idx)
        if key in dsk:
            continue
        block_shape = tuple(ch[i] for ch, i in zip(dst_chunks, block_idx))
        dsk[key] = mk_empty(block_shape)

    graph = HighLevelGraph.from_collections(layer_name, dsk, dependencies=deps)

    return da.Array(graph,
                    layer_name,
                    chunks=dst_chunks,
                    dtype=src.dtype,
                    shape=dst_shape)