Example #1
    def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
        """
        Datasets grouped by their timestamps.
        :param datasets: the `VirtualDatasetBag` to fetch data from
        """
        geopolygon = datasets.geopolygon

        merged = merge_search_terms(self, group_settings)
        # `output_geobox` needs bounds either from a geopolygon or from the
        # datasets themselves; enumerate the datasets only when no geopolygon
        # is available
        if geopolygon is None:
            selected = list(datasets.contained_datasets())
        else:
            selected = None

        geobox = output_geobox(datasets=selected,
                               output_crs=self['reproject']['output_crs'],
                               resolution=self['reproject']['resolution'],
                               align=self['reproject'].get('align'),
                               geopolygon=geopolygon)

        # load natively
        input_box = self._input.group(datasets, **reject_keys(merged, self._GEOBOX_KEYS))

        return VirtualDatasetBox(input_box.box,
                                 geobox,
                                 True,
                                 datasets.product_definitions,
                                 geopolygon=geopolygon)
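The positional `True` passed to `VirtualDatasetBox` here appears to be the `load_natively` flag (compare the keyword form in Example #4): the input is grouped natively, and only the output grid comes from the `reproject` settings.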
Example #2
def test_output_geobox_load_hints():
    from datacube.testutils.geom import AlbersGS
    from datacube.api.core import output_geobox

    geobox0 = AlbersGS.tile_geobox((15, -40))

    geobox = output_geobox(load_hints={'output_crs': geobox0.crs,
                                       'resolution': geobox0.resolution},
                           geopolygon=geobox0.extent)
    assert geobox == geobox0
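The `load_hints` dict above mirrors the explicit keyword path: the same grid can presumably be requested by passing the CRS and resolution directly. A sketch reusing `geobox0` from the test above, assuming `output_crs` accepts a `CRS` object as well as the 'EPSG:...' strings seen in the fail-path tests:

    # Sketch (not from the original test): the same request without load_hints.
    geobox = output_geobox(output_crs=geobox0.crs,       # assumed to accept a CRS object
                           resolution=geobox0.resolution,
                           geopolygon=geobox0.extent)
    assert geobox == geobox0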
Example #3
        def _product_group_():
            # select only those inside the ROI
            # ROI could be smaller than the query for the `query` method

            geopolygon = query_geopolygon(**search_terms)
            if geopolygon is not None:
                selected = list(
                    select_datasets_inside_polygon(datasets.pile, geopolygon))
            else:
                geopolygon = datasets.geopolygon
                selected = list(datasets.pile)

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(search_terms, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=datasets.grid_spec,
                                   geopolygon=geopolygon,
                                   **select_keys(merged, self._GEOBOX_KEYS))

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            # information needed for Datacube.load_data
            return VirtualDatasetBox(
                Datacube.group_datasets(selected, group_query), geobox,
                datasets.product_definitions)
Example #4
    def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
        geopolygon = datasets.geopolygon
        selected = list(datasets.bag)

        # geobox
        merged = merge_search_terms(self, group_settings)

        try:
            geobox = output_geobox(datasets=selected,
                                   grid_spec=datasets.product_definitions[self._product].grid_spec,
                                   geopolygon=geopolygon, **select_keys(merged, self._GEOBOX_KEYS))
            load_natively = False

        except ValueError:
            # we are not calculating geoboxes here for the moment
            # since it may require filesystem access
            # in ODC 2.0 the dataset should know the information required
            geobox = None
            load_natively = True

        # group by time
        group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                                 geobox,
                                 load_natively,
                                 datasets.product_definitions,
                                 geopolygon=None if not load_natively else geopolygon)
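As the inline comments explain, `output_geobox` raises `ValueError` when it cannot work out a grid (see the fail-path tests in the next examples), and this implementation uses that as the signal to fall back to native loading; note that the geopolygon is only propagated into the `VirtualDatasetBox` when loading natively.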
Example #5
def test_output_geobox_fail_paths():
    from datacube.api.core import output_geobox

    with pytest.raises(ValueError):
        output_geobox()

    with pytest.raises(ValueError):
        output_geobox(output_crs='EPSG:4326')  # need resolution as well

    with pytest.raises(ValueError):
        output_geobox(output_crs='EPSG:4326', resolution=(1, 1))  # need bounds as well
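Taken together, the fail paths show that `output_geobox` needs a complete grid specification: a CRS and resolution (given directly, via `load_hints`, or via a `GridSpec`) plus a source of bounds (a geopolygon or a list of datasets), unless a `like` geobox short-circuits the whole computation as in the `test_like_geobox` examples below.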
Example #6
def test_output_geobox_fail_paths():
    from datacube.api.core import output_geobox
    from datacube.model import GridSpec
    from datacube.utils import geometry

    gs_nores = GridSpec(crs=geometry.CRS('EPSG:4326'),
                        tile_size=None,
                        resolution=None)

    with pytest.raises(ValueError):
        output_geobox()

    with pytest.raises(ValueError):
        output_geobox(output_crs='EPSG:4326')  # need resolution as well

    with pytest.raises(ValueError):
        output_geobox(grid_spec=gs_nores)  # GridSpec with missing resolution
Example #7
    def group(self, datasets: VirtualDatasetBag,
              **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
        geopolygon = datasets.geopolygon
        selected = list(datasets.pile)

        # geobox
        merged = merge_search_terms(self, search_terms)

        geobox = output_geobox(
            datasets=selected,
            grid_spec=datasets.product_definitions[self._product].grid_spec,
            geopolygon=geopolygon,
            **select_keys(merged, self._GEOBOX_KEYS))

        # group by time
        group_query = query_group_by(
            **select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(
            Datacube.group_datasets(selected, group_query), geobox,
            datasets.product_definitions)
Example #8
def test_like_geobox():
    from datacube.testutils.geom import AlbersGS
    from datacube.api.core import output_geobox

    geobox = AlbersGS.tile_geobox((15, -40))
    assert output_geobox(like=geobox) is geobox
Example #9
def test_like_geobox():
    geobox = AlbersGS.tile_geobox((15, -40))
    assert output_geobox(like=geobox) is geobox
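Both versions of `test_like_geobox` assert identity (`is`) rather than equality: when a `like` geobox is supplied, `output_geobox` returns that very object rather than constructing a new one.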
Example #10
    def group(self, datasets, **search_terms):
        # type: (QueryResult, Dict[str, Any]) -> DatasetPile
        """
        Datasets grouped by their timestamps.
        :param datasets: the `QueryResult` to fetch data from
        :param search_terms: search terms to specify a spatial sub-region
        """
        grid_spec = datasets.grid_spec

        if 'product' in self:
            # select only those inside the ROI
            # ROI could be smaller than the query for `query`
            spatial_query = reject_keys(search_terms, self._NON_SPATIAL_KEYS)
            selected = list(
                select_datasets_inside_polygon(
                    datasets.pile, query_geopolygon(**spatial_query)))

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(spatial_query, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=grid_spec,
                                   **select_keys(merged, self._GEOBOX_KEYS),
                                   **spatial_query)

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            def wrap(_, value):
                return QueryResult(value, grid_spec)

            # information needed for Datacube.load_data
            return DatasetPile(Datacube.group_datasets(selected, group_query),
                               geobox).map(wrap)

        elif 'transform' in self:
            return self._input.group(datasets, **search_terms)

        elif 'collate' in self:
            self._assert(
                len(datasets.pile) == len(self._children),
                "invalid dataset pile")

            def build(source_index, product, dataset_pile):
                grouped = product.group(dataset_pile, **search_terms)

                def tag(_, value):
                    in_position = [
                        value if i == source_index else None
                        for i, _ in enumerate(datasets.pile)
                    ]
                    return QueryResult(in_position, grid_spec)

                return grouped.map(tag)

            groups = [
                build(source_index, product, dataset_pile)
                for source_index, (product, dataset_pile) in enumerate(
                    zip(self._children, datasets.pile))
            ]

            return DatasetPile(
                xarray.concat([grouped.pile for grouped in groups],
                              dim='time'),
                select_unique([grouped.geobox for grouped in groups]))

        elif 'juxtapose' in self:
            self._assert(
                len(datasets.pile) == len(self._children),
                "invalid dataset pile")

            groups = [
                product.group(datasets, **search_terms)
                for product, datasets in zip(self._children, datasets.pile)
            ]

            aligned_piles = xarray.align(*[grouped.pile for grouped in groups])
            child_groups = [
                DatasetPile(aligned_piles[i], grouped.geobox)
                for i, grouped in enumerate(groups)
            ]

            def tuplify(indexes, _):
                return QueryResult([
                    grouped.pile.sel(**indexes).item()
                    for grouped in child_groups
                ], grid_spec)

            return DatasetPile(
                child_groups[0].map(tuplify).pile,
                select_unique([grouped.geobox for grouped in groups]))

        else:
            raise VirtualProductException("virtual product was not validated")
Example #11
    def group(self, datasets: VirtualDatasetBag,
              **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
        """
        Datasets grouped by their timestamps.
        :param datasets: the `VirtualDatasetBag` to fetch data from
        :param search_terms: search terms to specify a spatial sub-region
        """
        grid_spec = datasets.grid_spec
        geopolygon = datasets.geopolygon

        if 'product' in self:
            # select only those inside the ROI
            # ROI could be smaller than the query for the `query` method
            if query_geopolygon(**search_terms) is not None:
                geopolygon = query_geopolygon(**search_terms)
                selected = list(
                    select_datasets_inside_polygon(datasets.pile, geopolygon))
            else:
                selected = list(datasets.pile)

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(search_terms, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=grid_spec,
                                   geopolygon=geopolygon,
                                   **select_keys(merged, self._GEOBOX_KEYS))

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            # information needed for Datacube.load_data
            return VirtualDatasetBox(
                Datacube.group_datasets(selected, group_query), geobox,
                datasets.product_definitions)

        elif 'transform' in self:
            return self._input.group(datasets, **search_terms)

        elif 'collate' in self:
            self._assert(
                'collate' in datasets.pile
                and len(datasets.pile['collate']) == len(self._children),
                "invalid dataset pile")

            def build(source_index, product, dataset_pile):
                grouped = product.group(
                    VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                      datasets.geopolygon,
                                      datasets.product_definitions),
                    **search_terms)

                def tag(_, value):
                    return {'collate': (source_index, value)}

                return grouped.map(tag)

            groups = [
                build(source_index, product, dataset_pile)
                for source_index, (product, dataset_pile) in enumerate(
                    zip(self._children, datasets.pile['collate']))
            ]

            return VirtualDatasetBox(
                xarray.concat([grouped.pile for grouped in groups],
                              dim='time'),
                select_unique([grouped.geobox for grouped in groups]),
                merge_dicts(
                    [grouped.product_definitions for grouped in groups]))

        elif 'juxtapose' in self:
            self._assert(
                'juxtapose' in datasets.pile
                and len(datasets.pile['juxtapose']) == len(self._children),
                "invalid dataset pile")

            groups = [
                product.group(
                    VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                      datasets.geopolygon,
                                      datasets.product_definitions),
                    **search_terms) for product, dataset_pile in zip(
                        self._children, datasets.pile['juxtapose'])
            ]

            aligned_piles = xarray.align(*[grouped.pile for grouped in groups])

            def tuplify(indexes, _):
                return {
                    'juxtapose':
                    [pile.sel(**indexes).item() for pile in aligned_piles]
                }

            return VirtualDatasetBox(
                xr_apply(aligned_piles[0], tuplify),
                select_unique([grouped.geobox for grouped in groups]),
                merge_dicts(
                    [grouped.product_definitions for grouped in groups]))

        else:
            raise VirtualProductException("virtual product was not validated")
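A design note on the two combinators above: `collate` concatenates the child groups along the `time` dimension and tags each value with its source index, while `juxtapose` aligns the child piles with `xarray.align` and zips the values at each coordinate into a tuple; both assert that the dataset pile has exactly one entry per child product.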
Example #12
def dc_load(
    datasets: Sequence[Dataset],
    measurements: Optional[Union[str, Sequence[str]]] = None,
    geobox: Optional[GeoBox] = None,
    groupby: Optional[str] = None,
    resampling: Optional[Union[str, Dict[str, str]]] = None,
    skip_broken_datasets: bool = False,
    chunks: Optional[Dict[str, int]] = None,
    progress_cbk: Optional[Callable[[int, int], Any]] = None,
    fuse_func=None,
    **kw,
) -> xr.Dataset:
    assert len(datasets) > 0

    # dask_chunks is a backward-compatibility alias for chunks
    if chunks is None:
        chunks = kw.pop("dask_chunks", None)
    # group_by is a backward-compatibility alias for groupby
    if groupby is None:
        groupby = kw.pop("group_by", "time")
    # bands alias for measurements
    if measurements is None:
        measurements = kw.pop("bands", None)

    # extract all "output_geobox" inputs
    geo_keys = {
        k: kw.pop(k)
        for k in [
            "like",
            "geopolygon",
            "resolution",
            "output_crs",
            "crs",
            "align",
            "x",
            "y",
            "lat",
            "lon",
        ] if k in kw
    }

    ds = datasets[0]
    product = ds.type

    if geobox is None:
        geobox = output_geobox(
            grid_spec=product.grid_spec,
            load_hints=product.load_hints(),
            **geo_keys,
            datasets=datasets,
        )
    elif len(geo_keys):
        warn(f"Supplied 'geobox=' parameter aliases {list(geo_keys)} inputs")

    grouped = Datacube.group_datasets(datasets, groupby)
    mm = product.lookup_measurements(measurements)
    return Datacube.load_data(
        grouped,
        geobox,
        mm,
        resampling=resampling,
        fuse_func=fuse_func,
        dask_chunks=chunks,
        skip_broken_datasets=skip_broken_datasets,
        progress_cbk=progress_cbk,
        **kw,
    )
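A minimal usage sketch (assumed, not part of the original snippet; the `dc` handle, product name, and band names are hypothetical). The grid keywords are forwarded to `output_geobox` through the `geo_keys` extraction above:

# Hypothetical usage: `dc` is an assumed datacube.Datacube instance and
# 'some_product' / the band names are made up for illustration.
datasets = dc.find_datasets(product='some_product', time='2020-01')
xx = dc_load(datasets,
             measurements=['red', 'nir'],   # or via the bands= alias
             output_crs='EPSG:4326',        # forwarded to output_geobox
             resolution=(-0.001, 0.001))    # (y, x) in CRS units, y negative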