Example #1
0
        def _product_group_():
            """
            Group the datasets of a single product and work out the output geobox.

            Reads `self`, `datasets` and `search_terms` from the enclosing scope.

            :return: `VirtualDatasetBox` holding the grouped datasets, the output
                     geobox and `datasets.product_definitions`
            """
            # select only those inside the ROI
            # ROI could be smaller than the query for the `query` method
            # (the polygon is resolved once and reused for both the test and the value)
            query_polygon = query_geopolygon(**search_terms)

            if query_polygon is not None:
                geopolygon = query_polygon
                selected = list(
                    select_datasets_inside_polygon(datasets.pile, geopolygon))
            else:
                # no spatial restriction in the search terms: keep every dataset
                # and fall back to the polygon recorded on the datasets
                geopolygon = datasets.geopolygon
                selected = list(datasets.pile)

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(search_terms, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=datasets.grid_spec,
                                   geopolygon=geopolygon,
                                   **select_keys(merged, self._GEOBOX_KEYS))

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            # information needed for Datacube.load_data
            return VirtualDatasetBox(
                Datacube.group_datasets(selected, group_query), geobox,
                datasets.product_definitions)
Example #2
0
    def __call__(self, index, product, time, group_by) -> Tile:
        """
        Build the `Tile` (grouped datasets plus output geobox) for one region.

        :param index: index handed to the `Datacube` constructor
        :param product: product passed to `Datacube.find_datasets`
        :param time: time query passed to `Datacube.find_datasets`
        :param group_by: grouping name, used for the dataset search and for
                         `query_group_by`
        :return: `Tile` of the grouped sources and the geobox of the region
        """
        # Do for a specific poly whose boundary is known
        target_crs = CRS(self.storage['crs'])

        # only the spatial entries of the configured region go to the search
        spatial_keys = ('geopolygon', 'lon', 'lat', 'longitude', 'latitude',
                        'x', 'y')
        spatial_query = {}
        for key, value in self.input_region.items():
            if key in spatial_keys:
                spatial_query[key] = value

        if self.feature is None:
            region = query_geopolygon(**self.input_region)
        else:
            # an explicit feature replaces any polygon given by the region
            region = self.feature.geopolygon
            spatial_query['geopolygon'] = region

        dc = Datacube(index=index)
        found = dc.find_datasets(product=product,
                                 time=time,
                                 group_by=group_by,
                                 **spatial_query)
        grouper = query_group_by(group_by=group_by)
        sources = dc.group_datasets(found, grouper)

        # resolution listed in the order of the output CRS dimensions
        resolution = []
        for dim in target_crs.dimensions:
            resolution.append(self.storage['resolution'][dim])

        projected = region.to_crs(target_crs)
        return Tile(sources,
                    GeoBox.from_geopolygon(projected, resolution=resolution))
Example #3
0
    def __call__(self, product, time, group_by) -> Tile:
        """
        Build the `Tile` (grouped datasets plus output geobox) for one region,
        using the datacube held in `self.dc`.

        :param product: product passed to `find_datasets`
        :param time: time query passed to `find_datasets`
        :param group_by: grouping name, used for the dataset search and for
                         `query_group_by`
        :return: `Tile` of the grouped sources and the geobox of the region
        """
        # Do for a specific poly whose boundary is known
        target_crs = CRS(self.storage['crs'])

        # only the spatial entries of the configured region go to the search
        spatial_keys = ('geopolygon', 'lon', 'lat', 'longitude', 'latitude',
                        'x', 'y')
        spatial_query = {
            key: value
            for key, value in self.input_region.items()
            if key in spatial_keys
        }

        if 'feature_id' not in self.input_region:
            region = query_geopolygon(**self.input_region)
        else:
            # a feature entry carries its own geometry and CRS text
            region = Geometry(self.input_region['geom_feat'],
                              CRS(self.input_region['crs_txt']))
            spatial_query['geopolygon'] = region

        found = self.dc.find_datasets(product=product,
                                      time=time,
                                      group_by=group_by,
                                      **spatial_query)
        grouper = query_group_by(group_by=group_by)
        sources = self.dc.group_datasets(found, grouper)

        # resolution listed in the order of the output CRS dimensions
        resolution = []
        for dim in target_crs.dimensions:
            resolution.append(self.storage['resolution'][dim])

        projected = region.to_crs(target_crs)
        return Tile(sources,
                    GeoBox.from_geopolygon(projected, resolution=resolution))
Example #4
0
    def group(self, datasets, **search_terms):
        # type: (QueryResult, **Any) -> DatasetPile
        """
        Datasets grouped by their timestamps.

        The behaviour depends on which recipe key is present in `self`:
        `product` groups the datasets directly, `transform` delegates to the
        input product, `collate` and `juxtapose` group every child and then
        combine the per-child results.

        :param datasets: the `QueryResult` to fetch data from
        :param search_terms: query terms; the spatial ones specify a spatial
                             sub-region, the non-spatial ones are merged with
                             the settings stored on the product itself
        :return: `DatasetPile` of the grouped datasets and the output geobox
        :raises VirtualProductException: when `self` holds none of the keys
                                         `product`, `transform`, `collate`
                                         or `juxtapose`
        """
        grid_spec = datasets.grid_spec

        if 'product' in self:
            # select only those inside the ROI
            # ROI could be smaller than the query for `query`
            spatial_query = reject_keys(search_terms, self._NON_SPATIAL_KEYS)
            geopolygon = query_geopolygon(**spatial_query)
            if geopolygon is None:
                # no spatial restriction given: nothing to filter out
                selected = list(datasets.pile)
            else:
                selected = list(
                    select_datasets_inside_polygon(datasets.pile, geopolygon))

            # geobox
            # The non-spatial terms must come from `search_terms`:
            # `spatial_query` has had exactly those keys removed, so selecting
            # them from it would always yield nothing.
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(search_terms, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=grid_spec,
                                   **select_keys(merged, self._GEOBOX_KEYS),
                                   **spatial_query)

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            def wrap(_, value):
                return QueryResult(value, grid_spec)

            # information needed for Datacube.load_data
            return DatasetPile(Datacube.group_datasets(selected, group_query),
                               geobox).map(wrap)

        elif 'transform' in self:
            return self._input.group(datasets, **search_terms)

        elif 'collate' in self:
            # one pile per child product is expected
            self._assert(
                len(datasets.pile) == len(self._children),
                "invalid dataset pile")

            def build(source_index, product, dataset_pile):
                grouped = product.group(dataset_pile, **search_terms)

                def tag(_, value):
                    # keep the value at the position of its source child and
                    # fill every other position with None
                    in_position = [
                        value if i == source_index else None
                        for i, _ in enumerate(datasets.pile)
                    ]
                    return QueryResult(in_position, grid_spec)

                return grouped.map(tag)

            groups = [
                build(source_index, product, dataset_pile)
                for source_index, (product, dataset_pile) in enumerate(
                    zip(self._children, datasets.pile))
            ]

            return DatasetPile(
                xarray.concat([grouped.pile for grouped in groups],
                              dim='time'),
                select_unique([grouped.geobox for grouped in groups]))

        elif 'juxtapose' in self:
            # one pile per child product is expected
            self._assert(
                len(datasets.pile) == len(self._children),
                "invalid dataset pile")

            # `child_datasets` rather than `datasets`, so the loop variable
            # does not shadow the method parameter
            groups = [
                product.group(child_datasets, **search_terms)
                for product, child_datasets in zip(self._children,
                                                   datasets.pile)
            ]

            aligned_piles = xarray.align(*[grouped.pile for grouped in groups])
            child_groups = [
                DatasetPile(aligned_piles[i], grouped.geobox)
                for i, grouped in enumerate(groups)
            ]

            def tuplify(indexes, _):
                # gather, for one index, the entry of every aligned child pile
                return QueryResult([
                    grouped.pile.sel(**indexes).item()
                    for grouped in child_groups
                ], grid_spec)

            return DatasetPile(
                child_groups[0].map(tuplify).pile,
                select_unique([grouped.geobox for grouped in groups]))

        else:
            raise VirtualProductException("virtual product was not validated")
Example #5
0
    def group(self, datasets: VirtualDatasetBag,
              **search_terms: Any) -> VirtualDatasetBox:
        """
        Datasets grouped by their timestamps.

        The behaviour depends on which recipe key is present in `self`:
        `product` groups the datasets directly, `transform` delegates to the
        input product, `collate` and `juxtapose` group every child and then
        combine the per-child results.

        :param datasets: the `VirtualDatasetBag` to fetch data from
        :param search_terms: query terms; the spatial ones specify a spatial
                             sub-region, the non-spatial ones are merged with
                             the settings stored on the product itself
        :return: `VirtualDatasetBox` of the grouped datasets, the output
                 geobox and the product definitions
        :raises VirtualProductException: when `self` holds none of the keys
                                         `product`, `transform`, `collate`
                                         or `juxtapose`
        """
        grid_spec = datasets.grid_spec
        geopolygon = datasets.geopolygon

        if 'product' in self:
            # select only those inside the ROI
            # ROI could be smaller than the query for the `query` method
            # (the polygon is resolved once and reused for both the test and the value)
            query_polygon = query_geopolygon(**search_terms)
            if query_polygon is not None:
                geopolygon = query_polygon
                selected = list(
                    select_datasets_inside_polygon(datasets.pile, geopolygon))
            else:
                # no spatial restriction given: keep every dataset and the
                # polygon recorded on the bag
                selected = list(datasets.pile)

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(search_terms, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=grid_spec,
                                   geopolygon=geopolygon,
                                   **select_keys(merged, self._GEOBOX_KEYS))

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            # information needed for Datacube.load_data
            return VirtualDatasetBox(
                Datacube.group_datasets(selected, group_query), geobox,
                datasets.product_definitions)

        elif 'transform' in self:
            return self._input.group(datasets, **search_terms)

        elif 'collate' in self:
            # the pile must carry one 'collate' entry per child product
            self._assert(
                'collate' in datasets.pile
                and len(datasets.pile['collate']) == len(self._children),
                "invalid dataset pile")

            def build(source_index, product, dataset_pile):
                # re-wrap the child's pile so the child sees the same grid
                # spec, polygon and product definitions as this bag
                grouped = product.group(
                    VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                      datasets.geopolygon,
                                      datasets.product_definitions),
                    **search_terms)

                def tag(_, value):
                    # remember which child the value came from
                    return {'collate': (source_index, value)}

                return grouped.map(tag)

            groups = [
                build(source_index, product, dataset_pile)
                for source_index, (product, dataset_pile) in enumerate(
                    zip(self._children, datasets.pile['collate']))
            ]

            return VirtualDatasetBox(
                xarray.concat([grouped.pile for grouped in groups],
                              dim='time'),
                select_unique([grouped.geobox for grouped in groups]),
                merge_dicts(
                    [grouped.product_definitions for grouped in groups]))

        elif 'juxtapose' in self:
            # the pile must carry one 'juxtapose' entry per child product
            self._assert(
                'juxtapose' in datasets.pile
                and len(datasets.pile['juxtapose']) == len(self._children),
                "invalid dataset pile")

            groups = [
                product.group(
                    VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                      datasets.geopolygon,
                                      datasets.product_definitions),
                    **search_terms) for product, dataset_pile in zip(
                        self._children, datasets.pile['juxtapose'])
            ]

            aligned_piles = xarray.align(*[grouped.pile for grouped in groups])

            def tuplify(indexes, _):
                # gather, for one index, the entry of every aligned child pile
                return {
                    'juxtapose':
                    [pile.sel(**indexes).item() for pile in aligned_piles]
                }

            return VirtualDatasetBox(
                xr_apply(aligned_piles[0], tuplify),
                select_unique([grouped.geobox for grouped in groups]),
                merge_dicts(
                    [grouped.product_definitions for grouped in groups]))

        else:
            raise VirtualProductException("virtual product was not validated")