def _product_group_():
    """
    Group the selected datasets by timestamp and compute their output geobox.

    Returns a `VirtualDatasetBox` holding the grouped datasets, the geobox,
    and the product definitions — the information `Datacube.load_data` needs.

    NOTE(review): this reads `self`, `search_terms` and `datasets` as free
    variables, so it only works as a closure inside an enclosing scope that
    defines them — confirm against the caller.
    """
    # Select only the datasets inside the ROI; the ROI could be smaller than
    # the query for the `query` method.  Compute the geopolygon once instead
    # of calling query_geopolygon twice (fixes a redundant recomputation).
    geopolygon = query_geopolygon(**search_terms)
    if geopolygon is not None:
        selected = list(
            select_datasets_inside_polygon(datasets.pile, geopolygon))
    else:
        # no spatial restriction in the query: fall back to the bag's own
        # geopolygon and keep every dataset
        geopolygon = datasets.geopolygon
        selected = list(datasets.pile)

    # geobox: merge the non-spatial terms from the product spec and the query
    merged = merge_search_terms(
        select_keys(self, self._NON_SPATIAL_KEYS),
        select_keys(search_terms, self._NON_SPATIAL_KEYS))

    geobox = output_geobox(datasets=selected,
                           grid_spec=datasets.grid_spec,
                           geopolygon=geopolygon,
                           **select_keys(merged, self._GEOBOX_KEYS))

    # group by time
    group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

    # information needed for Datacube.load_data
    return VirtualDatasetBox(
        Datacube.group_datasets(selected, group_query), geobox,
        datasets.product_definitions)
def __call__(self, index, product, time, group_by) -> Tile:
    """Build a `Tile` for a specific polygon whose boundary is known."""
    output_crs = CRS(self.storage['crs'])

    # keep only the spatial keys from the input region
    spatial_keys = ('geopolygon', 'lon', 'lat', 'longitude', 'latitude',
                    'x', 'y')
    spatial_terms = {}
    for key, value in self.input_region.items():
        if key in spatial_keys:
            spatial_terms[key] = value

    if self.feature is None:
        # derive the polygon from the raw input region terms
        geopoly = query_geopolygon(**self.input_region)
    else:
        # an explicit feature overrides whatever geopolygon was supplied
        spatial_terms['geopolygon'] = self.feature.geopolygon
        geopoly = spatial_terms['geopolygon']

    dc = Datacube(index=index)
    datasets = dc.find_datasets(product=product,
                                time=time,
                                group_by=group_by,
                                **spatial_terms)
    group_by = query_group_by(group_by=group_by)
    sources = dc.group_datasets(datasets, group_by)

    # resolution per output-CRS dimension, in the CRS's dimension order
    output_resolution = [self.storage['resolution'][dim]
                         for dim in output_crs.dimensions]
    geopoly = geopoly.to_crs(output_crs)
    geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)
    return Tile(sources, geobox)
def __call__(self, product, time, group_by) -> Tile:
    """Build a `Tile` for a specific polygon whose boundary is known."""
    output_crs = CRS(self.storage['crs'])

    # keep only the spatial keys from the input region
    spatial_keys = ('geopolygon', 'lon', 'lat', 'longitude', 'latitude',
                    'x', 'y')
    spatial_terms = {key: value
                     for key, value in self.input_region.items()
                     if key in spatial_keys}

    if 'feature_id' in self.input_region:
        # a feature was selected: rebuild its geometry from the stored
        # GeoJSON-like dict and CRS text
        spatial_terms['geopolygon'] = Geometry(
            self.input_region['geom_feat'],
            CRS(self.input_region['crs_txt']))
        geopoly = spatial_terms['geopolygon']
    else:
        geopoly = query_geopolygon(**self.input_region)

    datasets = self.dc.find_datasets(product=product,
                                     time=time,
                                     group_by=group_by,
                                     **spatial_terms)
    group_by = query_group_by(group_by=group_by)
    sources = self.dc.group_datasets(datasets, group_by)

    # resolution per output-CRS dimension, in the CRS's dimension order
    output_resolution = [self.storage['resolution'][dim]
                         for dim in output_crs.dimensions]
    geopoly = geopoly.to_crs(output_crs)
    geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)
    return Tile(sources, geobox)
def group(self, datasets, **search_terms):
    # type: (QueryResult, Dict[str, Any]) -> DatasetPile
    """
    Datasets grouped by their timestamps.
    :param datasets: the `QueryResult` to fetch data from
    :param search_terms: to specify a spatial sub-region
    """
    grid_spec = datasets.grid_spec
    if 'product' in self:
        # select only those inside the ROI
        # ROI could be smaller than the query for `query`
        spatial_query = reject_keys(search_terms, self._NON_SPATIAL_KEYS)
        selected = list(
            select_datasets_inside_polygon(
                datasets.pile, query_geopolygon(**spatial_query)))
        # geobox
        # merge non-spatial terms baked into the product recipe with those
        # supplied in this call; the call's terms take effect via merge
        merged = merge_search_terms(
            select_keys(self, self._NON_SPATIAL_KEYS),
            select_keys(spatial_query, self._NON_SPATIAL_KEYS))
        geobox = output_geobox(datasets=selected,
                               grid_spec=grid_spec,
                               **select_keys(merged, self._GEOBOX_KEYS),
                               **spatial_query)
        # group by time
        group_query = query_group_by(
            **select_keys(merged, self._GROUPING_KEYS))

        # wrap each grouped value back into a QueryResult so children of
        # combinators see the same container type
        def wrap(_, value):
            return QueryResult(value, grid_spec)

        # information needed for Datacube.load_data
        return DatasetPile(Datacube.group_datasets(selected, group_query),
                           geobox).map(wrap)
    elif 'transform' in self:
        # transforms group exactly like their input product
        return self._input.group(datasets, **search_terms)
    elif 'collate' in self:
        # one sub-pile per child is required
        self._assert(
            len(datasets.pile) == len(self._children),
            "invalid dataset pile")

        # group one child's pile and tag every value with its source index
        def build(source_index, product, dataset_pile):
            grouped = product.group(dataset_pile, **search_terms)

            # place the value at its child's position; other slots are None
            def tag(_, value):
                in_position = [
                    value if i == source_index else None
                    for i, _ in enumerate(datasets.pile)
                ]
                return QueryResult(in_position, grid_spec)

            return grouped.map(tag)

        groups = [
            build(source_index, product, dataset_pile)
            for source_index, (product, dataset_pile) in enumerate(
                zip(self._children, datasets.pile))
        ]

        # collate = stack the children's groups along the time dimension;
        # all children must agree on the geobox
        return DatasetPile(
            xarray.concat([grouped.pile for grouped in groups], dim='time'),
            select_unique([grouped.geobox for grouped in groups]))
    elif 'juxtapose' in self:
        # one sub-pile per child is required
        self._assert(
            len(datasets.pile) == len(self._children),
            "invalid dataset pile")
        # NOTE: the loop variable `datasets` deliberately shadows the
        # parameter inside this comprehension
        groups = [
            product.group(datasets, **search_terms)
            for product, datasets in zip(self._children, datasets.pile)
        ]
        # align the children's piles on shared coordinates (e.g. time)
        aligned_piles = xarray.align(*[grouped.pile for grouped in groups])
        child_groups = [
            DatasetPile(aligned_piles[i], grouped.geobox)
            for i, grouped in enumerate(groups)
        ]

        # at each index, collect the co-located value from every child
        def tuplify(indexes, _):
            return QueryResult([
                grouped.pile.sel(**indexes).item()
                for grouped in child_groups
            ], grid_spec)

        # all children must agree on the geobox
        return DatasetPile(
            child_groups[0].map(tuplify).pile,
            select_unique([grouped.geobox for grouped in groups]))
    else:
        raise VirtualProductException("virtual product was not validated")
def group(self, datasets: VirtualDatasetBag,
          **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
    """
    Datasets grouped by their timestamps.
    :param datasets: the `VirtualDatasetBag` to fetch data from
    :param search_terms: to specify a spatial sub-region
    """
    grid_spec = datasets.grid_spec
    geopolygon = datasets.geopolygon

    if 'product' in self:
        # select only those inside the ROI
        # ROI could be smaller than the query for the `query` method
        if query_geopolygon(**search_terms) is not None:
            geopolygon = query_geopolygon(**search_terms)
            selected = list(
                select_datasets_inside_polygon(datasets.pile, geopolygon))
        else:
            # no spatial restriction: keep the bag's own geopolygon
            selected = list(datasets.pile)

        # geobox
        # merge non-spatial terms baked into the product recipe with those
        # supplied in this call
        merged = merge_search_terms(
            select_keys(self, self._NON_SPATIAL_KEYS),
            select_keys(search_terms, self._NON_SPATIAL_KEYS))

        geobox = output_geobox(datasets=selected,
                               grid_spec=grid_spec,
                               geopolygon=geopolygon,
                               **select_keys(merged, self._GEOBOX_KEYS))

        # group by time
        group_query = query_group_by(
            **select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(
            Datacube.group_datasets(selected, group_query), geobox,
            datasets.product_definitions)
    elif 'transform' in self:
        # transforms group exactly like their input product
        return self._input.group(datasets, **search_terms)
    elif 'collate' in self:
        # the bag must carry one sub-pile per child under the 'collate' key
        self._assert(
            'collate' in datasets.pile
            and len(datasets.pile['collate']) == len(self._children),
            "invalid dataset pile")

        # group one child's pile and tag each value with its source index
        def build(source_index, product, dataset_pile):
            grouped = product.group(
                VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                  datasets.geopolygon,
                                  datasets.product_definitions),
                **search_terms)

            # remember which child produced the value, for fetch time
            def tag(_, value):
                return {'collate': (source_index, value)}

            return grouped.map(tag)

        groups = [
            build(source_index, product, dataset_pile)
            for source_index, (product, dataset_pile) in enumerate(
                zip(self._children, datasets.pile['collate']))
        ]

        # collate = stack the children's groups along the time dimension;
        # all children must agree on the geobox
        return VirtualDatasetBox(
            xarray.concat([grouped.pile for grouped in groups], dim='time'),
            select_unique([grouped.geobox for grouped in groups]),
            merge_dicts(
                [grouped.product_definitions for grouped in groups]))
    elif 'juxtapose' in self:
        # the bag must carry one sub-pile per child under 'juxtapose'
        self._assert(
            'juxtapose' in datasets.pile
            and len(datasets.pile['juxtapose']) == len(self._children),
            "invalid dataset pile")
        groups = [
            product.group(
                VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                  datasets.geopolygon,
                                  datasets.product_definitions),
                **search_terms)
            for product, dataset_pile in zip(
                self._children, datasets.pile['juxtapose'])
        ]

        # align the children's piles on shared coordinates (e.g. time)
        aligned_piles = xarray.align(*[grouped.pile for grouped in groups])

        # at each index, collect the co-located value from every child
        def tuplify(indexes, _):
            return {
                'juxtapose':
                [pile.sel(**indexes).item() for pile in aligned_piles]
            }

        # all children must agree on the geobox
        return VirtualDatasetBox(
            xr_apply(aligned_piles[0], tuplify),
            select_unique([grouped.geobox for grouped in groups]),
            merge_dicts(
                [grouped.product_definitions for grouped in groups]))
    else:
        raise VirtualProductException("virtual product was not validated")