def update(
        self,
        product_name: Optional[str],
        year: Optional[int] = None,
        month: Optional[int] = None,
        day: Optional[int] = None,
        generate_missing_children: Optional[bool] = True,
        force_refresh: Optional[bool] = False,
):
    """Recalculate the summary for the given product/time scope and return it."""
    product = self._product(product_name)
    # When children may be absent we compute them on demand; otherwise
    # only already-stored children are read.
    child_getter = self.get_or_update if generate_missing_children else self.get

    if year and month and day:
        # Don't store days, they're quick.
        return self._summariser.calculate_summary(
            product_name, _utils.as_time_range(year, month, day))

    if year and month:
        summary = self._summariser.calculate_summary(
            product_name, _utils.as_time_range(year, month))
    elif year:
        # A year is the roll-up of its twelve months.
        summary = TimePeriodOverview.add_periods(
            child_getter(product_name, year, m, None, force_refresh=force_refresh)
            for m in range(1, 13))
    elif product_name:
        # A whole product is the roll-up of every year it has data for.
        if product.dataset_count > 0:
            year_span = range(product.time_earliest.year,
                              product.time_latest.year + 1)
        else:
            year_span = []
        summary = TimePeriodOverview.add_periods(
            child_getter(product_name, y, None, None, force_refresh=force_refresh)
            for y in year_span)
    else:
        # No product given: roll up every known product.
        summary = TimePeriodOverview.add_periods(
            child_getter(p.name, None, None, None, force_refresh=force_refresh)
            for p in self.all_dataset_types())

    self._do_put(product_name, year, month, day, summary)

    for listener in self._update_listeners:
        listener(product_name, year, month, day, summary)
    return summary
def datasets_geojson(product_name: str, year: int = None, month: int = None, day: int = None):
    """Return dataset footprints for the requested time slice as a GeoJSON FeatureCollection."""
    requested_limit = request.args.get("limit", default=500, type=int)
    # Never hand back more than the hard cap, whatever the client asked for.
    limit = min(requested_limit, _MAX_DATASET_RETURN)

    time = _utils.as_time_range(
        year, month, day, tzinfo=_model.STORE.grouping_timezone)

    # Datasets without a footprint cannot be represented as a Feature.
    features = [
        item.as_geojson()
        for item in _model.STORE.search_items(
            product_names=[product_name],
            time=time,
            limit=limit,
            order=ItemSort.UNSORTED,
        )
        if item.geom_geojson is not None
    ]
    return as_geojson(dict(type="FeatureCollection", features=features))
def get_dataset_footprints(self,
                           product_name: Optional[str],
                           year: Optional[int] = None,
                           month: Optional[int] = None,
                           day: Optional[int] = None,
                           limit: int = 500) -> Dict:
    """
    Return a GeoJSON FeatureCollection of each dataset footprint in the time range.

    Each Dataset is a separate GeoJSON Feature (with embedded properties for id and tile/grid).
    """
    search_kwargs = {}
    if year:
        search_kwargs['time'] = _utils.as_time_range(
            year,
            month,
            day,
            tzinfo=self.grouping_timezone,
        )

    # Our table. Faster, but doesn't yet have some fields (labels etc). TODO
    # return self._summariser.get_dataset_footprints(
    #     product_name, time_range, limit)

    matching_datasets = self.index.datasets.search(
        limit=limit, product=product_name, **search_kwargs)
    return _datasets_to_feature(matching_datasets)
def datasets_geojson(product_name: str, year: int = None, month: int = None, day: int = None):
    """Return dataset footprints as GeoJSON, honouring the app's configured API limits."""
    app_config = flask.current_app.config
    requested_limit = request.args.get(
        "limit",
        default=app_config["CUBEDASH_DEFAULT_API_LIMIT"],
        type=int,
    )
    # Clamp to the configured hard ceiling regardless of what was requested.
    limit = min(requested_limit, app_config["CUBEDASH_HARD_API_LIMIT"])

    time = _utils.as_time_range(
        year, month, day, tzinfo=_model.STORE.grouping_timezone)

    # Datasets without a footprint cannot be represented as a Feature.
    features = [
        item.as_geojson()
        for item in _model.STORE.search_items(
            product_names=[product_name],
            time=time,
            limit=limit,
            order=ItemSort.UNSORTED,
        )
        if item.geom_geojson is not None
    ]
    return as_geojson(
        dict(type="FeatureCollection", features=features),
        downloadable_filename_prefix=_utils.api_path_as_filename_prefix(),
    )
def find_datasets_for_region(self,
                             product_name: str,
                             region_code: str,
                             year: int,
                             month: int,
                             day: int,
                             limit: int) -> Iterable[Dataset]:
    """Return datasets of the product that fall within the region and time slice."""
    period = _utils.as_time_range(
        year,
        month,
        day,
        tzinfo=self.grouping_timezone,
    )
    return _extents.datasets_by_region(
        self._engine, self.index, product_name, region_code, period, limit)
def calculate_summary(
        self,
        product_name: str,
        year_month_day: Tuple[Optional[int], Optional[int], Optional[int]],
        product_refresh_time: datetime,
) -> TimePeriodOverview:
    """
    Create a summary of the given product/time range.

    :param product_name: product to summarise.
    :param year_month_day: (year, month, day) tuple; trailing Nones widen the
        range (e.g. ``(2020, None, None)`` covers the whole year).
    :param product_refresh_time: refresh timestamp recorded on the summary;
        must not be None for a newly-made summary.
    :return: a freshly-computed TimePeriodOverview for the range.
    :raises RuntimeError: if product_refresh_time is None.
    """
    time = _utils.as_time_range(*year_month_day)
    log = self.log.bind(product_name=product_name, time=time)
    log.debug("summary.query")

    begin_time, end_time, where_clause = self._where(product_name, time)
    # First pass: aggregate per-SRID so geometries can be unioned in their
    # native SRS before being transformed to the common target SRID.
    select_by_srid = (select((
        func.ST_SRID(DATASET_SPATIAL.c.footprint).label("srid"),
        func.count().label("dataset_count"),
        func.ST_Transform(
            func.ST_Union(DATASET_SPATIAL.c.footprint),
            self._target_srid(),
            type_=Geometry(),
        ).label("footprint_geometry"),
        func.sum(DATASET_SPATIAL.c.size_bytes).label("size_bytes"),
        func.max(DATASET_SPATIAL.c.creation_time).label(
            "newest_dataset_creation_time"),
    )).where(where_clause).group_by("srid").alias("srid_summaries"))

    # Union all srid groups into one summary.
    # ST_Buffer(..., 0) is the usual trick to repair invalid polygons
    # before unioning — NOTE(review): presumably deliberate; confirm.
    result = self._engine.execute(
        select((
            func.sum(
                select_by_srid.c.dataset_count).label("dataset_count"),
            func.array_agg(select_by_srid.c.srid).label("srids"),
            func.sum(select_by_srid.c.size_bytes).label("size_bytes"),
            func.ST_Union(
                func.ST_Buffer(select_by_srid.c.footprint_geometry, 0),
                type_=Geometry(srid=self._target_srid()),
            ).label("footprint_geometry"),
            func.max(select_by_srid.c.newest_dataset_creation_time).label(
                "newest_dataset_creation_time"),
            func.now().label("summary_gen_time"),
        )))

    rows = result.fetchall()
    log.debug("summary.query.done", srid_rows=len(rows))

    # The aggregate query has no GROUP BY, so exactly one row is expected.
    assert len(rows) == 1
    row = dict(rows[0])
    # SUM over zero rows yields NULL; normalise to an int count of 0.
    row["dataset_count"] = int(
        row["dataset_count"]) if row["dataset_count"] else 0
    if row["footprint_geometry"] is not None:
        row["footprint_crs"] = self._get_srid_name(
            row["footprint_geometry"].srid)
        # Convert WKB element to a shapely geometry for the overview object.
        row["footprint_geometry"] = geo_shape.to_shape(
            row["footprint_geometry"])
    else:
        row["footprint_crs"] = None
    # Map the raw SRID list to human-readable CRS names.
    row["crses"] = None
    if row["srids"] is not None:
        row["crses"] = {self._get_srid_name(s) for s in row["srids"]}
    del row["srids"]

    # Convert from Python Decimal
    if row["size_bytes"] is not None:
        row["size_bytes"] = int(row["size_bytes"])

    has_data = row["dataset_count"] > 0

    log.debug("counter.calc")

    # Initialise all requested days as zero
    # NOTE(review): `closed=` was deprecated in pandas 1.4 and removed in
    # 2.0 (renamed `inclusive=`) — confirm the pinned pandas version.
    day_counts = Counter({
        d.date(): 0
        for d in pd.date_range(begin_time, end_time, closed="left")
    })
    region_counts = Counter()
    if has_data:
        # Per-day dataset counts, grouped in the store's display timezone.
        day_counts.update(
            Counter({
                day.date(): count
                for day, count in self._engine.execute(
                    select([
                        func.date_trunc(
                            "day",
                            DATASET_SPATIAL.c.center_time.op(
                                "AT TIME ZONE")(self.grouping_time_zone),
                        ).label("day"),
                        func.count(),
                    ]).where(where_clause).group_by("day"))
            }))
        # Per-region dataset counts over the same range.
        region_counts = Counter({
            item: count
            for item, count in self._engine.execute(
                select([
                    DATASET_SPATIAL.c.region_code.label("region_code"),
                    func.count(),
                ]).where(where_clause).group_by("region_code"))
        })

    if product_refresh_time is None:
        raise RuntimeError(
            "Internal error: Newly-made time summaries should "
            "not have a null product refresh time.")

    year, month, day = year_month_day
    summary = TimePeriodOverview(
        **row,
        product_name=product_name,
        year=year,
        month=month,
        day=day,
        product_refresh_time=product_refresh_time,
        timeline_period="day",
        time_range=Range(begin_time, end_time),
        timeline_dataset_counts=day_counts,
        region_dataset_counts=region_counts,
        # TODO: filter invalid from the counts?
        footprint_count=row["dataset_count"] or 0,
    )
    log.debug(
        "summary.calc.done",
        dataset_count=summary.dataset_count,
        footprints_missing=summary.dataset_count - summary.footprint_count,
    )
    return summary