Example No. 1
0
    def update(
        self,
        product_name: Optional[str],
        year: Optional[int] = None,
        month: Optional[int] = None,
        day: Optional[int] = None,
        generate_missing_children: Optional[bool] = True,
        force_refresh: Optional[bool] = False,
    ):
        """
        Update the given summary and return the new one.

        The period is selected by how many of (year, month, day) are given:
        a full date summarises one day, (year, month) one month, (year,) one
        year, a product name alone the whole product, and no product name at
        all produces a summary across every dataset type.

        :param product_name: product to summarise, or None for all products
        :param year: year of the period, if any
        :param month: month of the period, if any
        :param day: day of the period, if any
        :param generate_missing_children: when True, child periods that have
            no stored summary are computed (via get_or_update) rather than
            just fetched.
        :param force_refresh: passed through to child lookups to force
            recomputation.
        """
        product = self._product(product_name)
        # Children are either lazily created or merely fetched, depending on
        # generate_missing_children.
        get_child = self.get_or_update if generate_missing_children else self.get

        if year and month and day:
            # Don't store days, they're quick.
            return self._summariser.calculate_summary(
                product_name, _utils.as_time_range(year, month, day))
        elif year and month:
            summary = self._summariser.calculate_summary(
                product_name, _utils.as_time_range(year, month))
        elif year:
            # A year summary is the merge of its twelve month summaries.
            summary = TimePeriodOverview.add_periods(
                get_child(product_name,
                          year,
                          month_,
                          None,
                          force_refresh=force_refresh)
                for month_ in range(1, 13))
        elif product_name:
            # A whole-product summary merges one summary per year in the
            # product's known time extent (empty products have no years).
            if product.dataset_count > 0:
                years = range(product.time_earliest.year,
                              product.time_latest.year + 1)
            else:
                years = []
            summary = TimePeriodOverview.add_periods(
                get_child(product_name,
                          year_,
                          None,
                          None,
                          force_refresh=force_refresh) for year_ in years)
        else:
            # No product given: merge the whole-product summary of every
            # dataset type. (The generator's `product` variable is scoped to
            # the expression and does not clobber the outer `product`.)
            summary = TimePeriodOverview.add_periods(
                get_child(product.name,
                          None,
                          None,
                          None,
                          force_refresh=force_refresh)
                for product in self.all_dataset_types())

        # Persist the freshly computed summary (day summaries returned early
        # above and are never stored).
        self._do_put(product_name, year, month, day, summary)

        # Notify registered observers of the updated period.
        for listener in self._update_listeners:
            listener(product_name, year, month, day, summary)
        return summary
def datasets_geojson(product_name: str,
                     year: int = None,
                     month: int = None,
                     day: int = None):
    """
    Return the datasets of a product (within an optional time period)
    as a GeoJSON FeatureCollection.
    """
    # Clamp the caller-requested limit to the hard maximum.
    limit = min(
        request.args.get("limit", default=500, type=int),
        _MAX_DATASET_RETURN,
    )

    time = _utils.as_time_range(
        year, month, day, tzinfo=_model.STORE.grouping_timezone)

    # Items lacking a geometry cannot become GeoJSON features; skip them.
    features = [
        item.as_geojson()
        for item in _model.STORE.search_items(
            product_names=[product_name],
            time=time,
            limit=limit,
            order=ItemSort.UNSORTED,
        )
        if item.geom_geojson is not None
    ]
    return as_geojson(dict(type="FeatureCollection", features=features))
Example No. 3
0
    def get_dataset_footprints(self,
                               product_name: Optional[str],
                               year: Optional[int] = None,
                               month: Optional[int] = None,
                               day: Optional[int] = None,
                               limit: int = 500) -> Dict:
        """
        Return a GeoJSON FeatureCollection of each dataset footprint in the time range.

        Each Dataset is a separate GeoJSON Feature (with embedded properties for id and tile/grid).
        """
        search_args = dict(limit=limit, product=product_name)
        # Only constrain by time when at least a year was supplied.
        if year:
            search_args['time'] = _utils.as_time_range(
                year,
                month,
                day,
                tzinfo=self.grouping_timezone,
            )

        # Our table. Faster, but doesn't yet have some fields (labels etc). TODO
        # return self._summariser.get_dataset_footprints(
        #     product_name,
        #     time_range,
        #     limit
        # )

        return _datasets_to_feature(self.index.datasets.search(**search_args))
Example No. 4
0
def datasets_geojson(product_name: str,
                     year: int = None,
                     month: int = None,
                     day: int = None):
    """
    Return the datasets of a product (within an optional time period)
    as a downloadable GeoJSON FeatureCollection.
    """
    config = flask.current_app.config
    # Requested page size, clamped to the configured hard ceiling.
    requested_limit = request.args.get(
        "limit",
        default=config["CUBEDASH_DEFAULT_API_LIMIT"],
        type=int,
    )
    limit = min(requested_limit, config["CUBEDASH_HARD_API_LIMIT"])

    time = _utils.as_time_range(
        year, month, day, tzinfo=_model.STORE.grouping_timezone)

    # Items lacking a geometry cannot become GeoJSON features; skip them.
    features = [
        item.as_geojson()
        for item in _model.STORE.search_items(
            product_names=[product_name],
            time=time,
            limit=limit,
            order=ItemSort.UNSORTED,
        )
        if item.geom_geojson is not None
    ]
    return as_geojson(
        dict(type="FeatureCollection", features=features),
        downloadable_filename_prefix=_utils.api_path_as_filename_prefix(),
    )
Example No. 5
0
    def find_datasets_for_region(self, product_name: str, region_code: str,
                                 year: int, month: int, day: int,
                                 limit: int) -> Iterable[Dataset]:
        """
        Find datasets of a product that fall in the given region and
        time period (year, or year/month, or year/month/day).
        """
        return _extents.datasets_by_region(
            self._engine,
            self.index,
            product_name,
            region_code,
            # Time range interpreted in the store's grouping timezone.
            _utils.as_time_range(year, month, day,
                                 tzinfo=self.grouping_timezone),
            limit,
        )
    def calculate_summary(
        self,
        product_name: str,
        year_month_day: Tuple[Optional[int], Optional[int], Optional[int]],
        product_refresh_time: datetime,
    ) -> TimePeriodOverview:
        """
        Create a summary of the given product/time range.

        The time range is derived from the (year, month, day) tuple — any
        trailing None widens the period (e.g. (2020, None, None) is the
        whole year).

        :param product_name: product whose datasets are summarised
        :param year_month_day: (year, month, day), each element optional
        :param product_refresh_time: timestamp of the product's last refresh;
            must not be None for a newly computed summary
        :raises RuntimeError: if product_refresh_time is None
        """
        time = _utils.as_time_range(*year_month_day)
        log = self.log.bind(product_name=product_name, time=time)
        log.debug("summary.query")

        begin_time, end_time, where_clause = self._where(product_name, time)
        # Stage 1: aggregate footprints per source SRID, reprojecting each
        # group's union into the common target SRID.
        select_by_srid = (select((
            func.ST_SRID(DATASET_SPATIAL.c.footprint).label("srid"),
            func.count().label("dataset_count"),
            func.ST_Transform(
                func.ST_Union(DATASET_SPATIAL.c.footprint),
                self._target_srid(),
                type_=Geometry(),
            ).label("footprint_geometry"),
            func.sum(DATASET_SPATIAL.c.size_bytes).label("size_bytes"),
            func.max(DATASET_SPATIAL.c.creation_time).label(
                "newest_dataset_creation_time"),
        )).where(where_clause).group_by("srid").alias("srid_summaries"))

        # Union all srid groups into one summary.
        # (ST_Buffer(..., 0) cleans invalid geometries before the union.)
        result = self._engine.execute(
            select((
                func.sum(
                    select_by_srid.c.dataset_count).label("dataset_count"),
                func.array_agg(select_by_srid.c.srid).label("srids"),
                func.sum(select_by_srid.c.size_bytes).label("size_bytes"),
                func.ST_Union(
                    func.ST_Buffer(select_by_srid.c.footprint_geometry, 0),
                    type_=Geometry(srid=self._target_srid()),
                ).label("footprint_geometry"),
                func.max(select_by_srid.c.newest_dataset_creation_time).label(
                    "newest_dataset_creation_time"),
                func.now().label("summary_gen_time"),
            )))

        rows = result.fetchall()
        log.debug("summary.query.done", srid_rows=len(rows))

        # The outer aggregate always yields exactly one row.
        assert len(rows) == 1
        row = dict(rows[0])
        # SUM over an empty set is NULL; normalise to 0.
        row["dataset_count"] = int(
            row["dataset_count"]) if row["dataset_count"] else 0
        if row["footprint_geometry"] is not None:
            row["footprint_crs"] = self._get_srid_name(
                row["footprint_geometry"].srid)
            # Convert WKB element to a shapely geometry.
            row["footprint_geometry"] = geo_shape.to_shape(
                row["footprint_geometry"])
        else:
            row["footprint_crs"] = None
        # Translate the raw SRID list into a set of CRS names.
        row["crses"] = None
        if row["srids"] is not None:
            row["crses"] = {self._get_srid_name(s) for s in row["srids"]}
        del row["srids"]

        # Convert from Python Decimal
        if row["size_bytes"] is not None:
            row["size_bytes"] = int(row["size_bytes"])

        has_data = row["dataset_count"] > 0

        log.debug("counter.calc")

        # Initialise all requested days as zero
        # NOTE(review): pandas' `closed=` kwarg is deprecated in favour of
        # `inclusive=` in newer pandas — confirm the pinned pandas version.
        day_counts = Counter({
            d.date(): 0
            for d in pd.date_range(begin_time, end_time, closed="left")
        })
        region_counts = Counter()
        if has_data:
            # Per-day dataset counts, grouped in the store's timezone.
            # NOTE(review): `grouping_time_zone` is spelled differently from
            # `grouping_timezone` used elsewhere — confirm both attributes exist.
            day_counts.update(
                Counter({
                    day.date(): count
                    for day, count in self._engine.execute(
                        select([
                            func.date_trunc(
                                "day",
                                DATASET_SPATIAL.c.center_time.op(
                                    "AT TIME ZONE")(self.grouping_time_zone),
                            ).label("day"),
                            func.count(),
                        ]).where(where_clause).group_by("day"))
                }))
            # Per-region dataset counts for the same period.
            region_counts = Counter({
                item: count
                for item, count in self._engine.execute(
                    select([
                        DATASET_SPATIAL.c.region_code.label("region_code"),
                        func.count(),
                    ]).where(where_clause).group_by("region_code"))
            })

        if product_refresh_time is None:
            raise RuntimeError(
                "Internal error: Newly-made time summaries should "
                "not have a null product refresh time.")

        year, month, day = year_month_day
        summary = TimePeriodOverview(
            **row,
            product_name=product_name,
            year=year,
            month=month,
            day=day,
            product_refresh_time=product_refresh_time,
            timeline_period="day",
            time_range=Range(begin_time, end_time),
            timeline_dataset_counts=day_counts,
            region_dataset_counts=region_counts,
            # TODO: filter invalid from the counts?
            footprint_count=row["dataset_count"] or 0,
        )

        log.debug(
            "summary.calc.done",
            dataset_count=summary.dataset_count,
            footprints_missing=summary.dataset_count - summary.footprint_count,
        )
        return summary