Example #1
    def get_tile(self, tile_x: int, tile_y: int) -> Tile:

        measure_time = self.measure_time
        tile_id = self.get_tile_id(tile_x, tile_y)
        tile_tag = self.__get_tile_tag(tile_id)
        tile_cache = self._tile_cache

        if tile_cache:
            with measure_time(tile_tag + 'queried in tile cache'):
                tile = tile_cache.get_value(tile_id)
            if tile is not None:
                if self._trace_perf:
                    LOG.info(tile_tag + 'restored from tile cache')
                return tile

        with measure_time(tile_tag + 'computed'):
            tw, th = self.tile_size
            tile = self.compute_tile(tile_x, tile_y,
                                     (tw * tile_x, th * tile_y, tw, th))

        if tile_cache:
            with measure_time(tile_tag + 'stored in tile cache'):
                tile_cache.put_value(tile_id, tile)

        return tile
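The measure_time helper used above is referenced but never defined in these examples (Example #14 obtains one via measure_time_cm). A minimal sketch of a compatible context manager, assuming only that it accepts an optional message and exposes the elapsed time as a duration attribute; the real helper may differ:

import logging
import time
from contextlib import contextmanager


@contextmanager
def measure_time(message: str = None, logger: logging.Logger = None):
    # Measures the wall-clock duration of the enclosed block and,
    # if a message and logger are given, logs it on exit.
    class _Result:
        duration = None

    result = _Result()
    t0 = time.perf_counter()
    try:
        yield result
    finally:
        result.duration = time.perf_counter() - t0
        if message is not None and logger is not None:
            logger.info(f'{message} within {result.duration:.4f} seconds')

This supports both call styles seen in the examples: with measure_time('tile computed'): ... and with measure_time() as t: ... followed by reading t.duration.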
Example #2
    async def get(self):

        prefix = self.get_query_argument('prefix', default=None)
        delimiter = self.get_query_argument('delimiter', default=None)
        max_keys = int(self.get_query_argument('max-keys', default='1000'))
        list_s3_bucket_params = dict(prefix=prefix, delimiter=delimiter,
                                     max_keys=max_keys)

        list_type = self.get_query_argument('list-type', default=None)
        if list_type is None:
            marker = self.get_query_argument('marker', default=None)
            list_s3_bucket_params.update(marker=marker)
            list_s3_bucket = list_s3_bucket_v1
        elif list_type == '2':
            start_after = self.get_query_argument('start-after', default=None)
            continuation_token = self.get_query_argument('continuation-token', default=None)
            list_s3_bucket_params.update(start_after=start_after, continuation_token=continuation_token)
            list_s3_bucket = list_s3_bucket_v2
        else:
            raise ServiceBadRequestError(f'Unknown bucket list type {list_type!r}')

        if _LOG_S3BUCKET_HANDLER:
            LOG.info(f'GET: list_s3_bucket_params={list_s3_bucket_params}')
        bucket_mapping = self.service_context.get_s3_bucket_mapping()
        list_bucket_result = list_s3_bucket(bucket_mapping, **list_s3_bucket_params)
        if _LOG_S3BUCKET_HANDLER:
            import json
            LOG.info(f'-->\n{json.dumps(list_bucket_result, indent=2)}')

        xml = list_bucket_result_to_xml(list_bucket_result)
        self.set_header('Content-Type', 'application/xml')
        self.write(xml)
        await self.flush()
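Neither list_s3_bucket_v1/list_s3_bucket_v2 nor the result structure consumed by list_bucket_result_to_xml is shown here. A hypothetical stub of the v1 variant, assuming the result dict mirrors the S3 ListObjects (version 1) response elements; the key names are assumptions, not this codebase's actual API:

def list_s3_bucket_v1(bucket_mapping, prefix=None, delimiter=None,
                      max_keys=1000, marker=None):
    # Sketch only: key names follow the S3 ListObjects v1 response.
    return dict(Name='datasets',
                Prefix=prefix,
                Delimiter=delimiter,
                Marker=marker,
                MaxKeys=max_keys,
                IsTruncated=False,
                Contents=[])  # one dict per object: Key, LastModified, ETag, Size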
Example #3
    def _maybe_load_config(self):
        config_file = self.config_file
        if config_file is None:
            return

        try:
            stat = os.stat(config_file)
        except OSError as e:
            if self.config_error is None:
                LOG.error(f'configuration file {config_file!r}: {e}')
                self.config_error = e
            return

        if self.context.config_mtime != stat.st_mtime:
            self.context.config_mtime = stat.st_mtime
            try:
                with open(config_file, encoding='utf-8') as stream:
                    self.context.config = yaml.safe_load(stream)
                self.config_error = None
                LOG.info(
                    f'configuration file {config_file!r} successfully loaded')
            except (yaml.YAMLError, OSError) as e:
                if self.config_error is None:
                    LOG.error(f'configuration file {config_file!r}: {e}')
                    self.config_error = e
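Because _maybe_load_config reloads only when the file's modification time changes, it is cheap to call repeatedly. One plausible way to drive it (an assumption, not shown in the source) is a Tornado PeriodicCallback, the same mechanism Example #4 uses for _try_shutdown:

import tornado.ioloop

# Hypothetical wiring: `service` is assumed to be the object defining
# _maybe_load_config(); poll the configuration file every 2 seconds.
tornado.ioloop.PeriodicCallback(service._maybe_load_config,
                                callback_time=2000).start()
tornado.ioloop.IOLoop.current().start()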
Example #4
    def start(self):
        address = self.service_info['address']
        port = self.service_info['port']
        test_url = self.context.get_service_url(f"http://{address}:{port}", "datasets")
        LOG.info(f'service running, listening on {address}:{port}, try {test_url}')
        LOG.info('press CTRL+C to stop service')
        if len(self.context.config.get('Datasets', {})) == 0:
            LOG.warning('no datasets configured')
        tornado.ioloop.PeriodicCallback(self._try_shutdown, 100).start()
        IOLoop.current().start()
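_try_shutdown itself is not shown. A plausible sketch, assuming a _shutdown_requested flag set elsewhere (for example by a SIGINT handler, which cannot stop the IOLoop safely by itself) and the _on_shutdown method from Example #9:

    def _try_shutdown(self):
        # Hypothetical: polled every 100 ms by the PeriodicCallback above.
        # `self._shutdown_requested` is an assumed flag, e.g. set by a
        # signal handler; _on_shutdown is shown in Example #9.
        if self._shutdown_requested:
            self._on_shutdown()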
Example #5
def get_time_series_for_feature_collection(ctx: ServiceContext,
                                           ds_name: str,
                                           var_name: str,
                                           feature_collection: Dict,
                                           start_date: np.datetime64 = None,
                                           end_date: np.datetime64 = None,
                                           include_count: bool = False,
                                           include_stdev: bool = False,
                                           max_valids: int = None) -> Dict:
    """
    Get the time-series for the geometries of a given *feature_collection*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param feature_collection: The feature collection.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the valid number of observations in the result.
    :param include_stdev: Whether to include the standard deviation in the result.
    :param max_valids: Optional maximum number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, only the most recent valid values are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    features = GeoJSON.get_feature_collection_features(feature_collection)
    if features is None:
        raise ServiceBadRequestError("Invalid GeoJSON feature collection")
    shapes = []
    for feature in features:
        geometry = GeoJSON.get_feature_geometry(feature)
        try:
            geometry = shapely.geometry.shape(geometry)
        except (TypeError, ValueError) as e:
            raise ServiceBadRequestError(
                "Invalid GeoJSON feature collection") from e
        shapes.append(geometry)
    with measure_time() as time_result:
        result = _get_time_series_for_geometries(dataset,
                                                 var_name,
                                                 shapes,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 include_count=include_count,
                                                 include_stdev=include_stdev,
                                                 max_valids=max_valids)
    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_feature_collection: dataset id {ds_name}, '
            f'variable {var_name}, size={len(result["results"])}, '
            f'took {time_result.duration} seconds'
        )
    return result
Example #6
def _find_places(ctx: ServiceContext,
                 place_group_id: str,
                 base_url: str,
                 query_geometry: shapely.geometry.base.BaseGeometry = None,
                 query_expr: Any = None,
                 comb_op: str = "and") -> GeoJsonFeatureCollection:
    with measure_time() as cm:
        features = __find_places(ctx, place_group_id, base_url, query_geometry,
                                 query_expr, comb_op)
    LOG.info(f"{len(features)} places found within {cm.duration} seconds")
    return features
Example #7
    async def head(self, ds_id: str, path: str = ''):
        key, local_path = self._get_key_and_local_path(ds_id, path)
        if _LOG_S3BUCKET_HANDLER:
            LOG.info(f'HEAD: key={key!r}, local_path={local_path!r}')
        if local_path is None or not local_path.exists():
            await self._key_not_found(key)
            return
        self.set_header('ETag', str_to_etag(str(local_path)))
        self.set_header('Last-Modified', mtime_to_str(local_path.stat().st_mtime))
        if local_path.is_file():
            self.set_header('Content-Length', local_path.stat().st_size)
        else:
            self.set_header('Content-Length', 0)
        await self.finish()
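str_to_etag and mtime_to_str are helpers whose implementations are not shown. Minimal sketches, under the assumption of S3-style quoted MD5 ETags and HTTP-date Last-Modified values; the real helpers may differ:

import email.utils
import hashlib


def str_to_etag(s: str) -> str:
    # Assumption: a quoted MD5 hex digest, since S3-style ETags are quoted.
    return '"' + hashlib.md5(s.encode('utf-8')).hexdigest() + '"'


def mtime_to_str(mtime: float) -> str:
    # Assumption: RFC 7231 HTTP date, e.g. 'Tue, 15 Nov 1994 12:45:26 GMT'.
    return email.utils.formatdate(mtime, usegmt=True)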
Example #8
def get_time_series_for_geometry(ctx: ServiceContext,
                                 ds_name: str,
                                 var_name: str,
                                 geometry: Dict,
                                 start_date: np.datetime64 = None,
                                 end_date: np.datetime64 = None,
                                 include_count: bool = False,
                                 include_stdev: bool = False,
                                 max_valids: int = None) -> Dict:
    """
    Get the time-series for a given *geometry*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geometry: The geometry, usually a point or polygon.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the valid number of observations in the result.
    :param include_stdev: Whether to include the standard deviation in the result.
    :param max_valids: Optional maximum number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, only the most recent valid values are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    if not GeoJSON.is_geometry(geometry):
        raise ServiceBadRequestError("Invalid GeoJSON geometry")
    if isinstance(geometry, dict):
        geometry = shapely.geometry.shape(geometry)
    with measure_time() as time_result:
        result = _get_time_series_for_geometry(dataset,
                                               var_name,
                                               geometry,
                                               start_date=start_date,
                                               end_date=end_date,
                                               include_count=include_count,
                                               include_stdev=include_stdev,
                                               max_valids=max_valids)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_geometry: dataset id {ds_name}, variable {var_name}, '
            f'geometry type {geometry.geom_type}, '
            f'size={len(result["results"])}, took {time_result.duration} seconds'
        )
    return result
Example #9
    def _on_shutdown(self):

        LOG.info('stopping service...')

        # noinspection PyUnresolvedReferences,PyBroadException
        try:
            self.update_timer.cancel()
        except Exception:
            pass

        if self.server:
            self.server.stop()
            self.server = None

        IOLoop.current().stop()
        LOG.info('service stopped.')
Example #10
    def get_dataset_configs_from_stores(self) \
            -> List[DatasetConfigDict]:

        data_store_pool = self.get_data_store_pool()
        if data_store_pool is None:
            return []

        all_dataset_configs: List[DatasetConfigDict] = []
        for store_instance_id in data_store_pool.store_instance_ids:
            LOG.info(f'scanning store {store_instance_id!r}')
            data_store_config = data_store_pool.get_store_config(
                store_instance_id)
            data_store = data_store_pool.get_store(store_instance_id)
            store_dataset_ids = data_store.get_data_ids(data_type=DATASET_TYPE)
            for store_dataset_id in store_dataset_ids:
                dataset_config_base = {}
                store_dataset_configs: List[DatasetConfigDict] \
                    = data_store_config.user_data
                if store_dataset_configs:
                    for store_dataset_config in store_dataset_configs:
                        dataset_id_pattern = store_dataset_config.get(
                            'Path', '*')
                        if fnmatch.fnmatch(store_dataset_id,
                                           dataset_id_pattern):
                            dataset_config_base = store_dataset_config
                            break
                    else:
                        # for-else: no 'Path' pattern matched this dataset id
                        dataset_config_base = None
                if dataset_config_base is not None:
                    LOG.debug(f'selected dataset {store_dataset_id!r}')
                    dataset_config = dict(StoreInstanceId=store_instance_id,
                                          **dataset_config_base)
                    dataset_config['Path'] = store_dataset_id
                    dataset_config['Identifier'] = \
                        f'{store_instance_id}{STORE_DS_ID_SEPARATOR}' \
                        f'{store_dataset_id}'
                    all_dataset_configs.append(dataset_config)

        # Just for testing:
        debug_file = 'all_dataset_configs.json'
        with open(debug_file, 'w') as stream:
            json.dump(all_dataset_configs, stream)
            LOG.debug(f'wrote file {debug_file!r}')

        return all_dataset_configs
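The 'Path' entries of a store's dataset configurations act as glob patterns matched against store dataset ids, first match wins. A small standalone illustration of that selection (the 'Style' key is purely illustrative):

import fnmatch

store_dataset_configs = [
    {'Path': 'demo/*.zarr', 'Style': 'default'},
    {'Path': '*', 'Style': 'fallback'},
]
# First-match-wins selection, as in the loop above.
for config in store_dataset_configs:
    if fnmatch.fnmatch('demo/cube.zarr', config.get('Path', '*')):
        print('selected:', config)
        break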
Example #11
def get_time_series_for_point(ctx: ServiceContext,
                              ds_name: str,
                              var_name: str,
                              lon: float,
                              lat: float,
                              start_date: np.datetime64 = None,
                              end_date: np.datetime64 = None,
                              max_valids: int = None) -> Dict:
    """
    Get the time-series for a given point.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param lon: The point's longitude in decimal degrees.
    :param lat: The point's latitude in decimal degrees.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param max_valids: Optional maximum number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, only the most recent valid values are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    with measure_time() as time_result:
        result = _get_time_series_for_point(dataset,
                                            var_name,
                                            shapely.geometry.Point(lon, lat),
                                            start_date=start_date,
                                            end_date=end_date,
                                            max_valids=max_valids)
    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_point: dataset id {ds_name}, variable {var_name}, '
            f'geometry {shapely.geometry.Point(lon, lat)}, size={len(result["results"])}, '
            f'took {time_result.duration} seconds')
    return result
Example #12
    def _maybe_load_config(self):
        config_file = self.config_file
        if config_file is None:
            return

        try:
            stat = os.stat(config_file)
        except OSError as e:
            if self.config_error is None:
                LOG.error(f'configuration file {config_file!r}: {e}')
                self.config_error = e
            return

        if self.context.config_mtime != stat.st_mtime:
            self.context.config_mtime = stat.st_mtime
            try:
                self.context.config = load_json_or_yaml_config(config_file)
                self.config_error = None
                LOG.info(f'configuration file {config_file!r} successfully loaded')
            except ValueError as e:
                if self.config_error is None:
                    LOG.error(f'configuration file {config_file!r}: {e}')
                    self.config_error = e
Example #13
    async def get(self, ds_id: str, path: str = ''):
        key, local_path = self._get_key_and_local_path(ds_id, path)
        if _LOG_S3BUCKET_HANDLER:
            LOG.info(f'GET: key={key!r}, local_path={local_path!r}')
        if local_path is None or not local_path.exists():
            await self._key_not_found(key)
            return
        self.set_header('ETag', str_to_etag(str(local_path)))
        self.set_header('Last-Modified', mtime_to_str(local_path.stat().st_mtime))
        self.set_header('Content-Type', 'binary/octet-stream')
        if local_path.is_file():
            self.set_header('Content-Length', local_path.stat().st_size)
            chunk_size = 1024 * 1024
            with open(str(local_path), 'rb') as fp:
                while True:
                    chunk = fp.read(chunk_size)
                    if len(chunk) == 0:
                        break
                    self.write(chunk)
                    await self.flush()
        else:
            self.set_header('Content-Length', 0)
            await self.finish()
Example #14
def get_ml_dataset_tile(ml_dataset: MultiLevelDataset,
                        var_name: str,
                        x: int,
                        y: int,
                        z: int,
                        labels: Mapping[str, Any] = None,
                        labels_are_indices: bool = False,
                        cmap_name: Union[str, Tuple[Optional[str]]] = None,
                        cmap_range: Union[Tuple[float, float],
                                          Tuple[Tuple[float, float],
                                                Tuple[float, float],
                                                Tuple[float, float]]] = None,
                        image_cache: MutableMapping[str, TiledImage] = None,
                        tile_cache: Cache = None,
                        trace_perf: bool = False,
                        exception_type: Type[Exception] = ValueError) -> Tile:
    labels = labels or {}
    ds_cache_id = hex(id(ml_dataset))
    image_id = '-'.join(
        map(str, [
            ds_cache_id, z, ml_dataset.ds_id, var_name, cmap_name, cmap_range
        ] + [
            f'{dim_name}={dim_value}'
            for dim_name, dim_value in labels.items()
        ]))

    measure_time = measure_time_cm(logger=LOG, disabled=not trace_perf)

    if image_cache and image_id in image_cache:
        image = image_cache[image_id]
    else:
        with measure_time() as measured_time:
            if var_name == 'rgb':
                image = new_rgb_image(ml_dataset, image_id, cmap_name,
                                      cmap_range, z, labels,
                                      labels_are_indices, tile_cache,
                                      trace_perf, exception_type)
            else:
                image = new_color_mapped_image(ml_dataset, image_id, var_name,
                                               cmap_name, cmap_range, z,
                                               labels, labels_are_indices,
                                               tile_cache, trace_perf,
                                               exception_type)

        if image_cache:
            image_cache[image_id] = image

        if trace_perf:
            tile_grid = ml_dataset.tile_grid
            LOG.info(
                f'Created tiled image {image_id!r} of size {image.size}.\n'
                f'Took {measured_time.duration:.2f} seconds. Tile grid:\n'
                f'  num_levels: {tile_grid.num_levels}\n'
                f'  num_level_zero_tiles: {tile_grid.get_num_tiles(0)}\n'
                f'  tile_size: {tile_grid.tile_size}\n'
                f'  extent: {tile_grid.extent}\n'
                f'  is_j_axis_up: {tile_grid.is_j_axis_up}')

    if trace_perf:
        LOG.info(f'>>> tile {image_id}/{z}/{y}/{x}')

    with measure_time() as measured_time:
        tile = image.get_tile(x, y)

    if trace_perf:
        LOG.info(f'<<< tile {image_id}/{z}/{y}/{x}: '
                 f'took {measured_time.duration:.2f} seconds')

    return tile
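image_cache only needs to be a MutableMapping from image id to TiledImage, so any bounded mapping will do. A usage sketch with cachetools (an assumption; the dataset object and variable name are placeholders):

import cachetools

# Bounded LRU mapping: evicts least-recently-used entries beyond 100 images.
image_cache = cachetools.LRUCache(maxsize=100)

tile = get_ml_dataset_tile(ml_dataset, 'conc_chl', x=0, y=0, z=0,
                           image_cache=image_cache, trace_perf=True)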
Example #15
def get_time_series(
    ctx: ServiceContext,
    ds_name: str,
    var_name: str,
    geo_json: GeoJsonObj,
    agg_methods: Union[str, Sequence[str]] = None,
    start_date: np.datetime64 = None,
    end_date: np.datetime64 = None,
    max_valids: int = None,
    incl_ancillary_vars: bool = False
) -> Union[TimeSeries, TimeSeriesCollection]:
    """
    Get the time-series for a given GeoJSON object *geo_json*.

    If *geo_json* is a single geometry or feature, a list of time-series values is returned.
    If *geo_json* is a geometry collection or feature collection, a collection of such
    lists is returned, such that the elements of the geometry/feature collection and of
    the time-series collection correspond at the same indices.

    A time-series value always contains the key "time", whose value is an ISO date/time
    string. The other entries have keys that vary with the geometry type and *agg_methods*;
    their values may be a bool, an int, a float, or None.
    For point geometries, the second key is "value".
    For non-point geometries that cover spatial areas, there will be one value for each
    key given by *agg_methods*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geo_json: The GeoJSON object that is or has a geometry, or a collection of geometries.
    :param agg_methods: Spatial aggregation methods for geometries that cover a spatial area.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param max_valids: Optional maximum number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, only the most recent valid values are returned.
    :param incl_ancillary_vars: For point geometries, include values of ancillary variables, if any.
    :return: Time-series data structure.
    """
    agg_methods = timeseries.normalize_agg_methods(
        agg_methods, exception_type=ServiceBadRequestError)

    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    geo_json_geometries, is_collection = _to_geo_json_geometries(geo_json)
    geometries = _to_shapely_geometries(geo_json_geometries)

    with measure_time() as time_result:
        results = _get_time_series_for_geometries(
            dataset,
            var_name,
            geometries,
            start_date=start_date,
            end_date=end_date,
            agg_methods=agg_methods,
            max_valids=max_valids,
            incl_ancillary_vars=incl_ancillary_vars)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series: dataset id {ds_name}, variable {var_name}, '
            f'{len(results)} x {len(results[0]) if results else 0} values, '
            f'took {time_result.duration} seconds'
        )

    return results[0] if not is_collection and len(results) == 1 else results
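A usage sketch with placeholder dataset and variable names ('demo' and 'conc_chl' are not from the source), requesting mean and count aggregates over a polygon:

geo_json = {
    'type': 'Polygon',
    'coordinates': [[[0.0, 50.0], [5.0, 50.0], [5.0, 52.5],
                     [0.0, 52.5], [0.0, 50.0]]],
}
time_series = get_time_series(ctx, 'demo', 'conc_chl', geo_json,
                              agg_methods=['mean', 'count'],
                              start_date=np.datetime64('2017-01-16'),
                              end_date=np.datetime64('2017-01-30'),
                              max_valids=-1)
# Each value is a dict with an ISO 'time' string plus one entry per
# aggregation method, e.g. {'time': ..., 'mean': ..., 'count': ...}.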