def get_tile(self, tile_x: int, tile_y: int) -> Tile:
    measure_time = self.measure_time
    tile_id = self.get_tile_id(tile_x, tile_y)
    tile_tag = self.__get_tile_tag(tile_id)
    tile_cache = self._tile_cache

    if tile_cache:
        with measure_time(tile_tag + 'queried in tile cache'):
            tile = tile_cache.get_value(tile_id)
        if tile is not None:
            if self._trace_perf:
                LOG.info(tile_tag + 'restored from tile cache')
            return tile

    with measure_time(tile_tag + 'computed'):
        tw, th = self.tile_size
        tile = self.compute_tile(tile_x, tile_y,
                                 (tw * tile_x, th * tile_y, tw, th))

    if tile_cache:
        with measure_time(tile_tag + 'stored in tile cache'):
            tile_cache.put_value(tile_id, tile)

    return tile
async def get(self):
    prefix = self.get_query_argument('prefix', default=None)
    delimiter = self.get_query_argument('delimiter', default=None)
    max_keys = int(self.get_query_argument('max-keys', default='1000'))
    list_s3_bucket_params = dict(prefix=prefix,
                                 delimiter=delimiter,
                                 max_keys=max_keys)

    list_type = self.get_query_argument('list-type', default=None)
    if list_type is None:
        marker = self.get_query_argument('marker', default=None)
        list_s3_bucket_params.update(marker=marker)
        list_s3_bucket = list_s3_bucket_v1
    elif list_type == '2':
        start_after = self.get_query_argument('start-after', default=None)
        continuation_token = self.get_query_argument('continuation-token',
                                                     default=None)
        list_s3_bucket_params.update(start_after=start_after,
                                     continuation_token=continuation_token)
        list_s3_bucket = list_s3_bucket_v2
    else:
        raise ServiceBadRequestError(
            f'Unknown bucket list type {list_type!r}')

    if _LOG_S3BUCKET_HANDLER:
        LOG.info(f'GET: list_s3_bucket_params={list_s3_bucket_params}')

    bucket_mapping = self.service_context.get_s3_bucket_mapping()
    list_bucket_result = list_s3_bucket(bucket_mapping,
                                        **list_s3_bucket_params)
    if _LOG_S3BUCKET_HANDLER:
        import json
        LOG.info(f'-->\n{json.dumps(list_bucket_result, indent=2)}')

    xml = list_bucket_result_to_xml(list_bucket_result)
    self.set_header('Content-Type', 'application/xml')
    self.write(xml)
    await self.flush()
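# Illustration (not part of the original handler): the query parameters the
# handler above accepts, shown as hypothetical request URLs. The '/s3bucket'
# path prefix is an assumption for the example only.
#
#   GET /s3bucket?prefix=demo/&delimiter=/&max-keys=100
#       -> ListObjects V1 style, optionally paged via 'marker'
#   GET /s3bucket?list-type=2&prefix=demo/&start-after=demo/a.zarr
#       -> ListObjects V2 style, paged via 'continuation-token'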
def _maybe_load_config(self):
    config_file = self.config_file
    if config_file is None:
        return

    try:
        stat = os.stat(config_file)
    except OSError as e:
        if self.config_error is None:
            LOG.error(f'configuration file {config_file!r}: {e}')
            self.config_error = e
        return

    if self.context.config_mtime != stat.st_mtime:
        self.context.config_mtime = stat.st_mtime
        try:
            with open(config_file, encoding='utf-8') as stream:
                self.context.config = yaml.safe_load(stream)
            self.config_error = None
            LOG.info(
                f'configuration file {config_file!r} successfully loaded')
        except (yaml.YAMLError, OSError) as e:
            if self.config_error is None:
                LOG.error(f'configuration file {config_file!r}: {e}')
                self.config_error = e
def start(self):
    address = self.service_info['address']
    port = self.service_info['port']
    test_url = self.context.get_service_url(f"http://{address}:{port}",
                                            "datasets")
    LOG.info(f'service running, listening on {address}:{port}, '
             f'try {test_url}')
    LOG.info('press CTRL+C to stop service')
    if len(self.context.config.get('Datasets', {})) == 0:
        LOG.warning('no datasets configured')
    tornado.ioloop.PeriodicCallback(self._try_shutdown, 100).start()
    IOLoop.current().start()
def get_time_series_for_feature_collection(ctx: ServiceContext,
                                           ds_name: str,
                                           var_name: str,
                                           feature_collection: Dict,
                                           start_date: np.datetime64 = None,
                                           end_date: np.datetime64 = None,
                                           include_count: bool = False,
                                           include_stdev: bool = False,
                                           max_valids: int = None) -> Dict:
    """
    Get the time-series for the geometries of a given *feature_collection*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param feature_collection: The feature collection.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the number of valid
        observations in the result.
    :param include_stdev: Whether to include the standard deviation
        in the result.
    :param max_valids: Optional number of valid points.
        If it is None (default), missing values are also returned (as NaN);
        if it is -1, only valid values are returned;
        if it is a positive integer, the most recent valid values
        are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    features = GeoJSON.get_feature_collection_features(feature_collection)
    if features is None:
        raise ServiceBadRequestError("Invalid GeoJSON feature collection")
    shapes = []
    for feature in features:
        geometry = GeoJSON.get_feature_geometry(feature)
        try:
            geometry = shapely.geometry.shape(geometry)
        except (TypeError, ValueError) as e:
            raise ServiceBadRequestError(
                "Invalid GeoJSON feature collection") from e
        shapes.append(geometry)
    with measure_time() as time_result:
        result = _get_time_series_for_geometries(dataset, var_name, shapes,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 include_count=include_count,
                                                 include_stdev=include_stdev,
                                                 max_valids=max_valids)
    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_feature_collection: '
            f'dataset id {ds_name}, variable {var_name}, '
            f'size={len(result["results"])}, '
            f'took {time_result.duration} seconds')
    return result
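# Example (sketch, not part of the original module): calling the function
# above with a minimal feature collection. `ctx`, the dataset identifier
# 'demo-cube' and the variable 'conc_chl' are assumptions for illustration.
def _example_feature_collection_time_series(ctx: ServiceContext) -> Dict:
    feature_collection = {
        'type': 'FeatureCollection',
        'features': [
            {
                'type': 'Feature',
                'properties': {},
                'geometry': {'type': 'Point', 'coordinates': [2.49, 51.72]},
            },
        ],
    }
    return get_time_series_for_feature_collection(
        ctx, 'demo-cube', 'conc_chl', feature_collection,
        include_count=True, max_valids=-1)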
def _find_places(ctx: ServiceContext,
                 place_group_id: str,
                 base_url: str,
                 query_geometry: shapely.geometry.base.BaseGeometry = None,
                 query_expr: Any = None,
                 comb_op: str = "and") -> GeoJsonFeatureCollection:
    with measure_time() as cm:
        features = __find_places(ctx, place_group_id, base_url,
                                 query_geometry, query_expr, comb_op)
    LOG.info(f"{len(features)} places found within {cm.duration} seconds")
    return features
async def head(self, ds_id: str, path: str = ''):
    key, local_path = self._get_key_and_local_path(ds_id, path)
    if _LOG_S3BUCKET_HANDLER:
        LOG.info(f'HEAD: key={key!r}, local_path={local_path!r}')
    if local_path is None or not local_path.exists():
        await self._key_not_found(key)
        return
    self.set_header('ETag', str_to_etag(str(local_path)))
    self.set_header('Last-Modified',
                    mtime_to_str(local_path.stat().st_mtime))
    if local_path.is_file():
        self.set_header('Content-Length', local_path.stat().st_size)
    else:
        self.set_header('Content-Length', 0)
    await self.finish()
def get_time_series_for_geometry(ctx: ServiceContext,
                                 ds_name: str,
                                 var_name: str,
                                 geometry: Dict,
                                 start_date: np.datetime64 = None,
                                 end_date: np.datetime64 = None,
                                 include_count: bool = False,
                                 include_stdev: bool = False,
                                 max_valids: int = None) -> Dict:
    """
    Get the time-series for a given *geometry*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geometry: The geometry, usually a point or polygon.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the number of valid
        observations in the result.
    :param include_stdev: Whether to include the standard deviation
        in the result.
    :param max_valids: Optional number of valid points.
        If it is None (default), missing values are also returned (as NaN);
        if it is -1, only valid values are returned;
        if it is a positive integer, the most recent valid values
        are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    if not GeoJSON.is_geometry(geometry):
        raise ServiceBadRequestError("Invalid GeoJSON geometry")
    if isinstance(geometry, dict):
        geometry = shapely.geometry.shape(geometry)
    with measure_time() as time_result:
        result = _get_time_series_for_geometry(dataset, var_name, geometry,
                                               start_date=start_date,
                                               end_date=end_date,
                                               include_count=include_count,
                                               include_stdev=include_stdev,
                                               max_valids=max_valids)
    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_geometry: '
            f'dataset id {ds_name}, variable {var_name}, '
            f'geometry type {geometry}, '
            f'size={len(result["results"])}, '
            f'took {time_result.duration} seconds')
    return result
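# Example (sketch, not part of the original module): requesting a time series
# plus observation counts and standard deviations over a small polygon.
# `ctx`, 'demo-cube' and 'conc_chl' are illustrative assumptions.
def _example_polygon_time_series(ctx: ServiceContext) -> Dict:
    polygon = {
        'type': 'Polygon',
        'coordinates': [[[2.0, 51.0], [3.0, 51.0], [3.0, 52.0],
                         [2.0, 52.0], [2.0, 51.0]]],
    }
    return get_time_series_for_geometry(ctx, 'demo-cube', 'conc_chl', polygon,
                                        include_count=True,
                                        include_stdev=True)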
def _on_shutdown(self):
    LOG.info('stopping service...')
    # noinspection PyUnresolvedReferences,PyBroadException
    try:
        self.update_timer.cancel()
    except Exception:
        pass
    if self.server:
        self.server.stop()
        self.server = None
    IOLoop.current().stop()
    LOG.info('service stopped.')
def get_dataset_configs_from_stores(self) \
        -> List[DatasetConfigDict]:
    data_store_pool = self.get_data_store_pool()
    if data_store_pool is None:
        return []

    all_dataset_configs: List[DatasetConfigDict] = []
    for store_instance_id in data_store_pool.store_instance_ids:
        LOG.info(f'scanning store {store_instance_id!r}')
        data_store_config = data_store_pool.get_store_config(
            store_instance_id)
        data_store = data_store_pool.get_store(store_instance_id)
        store_dataset_ids = data_store.get_data_ids(data_type=DATASET_TYPE)
        for store_dataset_id in store_dataset_ids:
            dataset_config_base = {}
            store_dataset_configs: List[DatasetConfigDict] \
                = data_store_config.user_data
            if store_dataset_configs:
                for store_dataset_config in store_dataset_configs:
                    dataset_id_pattern = store_dataset_config.get(
                        'Path', '*')
                    if fnmatch.fnmatch(store_dataset_id,
                                       dataset_id_pattern):
                        dataset_config_base = store_dataset_config
                        break
                else:
                    dataset_config_base = None
            if dataset_config_base is not None:
                LOG.debug(f'selected dataset {store_dataset_id!r}')
                dataset_config = dict(StoreInstanceId=store_instance_id,
                                      **dataset_config_base)
                dataset_config['Path'] = store_dataset_id
                dataset_config['Identifier'] = \
                    f'{store_instance_id}{STORE_DS_ID_SEPARATOR}' \
                    f'{store_dataset_id}'
                all_dataset_configs.append(dataset_config)

    # Just for testing:
    debug_file = 'all_dataset_configs.json'
    with open(debug_file, 'w') as stream:
        json.dump(all_dataset_configs, stream)
    LOG.debug(f'wrote file {debug_file!r}')

    return all_dataset_configs
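# Illustration (not from the original code): the shape of one entry produced
# by the method above, assuming a store instance named 'my-store' that
# contains 'my-cube.zarr' and assuming STORE_DS_ID_SEPARATOR is '~':
#
#   {
#       'StoreInstanceId': 'my-store',
#       'Path': 'my-cube.zarr',
#       'Identifier': 'my-store~my-cube.zarr',
#       # ...plus any keys copied from the matching store dataset config
#   }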
def get_time_series_for_point(ctx: ServiceContext,
                              ds_name: str,
                              var_name: str,
                              lon: float,
                              lat: float,
                              start_date: np.datetime64 = None,
                              end_date: np.datetime64 = None,
                              max_valids: int = None) -> Dict:
    """
    Get the time-series for a given point.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param lon: The point's longitude in decimal degrees.
    :param lat: The point's latitude in decimal degrees.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param max_valids: Optional number of valid points.
        If it is None (default), missing values are also returned (as NaN);
        if it is -1, only valid values are returned;
        if it is a positive integer, the most recent valid values
        are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    point = shapely.geometry.Point(lon, lat)
    with measure_time() as time_result:
        result = _get_time_series_for_point(dataset, var_name, point,
                                            start_date=start_date,
                                            end_date=end_date,
                                            max_valids=max_valids)
    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_point: '
            f'dataset id {ds_name}, variable {var_name}, '
            f'geometry type {point}, '
            f'size={len(result["results"])}, '
            f'took {time_result.duration} seconds')
    return result
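# Example (sketch, not part of the original module): a point query that
# returns only valid values within a date range. `ctx`, 'demo-cube' and
# 'conc_chl' are illustrative assumptions.
def _example_point_time_series(ctx: ServiceContext) -> Dict:
    return get_time_series_for_point(ctx, 'demo-cube', 'conc_chl',
                                     lon=2.49, lat=51.72,
                                     start_date=np.datetime64('2017-01-16'),
                                     end_date=np.datetime64('2017-01-30'),
                                     max_valids=-1)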
def _maybe_load_config(self):
    config_file = self.config_file
    if config_file is None:
        return

    try:
        stat = os.stat(config_file)
    except OSError as e:
        if self.config_error is None:
            LOG.error(f'configuration file {config_file!r}: {e}')
            self.config_error = e
        return

    if self.context.config_mtime != stat.st_mtime:
        self.context.config_mtime = stat.st_mtime
        try:
            self.context.config = load_json_or_yaml_config(config_file)
            self.config_error = None
            LOG.info(
                f'configuration file {config_file!r} successfully loaded')
        except ValueError as e:
            if self.config_error is None:
                LOG.error(f'configuration file {config_file!r}: {e}')
                self.config_error = e
async def get(self, ds_id: str, path: str = ''):
    key, local_path = self._get_key_and_local_path(ds_id, path)
    if _LOG_S3BUCKET_HANDLER:
        LOG.info(f'GET: key={key!r}, local_path={local_path!r}')
    if local_path is None or not local_path.exists():
        await self._key_not_found(key)
        return
    self.set_header('ETag', str_to_etag(str(local_path)))
    self.set_header('Last-Modified',
                    mtime_to_str(local_path.stat().st_mtime))
    self.set_header('Content-Type', 'binary/octet-stream')
    if local_path.is_file():
        self.set_header('Content-Length', local_path.stat().st_size)
        chunk_size = 1024 * 1024
        with open(str(local_path), 'rb') as fp:
            while True:
                chunk = fp.read(chunk_size)
                if len(chunk) == 0:
                    break
                self.write(chunk)
                await self.flush()
    else:
        self.set_header('Content-Length', 0)
    await self.finish()
def get_ml_dataset_tile(ml_dataset: MultiLevelDataset,
                        var_name: str,
                        x: int, y: int, z: int,
                        labels: Mapping[str, Any] = None,
                        labels_are_indices: bool = False,
                        cmap_name: Union[str, Tuple[Optional[str]]] = None,
                        cmap_range: Union[Tuple[float, float],
                                          Tuple[Tuple[float, float],
                                                Tuple[float, float],
                                                Tuple[float, float]]] = None,
                        image_cache: MutableMapping[str, TiledImage] = None,
                        tile_cache: Cache = None,
                        trace_perf: bool = False,
                        exception_type: Type[Exception] = ValueError) -> Tile:
    labels = labels or {}
    ds_cache_id = hex(id(ml_dataset))
    image_id = '-'.join(
        map(str,
            [ds_cache_id, z, ml_dataset.ds_id, var_name,
             cmap_name, cmap_range]
            + [f'{dim_name}={dim_value}'
               for dim_name, dim_value in labels.items()]))

    measure_time = measure_time_cm(logger=LOG, disabled=not trace_perf)

    if image_cache and image_id in image_cache:
        image = image_cache[image_id]
    else:
        with measure_time() as measured_time:
            if var_name == 'rgb':
                image = new_rgb_image(ml_dataset, image_id,
                                      cmap_name, cmap_range,
                                      z, labels, labels_are_indices,
                                      tile_cache, trace_perf,
                                      exception_type)
            else:
                image = new_color_mapped_image(ml_dataset, image_id,
                                               var_name, cmap_name,
                                               cmap_range, z, labels,
                                               labels_are_indices,
                                               tile_cache, trace_perf,
                                               exception_type)

        if image_cache:
            image_cache[image_id] = image

        if trace_perf:
            tile_grid = ml_dataset.tile_grid
            LOG.info(
                f'Created tiled image {image_id!r} of size {image.size}.\n'
                f'Took {measured_time.duration:.2f} seconds. Tile grid:\n'
                f'  num_levels: {tile_grid.num_levels}\n'
                f'  num_level_zero_tiles: {tile_grid.get_num_tiles(0)}\n'
                f'  tile_size: {tile_grid.tile_size}\n'
                f'  extent: {tile_grid.extent}\n'
                f'  is_j_axis_up: {tile_grid.is_j_axis_up}')

    if trace_perf:
        LOG.info(f'>>> tile {image_id}/{z}/{y}/{x}')

    with measure_time() as measured_time:
        tile = image.get_tile(x, y)

    if trace_perf:
        LOG.info(f'<<< tile {image_id}/{z}/{y}/{x}: '
                 f'took {measured_time.duration:.2f} seconds')

    return tile
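# Example (sketch, not part of the original module): fetching a single tile
# of a color-mapped variable at pyramid level 0. `ml_dataset` is assumed to
# be an already opened MultiLevelDataset; the variable name, time label,
# colormap name and value range are illustrative.
def _example_dataset_tile(ml_dataset: MultiLevelDataset) -> Tile:
    return get_ml_dataset_tile(ml_dataset, 'conc_chl', x=0, y=0, z=0,
                               labels={'time': '2017-01-16T10:00:00Z'},
                               cmap_name='viridis',
                               cmap_range=(0.0, 24.0),
                               trace_perf=True)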
def get_time_series(
        ctx: ServiceContext,
        ds_name: str,
        var_name: str,
        geo_json: GeoJsonObj,
        agg_methods: Union[str, Sequence[str]] = None,
        start_date: np.datetime64 = None,
        end_date: np.datetime64 = None,
        max_valids: int = None,
        incl_ancillary_vars: bool = False
) -> Union[TimeSeries, TimeSeriesCollection]:
    """
    Get the time-series for a given GeoJSON object *geo_json*.

    If *geo_json* is a single geometry or feature, a list of time-series
    values is returned. If *geo_json* is a geometry collection or feature
    collection, a collection of such lists is returned, so that the
    elements of the geometry/feature collection and of the time-series
    collection correspond at the same indices.

    A time-series value always contains the key "time" whose value is an
    ISO date/time string. The other entries are values whose keys depend
    on the geometry type and *agg_methods*; their values may be a bool,
    int, float, or None. For point geometries the second key is "value".
    For non-point geometries that cover a spatial area, there will be
    values for all keys given by *agg_methods*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geo_json: The GeoJSON object that is or has a geometry or a
        collection of geometries.
    :param agg_methods: Spatial aggregation methods for geometries that
        cover a spatial area.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param max_valids: Optional number of valid points.
        If it is None (default), missing values are also returned (as NaN);
        if it is -1, only valid values are returned;
        if it is a positive integer, the most recent valid values
        are returned.
    :param incl_ancillary_vars: For point geometries, include values of
        ancillary variables, if any.
    :return: Time-series data structure.
    """
    agg_methods = timeseries.normalize_agg_methods(
        agg_methods, exception_type=ServiceBadRequestError)

    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    geo_json_geometries, is_collection = _to_geo_json_geometries(geo_json)
    geometries = _to_shapely_geometries(geo_json_geometries)

    with measure_time() as time_result:
        results = _get_time_series_for_geometries(
            dataset, var_name, geometries,
            start_date=start_date,
            end_date=end_date,
            agg_methods=agg_methods,
            max_valids=max_valids,
            incl_ancillary_vars=incl_ancillary_vars)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series: dataset id {ds_name}, variable {var_name}, '
            f'{len(results)} x {len(results[0])} values, '
            f'took {time_result.duration} seconds')

    return results[0] if not is_collection and len(results) == 1 else results
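# Example (sketch, not part of the original module): time series for a single
# GeoJSON point. Per the docstring above, each returned element carries a
# "time" key (ISO date/time string) and, for points, a "value" key.
# `ctx`, 'demo-cube' and 'conc_chl' are illustrative assumptions.
def _example_get_time_series(ctx: ServiceContext) -> TimeSeries:
    geo_json = {'type': 'Point', 'coordinates': [2.49, 51.72]}
    return get_time_series(ctx, 'demo-cube', 'conc_chl', geo_json,
                           start_date=np.datetime64('2017-01-16'),
                           max_valids=-1)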