Esempio n. 1
0
def _get_time_series_dataset(ctx: ServiceContext,
                             ds_name: str,
                             var_name: str = None):
    """
    Open the dataset to be used for time-series extraction for *ds_name*.

    The dataset descriptor may name an alternative dataset under its
    'TimeSeriesDataset' key; if that key is absent, *ds_name* itself is used.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: Optional variable name. If given (and truthy), it is
           passed to ``ctx.get_dataset`` as the only expected variable name.
    :return: The result of ``ctx.get_dataset`` for the resolved dataset.
    """
    expected_var_names = [var_name] if var_name else None
    ts_ds_name = ctx.get_dataset_descriptor(ds_name).get('TimeSeriesDataset',
                                                         ds_name)
    return ctx.get_dataset(ts_ds_name, expected_var_names=expected_var_names)
Esempio n. 2
0
def __find_places(ctx: ServiceContext,
                  place_group_id: str,
                  base_url: str,
                  query_geometry: shapely.geometry.base.BaseGeometry = None,
                  query_expr: Any = None,
                  comb_op: str = "and") -> GeoJsonFeatureCollection:
    """
    Collect the places of a global place group, optionally restricted to
    those whose geometry intersects *query_geometry*.

    :param ctx: Service context object
    :param place_group_id: Identifier of the place group, or ``ALL_PLACES``
           to merge the features of all global place groups.
    :param base_url: Base URL passed on to the place group lookups.
    :param query_geometry: Optional geometry; if given, only features
           intersecting it are returned.
    :param query_expr: Query expression; anything other than None is
           not supported.
    :param comb_op: Combination operator; only None and "and" are accepted.
    :return: A dictionary of type "FeatureCollection" with a "features" list.
    :raise NotImplementedError: if *comb_op* is neither None nor "and",
           or if *query_expr* is given.
    """
    if not (comb_op is None or comb_op == "and"):
        raise NotImplementedError("comb_op not yet supported")

    # Features are always loaded first, even if the query is rejected below.
    if place_group_id == ALL_PLACES:
        candidates = []
        for group in ctx.get_global_place_groups(base_url, load_features=True):
            candidates += group['features']
    else:
        group = ctx.get_global_place_group(place_group_id, base_url, load_features=True)
        candidates = group['features']

    if query_expr is not None:
        # Query expressions are unsupported, with or without a geometry.
        raise NotImplementedError()

    if query_geometry is None:
        return dict(type="FeatureCollection", features=candidates)

    matching_places = [
        candidate for candidate in candidates
        if shapely.geometry.shape(candidate['geometry']).intersects(query_geometry)
    ]
    return dict(type="FeatureCollection", features=matching_places)
Esempio n. 3
0
def get_time_series_info(ctx: ServiceContext) -> Dict:
    """
    Get time-series meta-information for variables.

    Descriptors without an 'Identifier', descriptors flagged as 'Hidden',
    and datasets without a 'time' variable are skipped.

    :param ctx: Service context object
    :return: a dictionary with a single entry "layers" which is a list of
             dictionaries, one per data variable, each containing the
             variable's "name" (``<dataset id>.<variable name>``),
             "dates", and "bounds".
    """
    layers = []
    for descriptor in ctx.get_dataset_descriptors():
        if 'Identifier' not in descriptor or descriptor.get('Hidden'):
            continue

        ds_id = descriptor['Identifier']
        dataset = ctx.get_dataset(ds_id)
        if 'time' not in dataset.variables:
            continue

        xmin, ymin, xmax, ymax = get_dataset_bounds(dataset)
        # All variables of one dataset share the same list of dates.
        time_stamps = [timestamp_to_iso_string(time_value)
                       for time_value in dataset.variables['time'].data]

        layers.extend(
            {
                'name': f'{ds_id}.{var_name}',
                'dates': time_stamps,
                'bounds': dict(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)
            }
            for var_name in sorted(dataset.data_vars)
        )
    return {'layers': layers}
Esempio n. 4
0
def new_test_service_context(config_file_name: str = 'config.yml',
                             ml_dataset_openers: Dict[str, MultiLevelDatasetOpener] = None) -> ServiceContext:
    """
    Create a service context rooted at the test resources directory.

    :param config_file_name: Name of the YAML configuration file, resolved
           relative to the context's base directory.
    :param ml_dataset_openers: Optional mapping passed through to
           ``ServiceContext`` unchanged.
    :return: The new service context with its ``config`` loaded from the file.
    """
    test_ctx = ServiceContext(base_dir=get_res_test_dir(), ml_dataset_openers=ml_dataset_openers)
    config_path = os.path.join(test_ctx.base_dir, config_file_name)
    with open(config_path, encoding='utf-8') as stream:
        loaded_config = yaml.safe_load(stream)
    test_ctx.config = loaded_config
    return test_ctx
Esempio n. 5
0
def new_demo_service_context(
    ml_dataset_openers: Dict[str, MultiLevelDatasetOpener] = None
) -> ServiceContext:
    """
    Create a service context rooted at the demo resources directory.

    The context's base directory is the one returned by
    ``get_res_demo_dir()``; its configuration is loaded from the
    'config.yml' file in that directory.

    :param ml_dataset_openers: Optional mapping passed through to
           ``ServiceContext`` unchanged.
    :return: The new service context with its ``config`` set.
    """
    ctx = ServiceContext(base_dir=get_res_demo_dir(),
                         ml_dataset_openers=ml_dataset_openers)
    config_file = os.path.join(ctx.base_dir, 'config.yml')
    # Read as UTF-8 explicitly so that loading the configuration does not
    # depend on the platform's locale encoding.
    with open(config_file, encoding='utf-8') as fp:
        ctx.config = yaml.safe_load(fp)
    return ctx
Esempio n. 6
0
def _get_time_series_dataset(ctx: ServiceContext,
                             ds_id: str,
                             var_name: str = None):
    """
    Open the dataset to be used for time-series extraction for *ds_id*.

    The dataset named by the descriptor's 'TimeSeriesDataset' entry is
    preferred; if it cannot be opened with the expected variable, the
    dataset *ds_id* itself is opened instead.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param var_name: Optional variable name. If given (and truthy), it is
           passed to ``ctx.get_dataset`` as the only expected variable name.
    :return: The result of ``ctx.get_dataset``.
    """
    expected_var_names = [var_name] if var_name else None
    preferred_ds_id = ctx.get_dataset_descriptor(ds_id).get('TimeSeriesDataset',
                                                            ds_id)
    try:
        # Prefer the (more efficient, time-chunked) time-series dataset.
        return ctx.get_dataset(preferred_ds_id,
                               expected_var_names=expected_var_names)
    except ServiceResourceNotFoundError:
        # Raised if the dataset pointed to by 'TimeSeriesDataset'
        # does not contain the variable given by var_name.
        return ctx.get_dataset(ds_id, expected_var_names=expected_var_names)
Esempio n. 7
0
def get_legend(ctx: ServiceContext, ds_id: str, var_name: str,
               params: RequestParams):
    """
    Render a vertical color bar legend for a variable as a PNG image.

    Color bar name, value range and figure size are read from the query
    arguments 'cbar', 'vmin', 'vmax', 'width' and 'height'. If any of them
    is missing, the missing ones are filled in from
    ``ctx.get_color_mapping(ds_id, var_name)`` and from
    ``DEFAULT_CMAP_WIDTH`` / ``DEFAULT_CMAP_HEIGHT``.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param var_name: The variable name.
    :param params: Request parameters providing the query arguments.
    :return: The PNG-encoded legend image as bytes.
    :raise ServiceResourceNotFoundError: if ``get_cmap`` raises ValueError
           for the color bar name.
    """
    cmap_name = params.get_query_argument('cbar', default=None)
    cmap_vmin = params.get_query_argument_float('vmin', default=None)
    cmap_vmax = params.get_query_argument_float('vmax', default=None)
    cmap_w = params.get_query_argument_int('width', default=None)
    cmap_h = params.get_query_argument_int('height', default=None)
    if cmap_name is None or cmap_vmin is None or cmap_vmax is None \
            or cmap_w is None or cmap_h is None:
        default_cmap_cbar, (default_cmap_vmin,
                            default_cmap_vmax) = ctx.get_color_mapping(
                                ds_id, var_name)
        cmap_name = cmap_name or default_cmap_cbar
        # Test the bounds against None explicitly: 0.0 is a legitimate
        # value range bound and must not be replaced by the default.
        if cmap_vmin is None:
            cmap_vmin = default_cmap_vmin
        if cmap_vmax is None:
            cmap_vmax = default_cmap_vmax
        cmap_w = cmap_w or DEFAULT_CMAP_WIDTH
        cmap_h = cmap_h or DEFAULT_CMAP_HEIGHT

    try:
        _, cmap = get_cmap(cmap_name)
    except ValueError as e:
        raise ServiceResourceNotFoundError(
            f"color bar {cmap_name!r} not found") from e

    fig = matplotlib.figure.Figure(figsize=(cmap_w, cmap_h))
    ax1 = fig.add_subplot(1, 1, 1)
    if '.cpd' in cmap_name:
        # Names containing '.cpd' get their norm and ticks from get_norm().
        norm, ticks = get_norm(cmap_name)
    else:
        norm = matplotlib.colors.Normalize(vmin=cmap_vmin, vmax=cmap_vmax)
        ticks = None

    image_legend = matplotlib.colorbar.ColorbarBase(ax1,
                                                    format='%.1f',
                                                    ticks=ticks,
                                                    cmap=cmap,
                                                    norm=norm,
                                                    orientation='vertical')

    image_legend_label = ctx.get_legend_label(ds_id, var_name)
    if image_legend_label is not None:
        image_legend.set_label(image_legend_label)

    # White face color with zero alpha: the figure background is transparent.
    fig.patch.set_facecolor('white')
    fig.patch.set_alpha(0.0)
    fig.tight_layout()

    buffer = io.BytesIO()
    fig.savefig(buffer, format='png')

    return buffer.getvalue()
Esempio n. 8
0
def get_datasets(ctx: ServiceContext,
                 details: bool = False,
                 client: str = None,
                 point: Tuple[float, float] = None,
                 base_url: str = None) -> Dict:
    """
    List the datasets that are not flagged as 'Hidden'.

    :param ctx: Service context object
    :param details: If True, each entry is updated with the result of
           ``get_dataset`` for that dataset.
    :param client: Passed on to ``get_dataset`` when *details* is set.
    :param point: Optional point. If given, entries lacking a "bbox" get one
           computed from the opened dataset, and only entries accepted by
           ``_is_point_in_dataset_bbox`` are returned.
    :param base_url: Passed on to ``get_dataset`` when *details* is set.
    :return: A dictionary with a single entry "datasets", the list of
             dataset dictionaries.
    """
    dataset_dicts = []
    for descriptor in ctx.get_dataset_descriptors():
        if descriptor.get('Hidden'):
            continue

        ds_id = descriptor['Identifier']
        entry = {'id': ds_id}

        if 'Title' in descriptor:
            title = descriptor['Title']
            # An empty or non-string title falls back to the identifier.
            entry['title'] = title if title and isinstance(title, str) else ds_id

        if 'BoundingBox' in descriptor:
            bbox = descriptor['BoundingBox']
            # Only accept a bounding box made of exactly four numbers.
            if bbox and len(bbox) == 4 \
                    and all(isinstance(coord, (float, int)) for coord in bbox):
                entry['bbox'] = bbox

        dataset_dicts.append(entry)

    if details or point:
        for entry in dataset_dicts:
            ds_id = entry["id"]
            if point:
                ds = ctx.get_dataset(ds_id)
                if "bbox" not in entry:
                    entry["bbox"] = list(get_dataset_bounds(ds))
            if details:
                entry.update(get_dataset(ctx, ds_id, client, base_url))

    if point:
        dataset_dicts = [entry for entry in dataset_dicts
                         if _is_point_in_dataset_bbox(point, entry)]

    return dict(datasets=dataset_dicts)
Esempio n. 9
0
def get_dataset_place_groups(ctx: ServiceContext, ds_id: str,
                             base_url: str) -> List[GeoJsonFeatureCollection]:
    """
    Get the place groups of a dataset without their features.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param base_url: Base URL passed on to the place group lookup.
    :return: The result of ``_filter_place_groups`` with feature removal
             requested.
    """
    # Only place group metadata is wanted: neither load nor return features.
    return _filter_place_groups(
        ctx.get_dataset_place_groups(ds_id, base_url, load_features=False),
        del_features=True)
Esempio n. 10
0
def get_dataset_place_group(ctx: ServiceContext, ds_id: str,
                            place_group_id: str) -> GeoJsonFeatureCollection:
    """
    Get a single place group of a dataset, including its features.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param place_group_id: The place group identifier.
    :return: The result of ``_filter_place_group`` with features kept.
    """
    # This specific place group is requested with its features loaded.
    return _filter_place_group(
        ctx.get_dataset_place_group(ds_id, place_group_id, load_features=True),
        del_features=False)
Esempio n. 11
0
def get_dataset_tile_grid(ctx: ServiceContext, ds_id: str, var_name: str,
                          tile_client: str, base_url: str) -> Dict[str, Any]:
    """
    Get the tile source options of a variable for a given tile client.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param var_name: The variable name.
    :param tile_client: The tile client, either 'ol4' or 'cesium'.
    :param base_url: Base URL used to build the tile URL.
    :return: The result of ``get_tile_source_options``.
    :raise ServiceBadRequestError: if *tile_client* is not supported.
    """
    # The tile grid is looked up before the client name is validated.
    tile_grid = ctx.get_tile_grid(ds_id)
    if tile_client not in ('ol4', 'cesium'):
        raise ServiceBadRequestError(f'Unknown tile client "{tile_client}"')
    tile_url = get_dataset_tile_url(ctx, ds_id, var_name, base_url)
    return get_tile_source_options(tile_grid, tile_url, client=tile_client)
Esempio n. 12
0
def get_dataset_tile_url(ctx: ServiceContext,
                         ds_id: str,
                         var_name: str,
                         base_url: str):
    """
    Build the tile URL template for a variable of a dataset.

    Dataset identifier and variable name are URL-quoted; the placeholders
    ``{z}``, ``{x}`` and ``{y}`` are left in the result verbatim.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param var_name: The variable name.
    :param base_url: Base URL passed to ``ctx.get_service_url``.
    :return: The result of ``ctx.get_service_url`` for the path components.
    """
    quoted_ds_id = urllib.parse.quote_plus(ds_id)
    quoted_var_name = urllib.parse.quote_plus(var_name)
    path_components = ('datasets', quoted_ds_id,
                       'vars', quoted_var_name,
                       'tiles', '{z}/{x}/{y}.png')
    return ctx.get_service_url(base_url, *path_components)
Esempio n. 13
0
def get_time_series_for_feature_collection(ctx: ServiceContext,
                                           ds_name: str,
                                           var_name: str,
                                           feature_collection: Dict,
                                           start_date: np.datetime64 = None,
                                           end_date: np.datetime64 = None,
                                           include_count: bool = False,
                                           include_stdev: bool = False,
                                           max_valids: int = None) -> Dict:
    """
    Get the time-series for the geometries of a given *feature_collection*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param feature_collection: The feature collection.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the valid number of observations in the result.
    :param include_stdev: Whether to include the standard deviation in the result.
    :param max_valids: Optional number of valid points.
           If it is None (default), also missing values are returned as NaN;
           if it is -1 only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :return: Time-series data structure.
    :raise ServiceBadRequestError: if the feature collection or one of its
           feature geometries is invalid.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)

    features = GeoJSON.get_feature_collection_features(feature_collection)
    if features is None:
        raise ServiceBadRequestError("Invalid GeoJSON feature collection")

    shapes = []
    for feature in features:
        feature_geometry = GeoJSON.get_feature_geometry(feature)
        try:
            shapes.append(shapely.geometry.shape(feature_geometry))
        except (TypeError, ValueError) as e:
            raise ServiceBadRequestError(
                "Invalid GeoJSON feature collection") from e

    options = dict(start_date=start_date,
                   end_date=end_date,
                   include_count=include_count,
                   include_stdev=include_stdev,
                   max_valids=max_valids)
    with measure_time() as time_result:
        result = _get_time_series_for_geometries(dataset, var_name, shapes,
                                                 **options)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_feature_collection: dataset id {ds_name}, variable {var_name},'
            f'size={len(result["results"])}, took {time_result.duration} seconds'
        )
    return result
Esempio n. 14
0
def get_dataset_tile(ctx: ServiceContext, ds_id: str, var_name: str, x: str,
                     y: str, z: str, params: RequestParams):
    """
    Get the image tile of a variable at tile coordinates (*x*, *y*, *z*).

    The color bar name and value range are read from the query arguments
    'cbar', 'vmin' and 'vmax'; missing ones are filled in from
    ``ctx.get_color_mapping(ds_id, var_name)``. The query arguments 'mode'
    and 'debug' override the context's tile computation mode and
    performance tracing flag.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param var_name: The variable name.
    :param x: Tile x coordinate, converted with ``RequestParams.to_int``.
    :param y: Tile y coordinate, converted with ``RequestParams.to_int``.
    :param z: Tile z coordinate, converted with ``RequestParams.to_int``.
    :param params: Request parameters providing the query arguments.
    :return: The result of ``get_ml_dataset_tile``.
    """
    x = RequestParams.to_int('x', x)
    y = RequestParams.to_int('y', y)
    z = RequestParams.to_int('z', z)

    tile_comp_mode = params.get_query_argument_int('mode', ctx.tile_comp_mode)
    trace_perf = params.get_query_argument_int('debug', ctx.trace_perf) != 0

    cmap_name = params.get_query_argument('cbar', default=None)
    cmap_vmin = params.get_query_argument_float('vmin', default=None)
    cmap_vmax = params.get_query_argument_float('vmax', default=None)
    if cmap_name is None or cmap_vmin is None or cmap_vmax is None:
        default_cmap_name, default_cmap_vmin, default_cmap_vmax = ctx.get_color_mapping(
            ds_id, var_name)
        cmap_name = cmap_name or default_cmap_name
        # Test the bounds against None explicitly: 0.0 is a legitimate
        # value range bound and must not be replaced by the default.
        if cmap_vmin is None:
            cmap_vmin = default_cmap_vmin
        if cmap_vmax is None:
            cmap_vmax = default_cmap_vmax

    ml_dataset = ctx.get_ml_dataset(ds_id)
    var = ml_dataset.base_dataset[var_name]
    # Remaining query arguments select labels along the variable's
    # dimensions; slices are not allowed here.
    labels = parse_non_spatial_labels(params.get_query_arguments(),
                                      var.dims,
                                      var.coords,
                                      allow_slices=False,
                                      exception_type=ServiceBadRequestError)

    return get_ml_dataset_tile(ml_dataset,
                               var_name,
                               x,
                               y,
                               z,
                               labels=labels,
                               cmap_name=cmap_name,
                               cmap_vmin=cmap_vmin,
                               cmap_vmax=cmap_vmax,
                               image_cache=ctx.image_cache,
                               tile_cache=ctx.tile_cache,
                               tile_comp_mode=tile_comp_mode,
                               trace_perf=trace_perf,
                               exception_type=ServiceBadRequestError)
Esempio n. 15
0
def find_dataset_places(ctx: ServiceContext,
                        place_group_id: str,
                        ds_id: str,
                        query_expr: Any = None,
                        comb_op: str = "and") -> GeoJsonFeatureCollection:
    """
    Find the places of a place group using the geometry of a dataset
    as the query geometry.

    :param ctx: Service context object
    :param place_group_id: The place group identifier.
    :param ds_id: The dataset identifier.
    :param query_expr: Optional query expression, passed on unchanged.
    :param comb_op: Combination operator, passed on unchanged.
    :return: The result of ``_find_places``.
    """
    dataset_geometry = get_dataset_geometry(ctx.get_dataset(ds_id))
    return _find_places(ctx,
                        place_group_id,
                        query_geometry=dataset_geometry,
                        query_expr=query_expr,
                        comb_op=comb_op)
Esempio n. 16
0
def get_time_series_for_geometry(ctx: ServiceContext,
                                 ds_name: str,
                                 var_name: str,
                                 geometry: Dict,
                                 start_date: np.datetime64 = None,
                                 end_date: np.datetime64 = None,
                                 include_count: bool = False,
                                 include_stdev: bool = False,
                                 max_valids: int = None) -> Dict:
    """
    Get the time-series for a given *geometry*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geometry: The geometry, usually a point or polygon.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the valid number of observations in the result.
    :param include_stdev: Whether to include the standard deviation in the result.
    :param max_valids: Optional number of valid points.
           If it is None (default), also missing values are returned as NaN;
           if it is -1 only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :return: Time-series data structure.
    :raise ServiceBadRequestError: if *geometry* is not a GeoJSON geometry.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)

    if not GeoJSON.is_geometry(geometry):
        raise ServiceBadRequestError("Invalid GeoJSON geometry")
    # Dictionaries are converted into shapely geometries, anything else
    # is passed on as it is.
    shape = shapely.geometry.shape(geometry) if isinstance(geometry, dict) else geometry

    options = dict(start_date=start_date,
                   end_date=end_date,
                   include_count=include_count,
                   include_stdev=include_stdev,
                   max_valids=max_valids)
    with measure_time() as time_result:
        result = _get_time_series_for_geometry(dataset, var_name, shape,
                                               **options)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_geometry: dataset id {ds_name}, variable {var_name}, '
            f'geometry type {shape},'
            f'size={len(result["results"])}, took {time_result.duration} seconds'
        )
    return result
Esempio n. 17
0
def get_dataset_coordinates(ctx: ServiceContext, ds_id: str,
                            dim_name: str) -> Dict:
    """
    Get the coordinate values of a dimension of a dataset.

    Floating point and integer coordinates are converted to ``float`` and
    ``int``; values of any other dtype are converted with
    ``timestamp_to_iso_string`` (presumably these are time coordinates).

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param dim_name: The dimension name.
    :return: A dictionary with the entries "name", "size", "dtype",
             and "coordinates".
    """
    _, coord_var = ctx.get_dataset_and_coord_variable(ds_id, dim_name)

    if np.issubdtype(coord_var.dtype, np.floating):
        convert = float
    elif np.issubdtype(coord_var.dtype, np.integer):
        convert = int
    else:
        convert = timestamp_to_iso_string

    coordinates = [convert(value) for value in coord_var.values]
    return dict(name=dim_name,
                size=len(coordinates),
                dtype=str(coord_var.dtype),
                coordinates=coordinates)
Esempio n. 18
0
    def test_get_colorbars(self):
        """
        Check the start of the color bars response for the supported
        formats and the error raised for an unsupported format.
        """
        ctx = ServiceContext()

        # (requested format, expected first 40 characters of the response)
        expected_heads = [
            ('application/json', '[\n  [\n    "Perceptually Uniform Sequenti'),
            ('text/html', '<!DOCTYPE html>\n<html lang="en">\n<head><'),
        ]
        for mime_type, expected_head in expected_heads:
            response = get_color_bars(ctx, mime_type)
            self.assertIsInstance(response, str)
            self.assertTrue(len(response) > 40)
            self.assertEqual(expected_head, response[0:40])

        with self.assertRaises(ServiceBadRequestError) as cm:
            get_color_bars(ctx, 'text/xml')
        error = cm.exception
        self.assertEqual(400, error.status_code)
        self.assertEqual("Format 'text/xml' not supported for color bars", error.reason)
Esempio n. 19
0
    def test_get_ne2_tile_grid(self):
        """
        Check the tile grid returned for the 'ol4' client and the error
        raised for the 'cesium' client.
        """
        ctx = ServiceContext()

        actual_tile_grid = get_ne2_tile_grid(ctx, 'ol4', 'http://bibo')
        expected_tile_grid = {
            'url': self.base_url + '/ne2/tiles/{z}/{x}/{y}.jpg',
            'projection': 'EPSG:4326',
            'minZoom': 0,
            'maxZoom': 2,
            'tileGrid': {
                'extent': [-180.0, -90.0, 180.0, 90.0],
                'origin': [-180.0, 90.0],
                'resolutions': [0.703125, 0.3515625, 0.17578125],
                'tileSize': [256, 256],
            },
        }
        self.assertEqual(expected_tile_grid, actual_tile_grid)

        with self.assertRaises(ServiceBadRequestError) as cm:
            get_ne2_tile_grid(ctx, 'cesium', 'http://bibo')
        error = cm.exception
        self.assertEqual(400, error.status_code)
        self.assertEqual("Unknown tile client 'cesium'", error.reason)
Esempio n. 20
0
    def test_point_ts_perf(self):
        """
        Time a few point time-series requests against a generated test cube
        and print the individual and average durations.
        """
        import random
        import time

        cube_path = 'ts_test.zarr'

        # The test cube is only generated if it does not exist yet.
        if not os.path.isdir(cube_path):
            from xcube.core.new import new_cube
            new_cube(time_periods=2000, variables=dict(analysed_sst=280.4)) \
                .chunk(dict(time=1, lon=90, lat=90)) \
                .to_zarr(cube_path)

        dataset_config = {
            'Identifier': 'ts_test',
            'FileSystem': 'file',
            'Path': cube_path,
            'Format': 'zarr',
        }
        ctx = ServiceContext(base_dir='.',
                             config={'Datasets': [dataset_config]})

        num_runs = 5
        total_time = 0.0
        for run_index in range(num_runs):
            # Random point anywhere on the globe.
            lon = -180 + 360 * random.random()
            lat = -90 + 180 * random.random()

            started = time.perf_counter()
            result = get_time_series(ctx, 'ts_test', 'analysed_sst', dict(type='Point', coordinates=[lon, lat]))
            stopped = time.perf_counter()

            self.assertIsInstance(result, list)
            self.assertEqual(2000, len(result))

            elapsed = stopped - started
            total_time += elapsed
            print(f'test {run_index + 1} took {elapsed} seconds')

        print(f'all tests took {total_time / num_runs} seconds in average')
Esempio n. 21
0
def get_time_series_for_point(ctx: ServiceContext,
                              ds_name: str,
                              var_name: str,
                              lon: float,
                              lat: float,
                              start_date: np.datetime64 = None,
                              end_date: np.datetime64 = None,
                              max_valids: int = None) -> Dict:
    """
    Get the time-series for a given point.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param lon: The point's longitude in decimal degrees.
    :param lat: The point's latitude in decimal degrees.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param max_valids: Optional number of valid points.
           If it is None (default), also missing values are returned as NaN;
           if it is -1 only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)

    options = dict(start_date=start_date,
                   end_date=end_date,
                   max_valids=max_valids)
    with measure_time() as time_result:
        point = shapely.geometry.Point(lon, lat)
        result = _get_time_series_for_point(dataset, var_name, point,
                                            **options)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_point:: dataset id {ds_name}, variable {var_name}, '
            f'geometry type {point}, size={len(result["results"])}, '
            f'took {time_result.duration} seconds')
    return result
Esempio n. 22
0
def get_dataset(ctx: ServiceContext,
                ds_id: str,
                client=None,
                base_url: str = None) -> GeoJsonFeatureCollection:
    """
    Get detailed information about a dataset.

    Only variables with at least three dimensions whose first dimension is
    'time' and whose last two dimensions are 'lat' and 'lon' are listed.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param client: Optional tile client name. If given, every listed
           variable gets a "tileSourceOptions" entry for that client.
    :param base_url: Base URL used to build tile URLs.
    :return: A dictionary with the entries "id", "title", "bbox",
             "variables", "dimensions" and, if the dataset has place
             groups, "placeGroups".
    """
    dataset_descriptor = ctx.get_dataset_descriptor(ds_id)

    ds_id = dataset_descriptor['Identifier']
    # A descriptor is not required to carry a 'Title'; fall back to the
    # identifier instead of failing with a KeyError.
    ds_title = dataset_descriptor.get('Title', ds_id)
    dataset_dict = dict(id=ds_id, title=ds_title)

    ds = ctx.get_dataset(ds_id)

    dataset_dict["bbox"] = list(get_dataset_bounds(ds))

    variable_dicts = []
    for var_name in ds.data_vars:
        var = ds.data_vars[var_name]
        dims = var.dims
        if len(dims) < 3 or dims[0] != 'time' \
                or dims[-2] != 'lat' or dims[-1] != 'lon':
            continue

        variable_dict = dict(id=f'{ds_id}.{var_name}',
                             name=var_name,
                             dims=list(dims),
                             shape=list(var.shape),
                             dtype=str(var.dtype),
                             units=var.attrs.get('units', ''),
                             title=var.attrs.get(
                                 'title', var.attrs.get('long_name',
                                                        var_name)))

        if client is not None:
            tile_grid = ctx.get_tile_grid(ds_id)
            tile_xyz_source_options = get_tile_source_options(
                tile_grid,
                get_dataset_tile_url(ctx, ds_id, var_name, base_url),
                client=client)
            variable_dict["tileSourceOptions"] = tile_xyz_source_options

        cbar, vmin, vmax = ctx.get_color_mapping(ds_id, var_name)
        variable_dict["colorBarName"] = cbar
        variable_dict["colorBarMin"] = vmin
        variable_dict["colorBarMax"] = vmax

        variable_dicts.append(variable_dict)

    dataset_dict["variables"] = variable_dicts

    # Dimensions are taken from the first data variable, or from the
    # dataset itself if it has no data variables.
    if len(ds.data_vars) > 0:
        first_var_name = list(ds.data_vars)[0]
        dim_names = ds.data_vars[first_var_name].dims
    else:
        dim_names = ds.dims.keys()
    dataset_dict["dimensions"] = [
        get_dataset_coordinates(ctx, ds_id, dim_name) for dim_name in dim_names
    ]

    place_groups = ctx.get_dataset_place_groups(ds_id)
    if place_groups:
        dataset_dict["placeGroups"] = _filter_place_groups(place_groups,
                                                           del_features=True)

    return dataset_dict
Esempio n. 23
0
def get_dataset(ctx: ServiceContext,
                ds_id: str,
                client=None,
                base_url: str = None,
                granted_scopes: Set[str] = None) -> Dict:
    """
    Get detailed information about a dataset.

    Only variables with at least three dimensions whose first dimension is
    'time' and whose last two dimensions are 'lat' and 'lon' are listed.
    Variables whose required scopes are not granted are left out.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param client: Optional tile client name. If given, every listed
           variable (and the RGB schema, if any) gets a
           "tileSourceOptions" entry for that client.
    :param base_url: Base URL used to build tile URLs and passed on to the
           place group lookup.
    :param granted_scopes: Optional set of granted scopes; None is treated
           as the empty set. 'read:dataset:*' skips the dataset scope check,
           'read:variable:*' skips the per-variable scope checks.
    :return: A dictionary with the entries "id", "title", "bbox",
             "variables", "dimensions", "attrs" and, where applicable,
             "rgbSchema", "attributions" and "placeGroups".
    """
    granted_scopes = granted_scopes or set()

    dataset_descriptor = ctx.get_dataset_descriptor(ds_id)
    ds_id = dataset_descriptor['Identifier']

    if 'read:dataset:*' not in granted_scopes:
        required_scopes = ctx.get_required_dataset_scopes(dataset_descriptor)
        assert_scopes(required_scopes, granted_scopes)

    # A descriptor is not required to carry a 'Title'; fall back to the
    # identifier instead of failing with a KeyError.
    ds_title = dataset_descriptor.get('Title', ds_id)
    dataset_dict = dict(id=ds_id, title=ds_title)

    ds = ctx.get_dataset(ds_id)

    dataset_dict["bbox"] = list(get_dataset_bounds(ds))

    variable_dicts = []
    for var_name in ds.data_vars:
        var = ds.data_vars[var_name]
        dims = var.dims
        if len(dims) < 3 or dims[0] != 'time' \
                or dims[-2] != 'lat' or dims[-1] != 'lon':
            continue

        if 'read:variable:*' not in granted_scopes:
            required_scopes = ctx.get_required_variable_scopes(
                dataset_descriptor, var_name)
            if not check_scopes(required_scopes, granted_scopes):
                continue

        variable_dict = dict(id=f'{ds_id}.{var_name}',
                             name=var_name,
                             dims=list(dims),
                             shape=list(var.shape),
                             dtype=str(var.dtype),
                             units=var.attrs.get('units', ''),
                             title=var.attrs.get(
                                 'title', var.attrs.get('long_name',
                                                        var_name)))

        if client is not None:
            tile_grid = ctx.get_tile_grid(ds_id)
            tile_xyz_source_options = get_tile_source_options(
                tile_grid,
                get_dataset_tile_url(ctx, ds_id, var_name, base_url),
                client=client)
            variable_dict["tileSourceOptions"] = tile_xyz_source_options

        cmap_name, (cmap_vmin,
                    cmap_vmax) = ctx.get_color_mapping(ds_id, var_name)
        variable_dict["colorBarName"] = cmap_name
        variable_dict["colorBarMin"] = cmap_vmin
        variable_dict["colorBarMax"] = cmap_vmax

        if hasattr(var.data, '_repr_html_'):
            variable_dict["htmlRepr"] = var.data._repr_html_()

        variable_dict["attrs"] = {
            key: var.attrs[key]
            for key in sorted(var.attrs)
        }

        variable_dicts.append(variable_dict)

    dataset_dict["variables"] = variable_dicts

    rgb_var_names, rgb_norm_ranges = ctx.get_rgb_color_mapping(ds_id)
    if any(rgb_var_names):
        rgb_schema = {'varNames': rgb_var_names, 'normRanges': rgb_norm_ranges}
        if client is not None:
            tile_grid = ctx.get_tile_grid(ds_id)
            # The RGB tiles are addressed by the pseudo variable name 'rgb'.
            tile_xyz_source_options = get_tile_source_options(
                tile_grid,
                get_dataset_tile_url(ctx, ds_id, 'rgb', base_url),
                client=client)
            rgb_schema["tileSourceOptions"] = tile_xyz_source_options
        dataset_dict["rgbSchema"] = rgb_schema

    # Dimensions are taken from the first data variable, or from the
    # dataset itself if it has no data variables.
    if len(ds.data_vars) > 0:
        first_var_name = list(ds.data_vars)[0]
        dim_names = ds.data_vars[first_var_name].dims
    else:
        dim_names = ds.dims.keys()
    dataset_dict["dimensions"] = [
        get_dataset_coordinates(ctx, ds_id, dim_name) for dim_name in dim_names
    ]

    dataset_dict["attrs"] = {
        key: ds.attrs[key]
        for key in sorted(ds.attrs)
    }

    # A dataset-specific attribution takes precedence over the one given
    # in the service configuration.
    dataset_attributions = dataset_descriptor.get(
        'DatasetAttribution', ctx.config.get('DatasetAttribution'))
    if dataset_attributions is not None:
        if isinstance(dataset_attributions, str):
            dataset_attributions = [dataset_attributions]
        dataset_dict['attributions'] = dataset_attributions

    place_groups = ctx.get_dataset_place_groups(ds_id, base_url)
    if place_groups:
        dataset_dict["placeGroups"] = _filter_place_groups(place_groups,
                                                           del_features=True)

    return dataset_dict
Esempio n. 24
0
class Service:
    """
    A web service that provides a remote API to some application.

    The constructor connects a Tornado ``Application`` with a
    ``ServiceContext``, starts listening on the given address/port and
    installs handlers for SIGINT and SIGTERM. :meth:`start` then runs the
    I/O loop until :meth:`stop` is called or one of the signals is caught.
    """

    def __init__(self,
                 application: Application,
                 prefix: str = None,
                 address: str = DEFAULT_ADDRESS,
                 port: int = DEFAULT_PORT,
                 cube_paths: List[str] = None,
                 styles: Dict[str, Tuple] = None,
                 config_file: Optional[str] = None,
                 base_dir: Optional[str] = None,
                 tile_cache_size: Optional[str] = DEFAULT_TILE_CACHE_SIZE,
                 tile_comp_mode: int = DEFAULT_TILE_COMP_MODE,
                 update_period: Optional[float] = DEFAULT_UPDATE_PERIOD,
                 trace_perf: bool = DEFAULT_TRACE_PERF,
                 log_file_prefix: str = DEFAULT_LOG_PREFIX,
                 log_to_stderr: bool = False,
                 aws_prof: str = None,
                 aws_env: bool = False) -> None:

        """
        Set up the service; call :meth:`start` to begin serving requests.

        The service is configured either from *config_file* or from the
        inline options *cube_paths*, *styles*, *aws_prof* and *aws_env*,
        but never from both.

        :param application: The Tornado web application
        :param prefix: passed on to the service context as its ``prefix``
        :param address: the address to listen on; ``'localhost'`` is used
            for listening if it is empty
        :param port: the port number
        :param cube_paths: optional list of cube paths
        :param styles: optional styles, used together with *cube_paths*
            to build a default configuration
        :param config_file: optional configuration file
        :param base_dir: optional base directory
        :param tile_cache_size: tile cache size, parsed with
            ``parse_mem_size``
        :param tile_comp_mode: passed on to the service context
        :param update_period: if not None and positive, the interval in
            seconds at which *config_file* is checked for modifications
        :param trace_perf: passed on to the service context
        :param log_file_prefix: Log file prefix; ``DEFAULT_LOG_PREFIX``
            is used if it is None or empty
        :param log_to_stderr: Whether logging should be shown on stderr
        :param aws_prof: passed to ``new_default_config`` together with
            *cube_paths*
        :param aws_env: passed to ``new_default_config`` together with
            *cube_paths*
        :raise ValueError: if *config_file* is combined with any of
            *cube_paths*, *styles*, *aws_prof* or *aws_env*
        """
        if config_file and cube_paths:
            raise ValueError("config_file and cube_paths cannot be given both")
        if config_file and styles:
            raise ValueError("config_file and styles cannot be given both")
        if config_file and aws_prof:
            raise ValueError("config_file and aws_profile cannot be given both")
        if config_file and aws_env:
            raise ValueError("config_file and aws_env cannot be given both")

        # A configuration file may define a custom color list that replaces
        # the module-level one.
        global SNAP_CPD_LIST
        if config_file:
            SNAP_CPD_LIST = _get_custom_color_list(config_file)

        # Resolve the effective prefix first: os.path.dirname() raises a
        # TypeError for None, and the directory to create must be the one
        # the log file is actually written to.
        log_file_prefix = log_file_prefix or DEFAULT_LOG_PREFIX
        log_dir = os.path.dirname(log_file_prefix)
        if log_dir and not os.path.isdir(log_dir):
            os.makedirs(log_dir, exist_ok=True)

        options = tornado.options.options
        options.log_file_prefix = log_file_prefix
        options.log_to_stderr = log_to_stderr
        enable_pretty_logging()

        tile_cache_capacity = parse_mem_size(tile_cache_size)

        config = None
        if cube_paths:
            config = new_default_config(cube_paths, styles, aws_prof=aws_prof, aws_env=aws_env)

        self.config_file = os.path.abspath(config_file) if config_file else None
        self.update_period = update_period
        # Handle of the pending configuration check, if one is scheduled.
        self.update_timer = None
        # Last configuration error that was logged; used to avoid logging
        # the same failure on every check.
        self.config_error = None
        self.service_info = dict(port=port,
                                 address=address,
                                 started=datetime.now().isoformat(sep=' '),
                                 pid=os.getpid())

        self.context = ServiceContext(prefix=prefix,
                                      config=config,
                                      base_dir=base_dir,
                                      trace_perf=trace_perf,
                                      tile_comp_mode=tile_comp_mode,
                                      tile_cache_capacity=tile_cache_capacity)
        self._maybe_load_config()

        application.service_context = self.context
        application.time_of_last_activity = time.process_time()
        self.application = application

        # Must exist before the signal handlers are installed, otherwise a
        # signal caught during construction would be reset to False below.
        self._shutdown_requested = False

        # Register handlers for common termination signals
        signal.signal(signal.SIGINT, self._sig_handler)
        signal.signal(signal.SIGTERM, self._sig_handler)

        self.server = application.listen(port, address=address or 'localhost')
        # Ensure we have the same event loop in all threads
        asyncio.set_event_loop_policy(_GlobalEventLoopPolicy(asyncio.get_event_loop()))
        self._maybe_install_update_check()

    def start(self):
        """
        Run the I/O loop; blocks until the loop is stopped.

        Logs the listening address and a test URL, warns if neither
        'Datasets' nor 'DataStores' are configured, and polls every 100 ms
        for a shutdown request raised by a signal handler.
        """
        address = self.service_info['address']
        port = self.service_info['port']
        test_url = self.context.get_service_url(f"http://{address}:{port}", "datasets")
        LOG.info(f'service running, listening on {address}:{port}, try {test_url}')
        LOG.info('press CTRL+C to stop service')
        if not self.context.config.get('Datasets', []) \
                and not self.context.config.get('DataStores', []):
            LOG.warning('no datasets or data stores configured')
        # The signal handler only sets a flag; the actual shutdown is
        # performed by this periodic callback on the I/O loop.
        tornado.ioloop.PeriodicCallback(self._try_shutdown, 100).start()
        IOLoop.current().start()

    def stop(self, kill=False):
        """
        Stops the Tornado web server.

        :param kill: if True, exit the process immediately via
            ``sys.exit(0)``; otherwise schedule an orderly shutdown on the
            current I/O loop.
        """
        if kill:
            sys.exit(0)
        else:
            IOLoop.current().add_callback(self._on_shutdown)

    def _on_shutdown(self):
        """
        Cancel the pending configuration check, stop the HTTP server and
        stop the current I/O loop.
        """

        LOG.info('stopping service...')

        # Cancel through the loop that issued the handle; handles returned
        # by call_later() are opaque and need not offer a cancel() method.
        update_timer = self.update_timer
        self.update_timer = None
        if update_timer is not None:
            IOLoop.current().remove_timeout(update_timer)

        if self.server:
            self.server.stop()
            self.server = None

        IOLoop.current().stop()
        LOG.info('service stopped.')

    def _try_shutdown(self):
        """Shut down if a termination signal has been caught."""
        if self._shutdown_requested:
            self._on_shutdown()

    # noinspection PyUnusedLocal
    def _sig_handler(self, sig, frame):
        """Signal handler: only records that a shutdown was requested."""
        LOG.warning(f'caught signal {sig}')
        self._shutdown_requested = True

    def _maybe_install_update_check(self):
        """
        Schedule the next configuration check after ``update_period``
        seconds. Does nothing without a configuration file or if the
        period is None or not positive.
        """
        if self.config_file is None or self.update_period is None or self.update_period <= 0:
            return
        # Keep the handle so that the check can be cancelled on shutdown.
        self.update_timer = IOLoop.current().call_later(self.update_period,
                                                        self._maybe_check_for_updates)

    def _maybe_check_for_updates(self):
        """Reload the configuration if needed and schedule the next check."""
        self._maybe_load_config()
        self._maybe_install_update_check()

    def _maybe_load_config(self):
        """
        (Re)load the configuration file if its modification time differs
        from the one recorded in the service context.

        Failures to stat or to parse the file are logged once and stored
        in ``config_error``; they are not raised.
        """
        config_file = self.config_file
        if config_file is None:
            return

        try:
            stat = os.stat(config_file)
        except OSError as e:
            if self.config_error is None:
                LOG.error(f'configuration file {config_file!r}: {e}')
                self.config_error = e
            return

        if self.context.config_mtime != stat.st_mtime:
            # The new mtime is recorded before parsing, so a file that
            # fails to parse is not parsed again until it is modified.
            self.context.config_mtime = stat.st_mtime
            try:
                self.context.config = load_json_or_yaml_config(config_file)
                self.config_error = None
                LOG.info(f'configuration file {config_file!r} successfully loaded')
            except ValueError as e:
                if self.config_error is None:
                    LOG.error(f'configuration file {config_file!r}: {e}')
                    self.config_error = e
Esempio n. 25
0
def get_datasets(ctx: ServiceContext,
                 details: bool = False,
                 client: str = None,
                 point: Tuple[float, float] = None,
                 base_url: str = None,
                 granted_scopes: Set[str] = None) -> Dict:
    """
    List the datasets of the service that are visible to the caller.

    Datasets whose descriptor has a truthy 'Hidden' entry are skipped, and
    so are datasets for which ``check_scopes`` rejects *granted_scopes*.
    The scope check is bypassed if ``'read:dataset:*'`` is granted.

    :param ctx: Service context object
    :param details: if True, each entry is extended by the result of
        ``get_dataset`` for that dataset
    :param client: passed on to ``get_dataset`` if *details* is True
    :param point: if given, only datasets accepted by
        ``_is_point_in_dataset_bbox`` for this point are returned
    :param base_url: passed on to ``get_dataset`` if *details* is True
    :param granted_scopes: scopes granted to the caller; defaults to none
    :return: a dictionary with the single key "datasets" whose value is a
        list of dictionaries with key "id" and, if available, "title" and
        "bbox"
    """
    granted_scopes = granted_scopes or set()

    dataset_descriptors = ctx.get_dataset_descriptors()

    dataset_dicts = list()
    for dataset_descriptor in dataset_descriptors:

        ds_id = dataset_descriptor['Identifier']

        if dataset_descriptor.get('Hidden'):
            continue

        if 'read:dataset:*' not in granted_scopes:
            required_scopes = ctx.get_required_dataset_scopes(
                dataset_descriptor)
            is_substitute = dataset_descriptor.get('AccessControl', {}).get(
                'IsSubstitute', False)
            if not check_scopes(required_scopes,
                                granted_scopes,
                                is_substitute=is_substitute):
                continue

        dataset_dict = dict(id=ds_id)

        if 'Title' in dataset_descriptor:
            ds_title = dataset_descriptor['Title']
            # Fall back to the identifier for empty or non-string titles.
            if ds_title and isinstance(ds_title, str):
                dataset_dict['title'] = ds_title
            else:
                dataset_dict['title'] = ds_id

        if 'BoundingBox' in dataset_descriptor:
            ds_bbox = dataset_descriptor['BoundingBox']
            # Only accept a configured bounding box of four numbers.
            if ds_bbox \
                    and len(ds_bbox) == 4 \
                    and all(isinstance(c, (float, int)) for c in ds_bbox):
                dataset_dict['bbox'] = ds_bbox

        dataset_dicts.append(dataset_dict)

    if details or point:
        for dataset_dict in dataset_dicts:
            ds_id = dataset_dict["id"]
            # Open the dataset only if its bounds are really needed; the
            # opened dataset has no other use here.
            if point and "bbox" not in dataset_dict:
                ds = ctx.get_dataset(ds_id)
                dataset_dict["bbox"] = list(get_dataset_bounds(ds))
            if details:
                dataset_dict.update(
                    get_dataset(ctx,
                                ds_id,
                                client,
                                base_url,
                                granted_scopes=granted_scopes))

    if point:
        is_point_in_dataset_bbox = functools.partial(_is_point_in_dataset_bbox,
                                                     point)
        # noinspection PyTypeChecker
        dataset_dicts = list(filter(is_point_in_dataset_bbox, dataset_dicts))

    return dict(datasets=dataset_dicts)
Esempio n. 26
0
def get_ne2_tile_url(ctx: ServiceContext, base_url: str):
    """
    Build the tile URL template for the 'ne2' tiles.

    :param ctx: Service context object used to compose the service URL
    :param base_url: base URL the service URL is derived from
    :return: whatever ``ctx.get_service_url`` returns for the path
        components 'ne2', 'tiles' and '{z}/{x}/{y}.jpg'
    """
    path_components = ('ne2', 'tiles', '{z}/{x}/{y}.jpg')
    return ctx.get_service_url(base_url, *path_components)
Esempio n. 27
0
def get_time_series(
    ctx: ServiceContext,
    ds_name: str,
    var_name: str,
    geo_json: GeoJsonObj,
    agg_methods: Union[str, Sequence[str]] = None,
    start_date: np.datetime64 = None,
    end_date: np.datetime64 = None,
    max_valids: int = None,
    incl_ancillary_vars: bool = False
) -> Union[TimeSeries, TimeSeriesCollection]:
    """
    Get the time-series for a given GeoJSON object *geo_json*.

    If *geo_json* is a single geometry or feature, a list of time-series values is returned.
    If *geo_json* is a geometry collection or feature collection, a collection of lists of time-series values
    is returned so that geometry/feature collection and time-series collection elements correspond
    at the same indices.

    A time series value always contains the key "time" whose value is an ISO date/time string. Other entries
    are values with varying keys depending on the geometry type and *agg_methods*. Their values may be
    either a bool, int, float or None.
    For point geometries the second key is "value".
    For non-point geometries that cover spatial areas, there will be values for all keys given by *agg_methods*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geo_json: The GeoJSON object that is or has a geometry or a collection of geometries.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param agg_methods: Spatial aggregation methods for geometries that cover a spatial area.
    :param max_valids: Optional number of valid points.
           If it is None (default), also missing values are returned as NaN;
           if it is -1 only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :param incl_ancillary_vars: For point geometries, include values of ancillary variables, if any.
    :return: Time-series data structure.
    """
    agg_methods = timeseries.normalize_agg_methods(
        agg_methods, exception_type=ServiceBadRequestError)

    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    geo_json_geometries, is_collection = _to_geo_json_geometries(geo_json)
    geometries = _to_shapely_geometries(geo_json_geometries)

    with measure_time() as time_result:
        results = _get_time_series_for_geometries(
            dataset,
            var_name,
            geometries,
            start_date=start_date,
            end_date=end_date,
            agg_methods=agg_methods,
            max_valids=max_valids,
            incl_ancillary_vars=incl_ancillary_vars)

    if ctx.trace_perf:
        # An empty collection yields no results at all; performance
        # tracing must not turn that into an IndexError.
        num_values = len(results[0]) if len(results) > 0 else 0
        LOG.info(
            f'get_time_series: dataset id {ds_name}, variable {var_name}, '
            f'{len(results)} x {num_values} values, took {time_result.duration} seconds'
        )

    # A single geometry or feature is answered with its plain time-series,
    # anything given as a collection with a collection of time-series.
    return results[0] if not is_collection and len(results) == 1 else results
Esempio n. 28
0
def get_dataset_tile(ctx: ServiceContext, ds_id: str, var_name: str, x: str,
                     y: str, z: str, params: RequestParams):
    """
    Compute the image tile of a dataset variable, or of an RGB composite.

    The following query arguments of *params* are evaluated:

    * ``mode``: tile computation mode, defaults to ``ctx.tile_comp_mode``;
    * ``debug``: non-zero enables performance tracing, defaults to
      ``ctx.trace_perf``;
    * for *var_name* ``'rgb'``: ``vmin``/``vmax`` (defaults 0.0/1.0) as
      common normalisation range, ``r``, ``g``, ``b`` to override the
      variable used for a component, and ``rvmin``/``rvmax``,
      ``gvmin``/``gvmax``, ``bvmin``/``bvmax`` to override the range of
      a component;
    * otherwise: ``cbar``, ``vmin`` and ``vmax``; values not given are
      taken from ``ctx.get_color_mapping``.

    All query arguments are also passed to ``parse_non_spatial_labels``
    together with the dimensions and coordinates of the variable.

    :param ctx: Service context object
    :param ds_id: The dataset identifier.
    :param var_name: The variable name, or ``'rgb'`` for an RGB composite.
    :param x: tile x index, converted with ``RequestParams.to_int``
    :param y: tile y index, converted with ``RequestParams.to_int``
    :param z: tile z index, converted with ``RequestParams.to_int``
    :param params: The request parameters.
    :return: the result of ``get_ml_dataset_tile``
    :raise ServiceBadRequestError: if a requested variable is not part of
        the dataset or if no variable is given for an RGB composite
    """
    x = RequestParams.to_int('x', x)
    y = RequestParams.to_int('y', y)
    z = RequestParams.to_int('z', z)

    tile_comp_mode = params.get_query_argument_int('mode', ctx.tile_comp_mode)
    trace_perf = params.get_query_argument_int('debug', ctx.trace_perf) != 0

    ml_dataset = ctx.get_ml_dataset(ds_id)
    if var_name == 'rgb':
        norm_vmin = params.get_query_argument_float('vmin', default=0.0)
        norm_vmax = params.get_query_argument_float('vmax', default=1.0)
        var_names, norm_ranges = ctx.get_rgb_color_mapping(
            ds_id, norm_range=(norm_vmin, norm_vmax))
        # Work on copies: the per-request overrides below must not modify
        # the sequences handed out by the context, and a returned tuple
        # would not support item assignment.
        var_names = list(var_names)
        norm_ranges = list(norm_ranges)
        for i, c in enumerate(('r', 'g', 'b')):
            var_names[i] = params.get_query_argument(c, default=var_names[i])
            norm_ranges[i] = (
                params.get_query_argument_float(f'{c}vmin', default=norm_ranges[i][0]),
                params.get_query_argument_float(f'{c}vmax', default=norm_ranges[i][1]),
            )
        cmap_name = tuple(var_names)
        cmap_range = tuple(norm_ranges)
        # Components without a variable name are allowed and ignored here.
        given_var_names = [name for name in var_names if name]
        for name in given_var_names:
            if name not in ml_dataset.base_dataset:
                raise ServiceBadRequestError(
                    f'Variable {name!r} not found in dataset {ds_id!r}')
        if not given_var_names:
            raise ServiceBadRequestError(
                f'No variable in dataset {ds_id!r} specified for RGB')
        # The first given component provides dimensions and coordinates.
        var = ml_dataset.base_dataset[given_var_names[0]]
    else:
        cmap_name = params.get_query_argument('cbar', default=None)
        cmap_vmin = params.get_query_argument_float('vmin', default=None)
        cmap_vmax = params.get_query_argument_float('vmax', default=None)
        if cmap_name is None or cmap_vmin is None or cmap_vmax is None:
            default_cmap_name, (default_cmap_vmin,
                                default_cmap_vmax) = ctx.get_color_mapping(
                                    ds_id, var_name)
            cmap_name = cmap_name or default_cmap_name
            # Compare against None rather than using "or": an explicitly
            # requested limit of 0 is valid and must not be replaced.
            if cmap_vmin is None:
                cmap_vmin = default_cmap_vmin
            if cmap_vmax is None:
                cmap_vmax = default_cmap_vmax
        cmap_range = cmap_vmin, cmap_vmax
        if var_name not in ml_dataset.base_dataset:
            raise ServiceBadRequestError(
                f'Variable {var_name!r} not found in dataset {ds_id!r}')
        var = ml_dataset.base_dataset[var_name]

    labels = parse_non_spatial_labels(params.get_query_arguments(),
                                      var.dims,
                                      var.coords,
                                      allow_slices=False,
                                      exception_type=ServiceBadRequestError)

    return get_ml_dataset_tile(ml_dataset,
                               var_name,
                               x,
                               y,
                               z,
                               labels=labels,
                               cmap_name=cmap_name,
                               cmap_range=cmap_range,
                               image_cache=ctx.image_cache,
                               tile_cache=ctx.tile_cache,
                               tile_comp_mode=tile_comp_mode,
                               trace_perf=trace_perf,
                               exception_type=ServiceBadRequestError)
Esempio n. 29
0
    def __init__(self,
                 application: Application,
                 prefix: str = None,
                 address: str = DEFAULT_ADDRESS,
                 port: int = DEFAULT_PORT,
                 cube_paths: List[str] = None,
                 styles: Dict[str, Tuple] = None,
                 config_file: Optional[str] = None,
                 base_dir: Optional[str] = None,
                 tile_cache_size: Optional[str] = DEFAULT_TILE_CACHE_SIZE,
                 tile_comp_mode: int = DEFAULT_TILE_COMP_MODE,
                 update_period: Optional[float] = DEFAULT_UPDATE_PERIOD,
                 trace_perf: bool = DEFAULT_TRACE_PERF,
                 log_file_prefix: str = DEFAULT_LOG_PREFIX,
                 log_to_stderr: bool = False,
                 aws_prof: str = None,
                 aws_env: bool = False) -> None:

        """
        Set up the service: configure logging, create the service context,
        install signal handlers and start listening on *address*/*port*.

        The service is configured either from *config_file* or from the
        inline options *cube_paths*, *styles*, *aws_prof* and *aws_env*,
        but never from both.

        :param application: The Tornado web application
        :param prefix: passed on to the service context as its ``prefix``
        :param address: the address to listen on; ``'localhost'`` is used
            for listening if it is empty
        :param port: the port number
        :param cube_paths: optional list of cube paths
        :param styles: optional styles, used together with *cube_paths*
            to build a default configuration
        :param config_file: optional configuration file
        :param base_dir: optional base directory
        :param tile_cache_size: tile cache size, parsed with
            ``parse_mem_size``
        :param tile_comp_mode: passed on to the service context
        :param update_period: stored as ``self.update_period``
            (presumably the interval in seconds between checks of
            *config_file* for changes; confirm against
            ``_maybe_install_update_check``)
        :param trace_perf: passed on to the service context
        :param log_file_prefix: Log file prefix; ``DEFAULT_LOG_PREFIX``
            is used if it is None or empty
        :param log_to_stderr: Whether logging should be shown on stderr
        :param aws_prof: passed to ``new_default_config`` together with
            *cube_paths*
        :param aws_env: passed to ``new_default_config`` together with
            *cube_paths*
        :raise ValueError: if *config_file* is combined with any of
            *cube_paths*, *styles*, *aws_prof* or *aws_env*
        """
        if config_file and cube_paths:
            raise ValueError("config_file and cube_paths cannot be given both")
        if config_file and styles:
            raise ValueError("config_file and styles cannot be given both")
        if config_file and aws_prof:
            raise ValueError("config_file and aws_profile cannot be given both")
        if config_file and aws_env:
            raise ValueError("config_file and aws_env cannot be given both")

        # A configuration file may define a custom color list that replaces
        # the module-level one.
        global SNAP_CPD_LIST
        if config_file:
            SNAP_CPD_LIST = _get_custom_color_list(config_file)

        # Resolve the effective prefix first: os.path.dirname() raises a
        # TypeError for None, and the directory to create must be the one
        # the log file is actually written to.
        log_file_prefix = log_file_prefix or DEFAULT_LOG_PREFIX
        log_dir = os.path.dirname(log_file_prefix)
        if log_dir and not os.path.isdir(log_dir):
            os.makedirs(log_dir, exist_ok=True)

        options = tornado.options.options
        options.log_file_prefix = log_file_prefix
        options.log_to_stderr = log_to_stderr
        enable_pretty_logging()

        tile_cache_capacity = parse_mem_size(tile_cache_size)

        config = None
        if cube_paths:
            config = new_default_config(cube_paths, styles, aws_prof=aws_prof, aws_env=aws_env)

        self.config_file = os.path.abspath(config_file) if config_file else None
        self.update_period = update_period
        self.update_timer = None
        self.config_error = None
        self.service_info = dict(port=port,
                                 address=address,
                                 started=datetime.now().isoformat(sep=' '),
                                 pid=os.getpid())

        self.context = ServiceContext(prefix=prefix,
                                      config=config,
                                      base_dir=base_dir,
                                      trace_perf=trace_perf,
                                      tile_comp_mode=tile_comp_mode,
                                      tile_cache_capacity=tile_cache_capacity)
        self._maybe_load_config()

        application.service_context = self.context
        application.time_of_last_activity = time.process_time()
        self.application = application

        # Must exist before the signal handlers are installed, otherwise a
        # signal caught during construction would be reset to False below.
        self._shutdown_requested = False

        # Register handlers for common termination signals
        signal.signal(signal.SIGINT, self._sig_handler)
        signal.signal(signal.SIGTERM, self._sig_handler)

        self.server = application.listen(port, address=address or 'localhost')
        # Ensure we have the same event loop in all threads
        asyncio.set_event_loop_policy(_GlobalEventLoopPolicy(asyncio.get_event_loop()))
        self._maybe_install_update_check()
Esempio n. 30
0
def get_dataset_tile_url(ctx: ServiceContext, ds_id: str, var_name: str,
                         base_url: str):
    """
    Build the tile URL template for a variable of a dataset.

    :param ctx: Service context object used to compose the service URL
    :param ds_id: The dataset identifier.
    :param var_name: The variable name.
    :param base_url: base URL the service URL is derived from
    :return: whatever ``ctx.get_service_url`` returns for the path
        components 'datasets', *ds_id*, 'vars', *var_name*, 'tiles' and
        '{z}/{x}/{y}.png'
    """
    path_components = ('datasets', ds_id, 'vars', var_name,
                       'tiles', '{z}/{x}/{y}.png')
    return ctx.get_service_url(base_url, *path_components)