Code example #1
    def to_dict(self) -> dict:
        """
        Convert this hypercube into a dictionary that can be serialized to
        a valid JSON representation.

        >>> example = {
        ...     "id": "test_data",
        ...     "data": [
        ...         [[0.0, 0.1], [0.2, 0.3]],
        ...         [[0.0, 0.1], [0.2, 0.3]],
        ...     ],
        ...     "dimension": [
        ...         {"name": "time", "coordinates": ["2001-01-01", "2001-01-02"]},
        ...         {"name": "X", "coordinates": [50.0, 60.0]},
        ...         {"name": "Y"},
        ...     ],
        ... }
        """
        xd = self._array.to_dict()
        return dict_no_none({
            "id": xd.get("name"),
            "data": xd.get("data"),
            "description": deep_get(xd, "attrs", "description", default=None),
            "dimensions": [
                dict_no_none(
                    name=dim,
                    coordinates=deep_get(xd, "coords", dim, "data", default=None)
                )
                for dim in xd.get("dims", [])
            ]
        })
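
The `dict_no_none` helper is not shown in this snippet. A minimal sketch that is consistent with both call styles used above (a positional dict in the outer call, keyword arguments in the inner one) could look like the following; the actual signature in the source may differ:

def dict_no_none(*args, **kwargs) -> dict:
    """Build a dict from the given arguments, skipping entries whose value is None."""
    return {k: v for k, v in dict(*args, **kwargs).items() if v is not None}

This is what lets the docstring example omit "coordinates" for the "Y" dimension, and what drops "description" when the array has no such attribute.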
Code example #2
    def get(self, *keys, default=None) -> Union[dict, str, int]:
        """Load JSON file and do deep get with given keys."""
        result = deep_get(self.load(), *keys, default=default)
        if isinstance(result, Exception) or (isinstance(result, type) and issubclass(result, Exception)):
            # pylint: disable=raising-bad-type
            raise result
        return result
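
The notable pattern here is that `default` may itself be an exception (an instance or a class): `deep_get` simply returns it like any other default, and the caller promotes it to a raise. A small sketch of that contract, assuming `deep_get` is importable from the project's utils module:

from openeo_driver.utils import deep_get  # assumed import path

data = {"credentials": {}}

# A plain value as default: the missing key is tolerated.
assert deep_get(data, "connection", "timeout", default=30) == 30

# An exception as default: deep_get returns it unchanged; a caller like the
# `get` method above then detects it with isinstance/issubclass and raises it.
result = deep_get(data, "credentials", "token", default=LookupError("token not configured"))
assert isinstance(result, LookupError)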
Code example #3
def test_layer_metadata(id, layer):
    # TODO: move/copy to openeo-deploy project?
    assert "bands" not in layer
    assert deep_get(layer, "properties", "cube:dimensions",
                    default=None) is None
    assert deep_get(layer, "properties", "eo:bands", default=None) is None
    eo_bands = [
        b["name"] for b in deep_get(layer, "summaries", 'eo:bands', default=[])
    ]
    cube_dimension_bands = []
    for cube_dim in layer.get("cube:dimensions", {}).values():
        if cube_dim["type"] == "bands":
            cube_dimension_bands = cube_dim["values"]
    if eo_bands:
        assert eo_bands == cube_dimension_bands

    def valid_bbox(bbox):
        return len(bbox) == 4 and bbox[0] <= bbox[2] and bbox[1] <= bbox[3]

    assert schema.Schema({
        "spatial": {
            "bbox": [schema.And([schema.Or(int, float)], valid_bbox)]
        },
        "temporal": {
            "interval": [[schema.Or(str, None)]]
        }
    }).validate(layer["extent"])

    gps_metadata = GeopysparkCubeMetadata(layer)
    gps_metadata = gps_metadata.filter_bands([cube_dimension_bands[0]])
    titles = gps_metadata.opensearch_link_titles
    aliases = gps_metadata.band_dimension.band_aliases[0]
    if aliases is not None and len(aliases) > 0:
        assert titles[0] == aliases[0]
    else:
        assert titles[0] == cube_dimension_bands[0]
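
For reference, a hypothetical extent document that satisfies the schema check above:

extent = {
    "spatial": {"bbox": [[2.5, 49.5, 6.4, 51.5]]},
    "temporal": {"interval": [["2015-07-06T00:00:00Z", None]]},
}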
Code example #4
    def get(self, *args, default=None):
        return deep_get(self._orig_metadata, *args, default=default)
Code example #5
    def _parse_dimensions(
        cls, spec: dict, complain: Callable[[str], None] = warnings.warn
    ) -> List[Dimension]:
        """
        Extract data cube dimension metadata from a STAC-like description of a collection.

        Dimension metadata comes from different places in the spec:
        - 'cube:dimensions' has dimension names (e.g. 'x', 'y', 't'), dimension extent info
            and band names for band dimensions
        - 'eo:bands' has more detailed band information, like "common" name and wavelength info

        This helper tries to normalize/combine these sources.

        :param spec: STAC-like collection metadata dict
        :param complain: handler for warnings
        :return: list of `Dimension` objects
        """

        # Dimension info is in `cube:dimensions` (or 0.4-style `properties/cube:dimensions`)
        cube_dimensions = (
            deep_get(spec, 'cube:dimensions', default=None)
            or deep_get(spec, 'properties', 'cube:dimensions', default=None)
            or {})
        if not cube_dimensions:
            complain("No cube:dimensions metadata")
        dimensions = []
        for name, info in cube_dimensions.items():
            dim_type = info.get("type")
            if dim_type == "spatial":
                dimensions.append(
                    SpatialDimension(
                        name=name,
                        extent=info.get("extent"),
                        crs=info.get("reference_system",
                                     SpatialDimension.DEFAULT_CRS)))
            elif dim_type == "temporal":
                dimensions.append(
                    TemporalDimension(name=name, extent=info.get("extent")))
            elif dim_type == "bands":
                bands = [Band(b, None, None) for b in info.get("values", [])]
                if not bands:
                    complain("No band names in dimension {d!r}".format(d=name))
                dimensions.append(BandDimension(name=name, bands=bands))
            else:
                complain("Unknown dimension type {t!r}".format(t=dim_type))
                dimensions.append(Dimension(name=name, type=dim_type))

        # Detailed band information: `summaries/eo:bands` (and 0.4 style `properties/eo:bands`)
        eo_bands = (deep_get(spec, "summaries", "eo:bands", default=None)
                    or deep_get(spec, "properties", "eo:bands", default=None))
        if eo_bands:
            # center_wavelength is in micrometer according to spec
            bands_detailed = [
                Band(b['name'], b.get('common_name'),
                     b.get('center_wavelength')) for b in eo_bands
            ]
            # Update band dimension with more detailed info
            band_dimensions = [d for d in dimensions if d.type == "bands"]
            if len(band_dimensions) == 1:
                dim = band_dimensions[0]
                # Update band values from 'cube:dimensions' with more detailed 'eo:bands' info
                eo_band_names = [b.name for b in bands_detailed]
                cube_dimension_band_names = [b.name for b in dim.bands]
                if eo_band_names == cube_dimension_band_names:
                    dim.bands = bands_detailed
                else:
                    complain("Band name mismatch: {a} != {b}".format(
                        a=cube_dimension_band_names, b=eo_band_names))
            elif len(band_dimensions) == 0:
                if len(dimensions) == 0:
                    complain(
                        "Assuming name 'bands' for anonymous band dimension.")
                    dimensions.append(
                        BandDimension(name="bands", bands=bands_detailed))
                else:
                    complain(
                        "No 'bands' dimension in 'cube:dimensions' while having 'eo:bands'"
                    )
            else:
                complain("Multiple dimensions of type 'bands'")

        return dimensions
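
To make the combination of the two metadata sources concrete, here is a hypothetical spec and call; the enclosing class name `CollectionMetadata` is an assumption:

spec = {
    "cube:dimensions": {
        "x": {"type": "spatial", "extent": [0, 10], "reference_system": 4326},
        "t": {"type": "temporal", "extent": ["2020-01-01", None]},
        "bands": {"type": "bands", "values": ["B02", "B03"]},
    },
    "summaries": {"eo:bands": [
        {"name": "B02", "common_name": "blue", "center_wavelength": 0.49},
        {"name": "B03", "common_name": "green", "center_wavelength": 0.56},
    ]},
}
dims = CollectionMetadata._parse_dimensions(spec)
# Band names match between the two sources, so the bands dimension now
# carries common names and center wavelengths taken from 'eo:bands'.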
Code example #6
def test_deep_get_mixed():
    d = {
        "foo": (11, [222, 33], {"z": 42, -4: 44}),
        "bar": [{"a": [5, 8]}, {"b": ("ar", 6, 8)}]
    }
    assert deep_get(d, "foo", 0) == 11
    assert deep_get(d, "foo", 1) == [222, 33]
    assert deep_get(d, "foo", 1, 0) == 222
    assert deep_get(d, "foo", 1, 1) == 33
    assert deep_get(d, "foo", 2, "z") == 42
    assert deep_get(d, "foo", 2, -4) == 44
    with pytest.raises(DeepKeyError, match=re.escape("-4 (from deep key ('foo', -4))")):
        deep_get(d, "foo", -4)
    with pytest.raises(DeepKeyError, match=re.escape("10 (from deep key ('foo', 10))")):
        deep_get(d, "foo", 10)
    assert deep_get(d, "bar", 0, "a", 1) == 8
    assert deep_get(d, "bar", 1, "b", 0) == "ar"
    with pytest.raises(DeepKeyError, match=re.escape("2 (from deep key ('bar', 2, 22, 222))")):
        deep_get(d, "bar", 2, 22, 222)
Code example #7
def test_deep_get_dict():
    d = {
        "foo": "bar",
        "dims": {"x": 5, "y": {"amount": 3, "unit": "cm"}},
        "conversions": {4: 2, 6: {9: 3, 99: 7}},
    }
    assert deep_get(d, "foo") == "bar"
    with pytest.raises(DeepKeyError, match=re.escape("1 (from deep key ('foo', 1))")):
        deep_get(d, "foo", 1)
    with pytest.raises(DeepKeyError, match=re.escape("'bar' (from deep key ('bar',))")):
        deep_get(d, "bar")
    assert deep_get(d, "dims") == {"x": 5, "y": {"amount": 3, "unit": "cm"}}
    assert deep_get(d, "dims", "x") == 5
    with pytest.raises(DeepKeyError, match=re.escape("'unit' (from deep key ('dims', 'x', 'unit'))")):
        deep_get(d, "dims", "x", "unit")
    assert deep_get(d, "dims", "x", "unit", default="cm") == "cm"
    assert deep_get(d, "dims", "y", "amount") == 3
    assert deep_get(d, "dims", "y", "unit") == "cm"
    assert deep_get(d, "conversions", 4) == 2
    assert deep_get(d, "conversions", 6, 99) == 7
Code example #8
def _normalize_collection_metadata(metadata: dict, api_version: ComparableVersion, full=False) -> dict:
    """
    Make sure the given collection metadata roughly complies with the desired version of the openEO spec.
    """
    # Make copy and remove all "private" fields
    metadata = copy.deepcopy(metadata)
    metadata = {k: v for (k, v) in metadata.items() if not k.startswith('_')}

    # Metadata should at least contain an id.
    if "id" not in metadata:
        _log.error("Collection metadata should have 'id' field: {m!r}".format(m=metadata))
        raise KeyError("id")
    collection_id = metadata["id"]

    # Version dependent metadata conversions
    cube_dims_100 = deep_get(metadata, "cube:dimensions", default=None)
    cube_dims_040 = deep_get(metadata, "properties", "cube:dimensions", default=None)
    eo_bands_100 = deep_get(metadata, "summaries", "eo:bands", default=None)
    eo_bands_040 = deep_get(metadata, "properties", "eo:bands", default=None)
    if api_version.below("1.0.0"):
        if full and not cube_dims_040 and cube_dims_100:
            metadata.setdefault("properties", {})
            metadata["properties"]["cube:dimensions"] = cube_dims_100
        if full and not eo_bands_040 and eo_bands_100:
            metadata.setdefault("properties", {})
            metadata["properties"]["eo:bands"] = eo_bands_100
    else:
        if full and not cube_dims_100 and cube_dims_040:
            _log.warning("Collection metadata 'cube:dimensions' in API 0.4 style instead of 1.0 style")
            metadata["cube:dimensions"] = cube_dims_040
        if full and not eo_bands_100 and eo_bands_040:
            _log.warning("Collection metadata 'eo:bands' in API 0.4 style instead of 1.0 style")
            metadata.setdefault("summaries", {})
            metadata["summaries"]["eo:bands"] = eo_bands_040

    # Make sure some required fields are set.
    metadata.setdefault("stac_version", "0.9.0" if api_version.at_least("1.0.0") else "0.6.2")
    metadata.setdefault("links", [])
    metadata.setdefault("description", collection_id)
    metadata.setdefault("license", "proprietary")
    # Warn about missing fields where simple defaults are not feasible.
    fallbacks = {
        "extent": {"spatial": [0, 0, 0, 0], "temporal": [None, None]},
    }
    if full:
        if api_version.at_least("1.0.0"):
            fallbacks["cube:dimensions"] = {}
            fallbacks["summaries"] = {}
        else:
            fallbacks["properties"] = {}
            fallbacks["other_properties"] = {}

    for key, value in fallbacks.items():
        if key not in metadata:
            _log.warning("Collection {c!r} metadata does not have field {k!r}.".format(c=collection_id, k=key))
            metadata[key] = value

    if not full:
        basic_keys = [
            "stac_version", "stac_extensions", "id", "title", "description", "keywords", "version",
            "deprecated", "license", "providers", "extent", "links"
        ]
        metadata = {k: v for k, v in metadata.items() if k in basic_keys}

    return metadata
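
A quick illustration of the version-dependent conversion, with hypothetical metadata:

meta = {
    "id": "SENTINEL2_L2A",
    "cube:dimensions": {"x": {"type": "spatial"}},  # 1.0-style location
}
out = _normalize_collection_metadata(meta, ComparableVersion("0.4.2"), full=True)
# For a pre-1.0 API, the 1.0-style field is mirrored to the 0.4-style location:
assert out["properties"]["cube:dimensions"] == {"x": {"type": "spatial"}}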
Code example #9
def _merge_layers_with_common_name(metadata):
    common_names = {m["common_name"] for m in metadata.values() if m.get("common_name")}
    for common_name in common_names:
        common_name_metadatas = [
            m for m in metadata.values() if m.get("common_name") == common_name
        ]
        default_metadata = next(
            filter(
                lambda m: deep_get(m, "_vito", "data_source", "default_provider:backend", default=False),
                common_name_metadatas,
            ),
            None,  # without this, next() raises StopIteration and the fallback below is dead code
        )
        default_metadata = default_metadata or common_name_metadatas[0]
        new_metadata = deepcopy(default_metadata)
        default_metadata["_vito"]["data_source"].pop(
            "default_provider:backend", None)
        new_metadata["_vito"]["data_source"]["provider:backend"] = [
            new_metadata["_vito"]["data_source"]["provider:backend"]
        ]
        for common_name_metadata in common_name_metadatas:
            if common_name_metadata["id"] != new_metadata["id"]:
                new_metadata["_vito"]["data_source"]["provider:backend"] += [
                    common_name_metadata["_vito"]["data_source"]["provider:backend"]
                ]
                new_metadata["providers"] += common_name_metadata["providers"]
                new_metadata["links"] += common_name_metadata["links"]
                for b in common_name_metadata["cube:dimensions"]["bands"]["values"]:
                    if b not in new_metadata["cube:dimensions"]["bands"]["values"]:
                        new_metadata["cube:dimensions"]["bands"]["values"].append(b)
                        new_metadata["summaries"]["eo:bands"] += [
                            m for m in common_name_metadata["summaries"]["eo:bands"]
                            if m["name"] == b
                        ]
                    else:
                        new_metadata_band = next(
                            m for m in new_metadata["summaries"]["eo:bands"] if m["name"] == b
                        )
                        common_metadata_band = next(
                            m for m in common_name_metadata["summaries"]["eo:bands"] if m["name"] == b
                        )
                        new_metadata_band["aliases"] = (new_metadata_band.get("aliases") or []) + \
                                                       (common_metadata_band.get("aliases") or [])

                new_metadata_spatial_extent = new_metadata["extent"]["spatial"]["bbox"]
                common_name_metadata_spatial_extent = common_name_metadata["extent"]["spatial"]["bbox"]
                new_metadata["extent"]["spatial"]["bbox"] = [[
                    min(new_metadata_spatial_extent[0][0], common_name_metadata_spatial_extent[0][0]),
                    min(new_metadata_spatial_extent[0][1], common_name_metadata_spatial_extent[0][1]),
                    max(new_metadata_spatial_extent[0][2], common_name_metadata_spatial_extent[0][2]),
                    max(new_metadata_spatial_extent[0][3], common_name_metadata_spatial_extent[0][3]),
                ]]
                new_metadata_temporal_extent = new_metadata["extent"]["temporal"]["interval"]
                common_name_metadata_temporal_extent = common_name_metadata["extent"]["temporal"]["interval"]
                default_date = datetime(2017, 1, 1, tzinfo=tzutc())
                new_start = min(
                    dp.parse(new_metadata_temporal_extent[0][0],
                             default=default_date),
                    dp.parse(common_name_metadata_temporal_extent[0][0],
                             default=default_date)).isoformat()
                if not new_metadata_temporal_extent[0][1]:
                    new_end = common_name_metadata_temporal_extent[0][1]
                elif not common_name_metadata_temporal_extent[0][1]:
                    new_end = new_metadata_temporal_extent[0][1]
                else:
                    new_end = max(
                        dp.parse(new_metadata_temporal_extent[0][1],
                                 default=default_date),
                        dp.parse(common_name_metadata_temporal_extent[0][1],
                                 default=default_date))
                if new_end:
                    new_end = new_end.isoformat()
                new_metadata["extent"]["temporal"]["interval"] = [[
                    new_start, new_end
                ]]

        new_metadata["id"] = common_name

        metadata[common_name] = new_metadata

    return metadata
Code example #10
def get_layer_catalog(opensearch_enrich=False) -> GeoPySparkLayerCatalog:
    """
    Get layer catalog (from JSON files)
    """
    metadata: Dict[str, dict] = {}

    def read_catalog_file(catalog_file) -> Dict[str, dict]:
        return {coll["id"]: coll for coll in read_json(catalog_file)}

    catalog_files = ConfigParams().layer_catalog_metadata_files
    for path in catalog_files:
        logger.info(f"Reading layer catalog metadata from {path}")
        metadata = dict_merge_recursive(metadata,
                                        read_catalog_file(path),
                                        overwrite=True)

    if opensearch_enrich:
        opensearch_metadata = {}
        sh_collection_metadatas = None
        opensearch_instances = {}

        def opensearch_instance(endpoint: str) -> OpenSearch:
            endpoint = endpoint.lower()
            opensearch = opensearch_instances.get(endpoint)

            if opensearch is not None:
                return opensearch

            if "oscars" in endpoint or "terrascope" in endpoint or "vito.be" in endpoint:
                opensearch = OpenSearchOscars(endpoint=endpoint)
            elif "creodias" in endpoint:
                opensearch = OpenSearchCreodias(endpoint=endpoint)
            else:
                raise ValueError(endpoint)

            opensearch_instances[endpoint] = opensearch
            return opensearch

        for cid, collection_metadata in metadata.items():
            data_source = deep_get(collection_metadata,
                                   "_vito",
                                   "data_source",
                                   default={})
            os_cid = data_source.get("opensearch_collection_id")
            if os_cid:
                os_endpoint = (data_source.get("opensearch_endpoint")
                               or ConfigParams().default_opensearch_endpoint)
                logger.info(
                    f"Updating {cid} metadata from {os_endpoint}:{os_cid}")
                try:
                    opensearch_metadata[cid] = opensearch_instance(
                        os_endpoint).get_metadata(collection_id=os_cid)
                except Exception:
                    logger.warning(traceback.format_exc())
            elif data_source.get("type") == "sentinel-hub":
                sh_cid = data_source.get("collection_id")

                if sh_cid is None:
                    continue

                try:
                    sh_stac_endpoint = "https://collections.eurodatacube.com/stac/index.json"

                    if sh_collection_metadatas is None:
                        sh_collections = requests.get(sh_stac_endpoint).json()
                        sh_collection_metadatas = [
                            requests.get(c["link"]).json()
                            for c in sh_collections
                        ]

                    sh_metadata = next(
                        filter(lambda m: m["datasource_type"] == sh_cid,
                               sh_collection_metadatas))
                    logger.info(
                        f"Updating {cid} metadata from {sh_stac_endpoint}:{sh_metadata['id']}"
                    )
                    opensearch_metadata[cid] = sh_metadata
                    if not data_source.get("endpoint"):
                        endpoint = opensearch_metadata[cid]["providers"][0][
                            "url"]
                        endpoint = endpoint if endpoint.startswith(
                            "http") else "https://{}".format(endpoint)
                        data_source["endpoint"] = endpoint
                    data_source["dataset_id"] = data_source.get(
                        "dataset_id"
                    ) or opensearch_metadata[cid]["datasource_type"]
                except StopIteration:
                    logger.warning(
                        f"No STAC data available for collection with id {sh_cid}"
                    )

        if opensearch_metadata:
            metadata = dict_merge_recursive(opensearch_metadata,
                                            metadata,
                                            overwrite=True)

    metadata = _merge_layers_with_common_name(metadata)

    return GeoPySparkLayerCatalog(all_metadata=list(metadata.values()))
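
Note that the merge order matters: in `dict_merge_recursive(opensearch_metadata, metadata, overwrite=True)` the locally configured JSON metadata (the second argument) takes precedence over the enriched values. A sketch of what such a helper plausibly does; the project's real implementation may handle conflicts differently:

def dict_merge_recursive(a: dict, b: dict, overwrite: bool = False) -> dict:
    """Recursively merge mapping `b` into mapping `a`; values from `b` win when overwrite=True."""
    result = dict(a)
    for key, value in b.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = dict_merge_recursive(result[key], value, overwrite=overwrite)
        elif key not in result or overwrite:
            result[key] = value
        # else: keep the existing value (a strict variant might raise on conflict instead)
    return result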