def to_dict(self) -> dict:
    """
    Convert this hypercube into a dictionary that can be converted
    into a valid JSON representation.

    Output keys: "id", "data", optional "description" and a "dimensions"
    list (one entry per dimension, with optional "coordinates").

    >>> example = {
    ...     "id": "test_data",
    ...     "data": [
    ...         [[0.0, 0.1], [0.2, 0.3]],
    ...         [[0.0, 0.1], [0.2, 0.3]],
    ...     ],
    ...     "dimensions": [
    ...         {"name": "time", "coordinates": ["2001-01-01", "2001-01-02"]},
    ...         {"name": "X", "coordinates": [50.0, 60.0]},
    ...         {"name": "Y"},
    ...     ],
    ... }
    """
    # Leverage xarray's own dict serialization and reshape it to our schema.
    xd = self._array.to_dict()
    return dict_no_none({
        "id": xd.get("name"),
        "data": xd.get("data"),
        "description": deep_get(xd, "attrs", "description", default=None),
        "dimensions": [
            # Dimensions without coordinate data end up as just {"name": ...}
            # because dict_no_none drops None values.
            dict_no_none(
                name=dim,
                coordinates=deep_get(xd, "coords", dim, "data", default=None)
            )
            for dim in xd.get("dims", [])
        ]
    })
def get(self, *keys, default=None) -> Union[dict, str, int]:
    """Load JSON file and do deep get with given keys."""
    value = deep_get(self.load(), *keys, default=default)
    # The stored default may itself be an exception (instance or class):
    # in that case, raise it instead of returning it.
    is_exception_instance = isinstance(value, Exception)
    is_exception_class = isinstance(value, type) and issubclass(value, Exception)
    if is_exception_instance or is_exception_class:
        # pylint: disable=raising-bad-type
        raise value
    return value
def test_layer_metadata(id, layer):
    # TODO: move/copy to openeo-deploy project?
    # Band info must not live at the top level or under 0.4-style "properties".
    assert "bands" not in layer
    assert deep_get(layer, "properties", "cube:dimensions", default=None) is None
    assert deep_get(layer, "properties", "eo:bands", default=None) is None

    eo_band_names = [band["name"] for band in deep_get(layer, "summaries", 'eo:bands', default=[])]
    dimension_band_names = []
    for dimension in layer.get("cube:dimensions", {}).values():
        if dimension["type"] == "bands":
            dimension_band_names = dimension["values"]
    if eo_band_names:
        # 'summaries/eo:bands' and the bands dimension must agree.
        assert eo_band_names == dimension_band_names

    def valid_bbox(bbox):
        return len(bbox) == 4 and bbox[0] <= bbox[2] and bbox[1] <= bbox[3]

    extent_schema = schema.Schema({
        "spatial": {
            "bbox": [schema.And([schema.Or(int, float)], valid_bbox)]
        },
        "temporal": {
            "interval": [[schema.Or(str, None)]]
        }
    })
    assert extent_schema.validate(layer["extent"])

    # Filtering on the first band: the opensearch link title should prefer
    # the band's first alias (when present) over the band name itself.
    gps_metadata = GeopysparkCubeMetadata(layer).filter_bands([dimension_band_names[0]])
    titles = gps_metadata.opensearch_link_titles
    aliases = gps_metadata.band_dimension.band_aliases[0]
    if aliases is not None and len(aliases) > 0:
        assert titles[0] == aliases[0]
    else:
        assert titles[0] == dimension_band_names[0]
def get(self, *args, default=None):
    """Deep lookup in the original (unparsed) metadata dict."""
    original = self._orig_metadata
    return deep_get(original, *args, default=default)
def _parse_dimensions(cls, spec: dict, complain: Callable[[str], None] = warnings.warn) -> List[Dimension]:
    """
    Extract data cube dimension metadata from STAC-like description of a collection.

    Dimension metadata comes from different places in spec:

    - 'cube:dimensions' has dimension names (e.g. 'x', 'y', 't'), dimension extent info
      and band names for band dimensions
    - 'eo:bands' has more detailed band information like "common" name and wavelength info

    This helper tries to normalize/combine these sources.

    :param spec: STAC like collection metadata dict
    :param complain: handler for warnings
    :return list: list of `Dimension` objects
    """
    # Dimension info is in `cube:dimensions` (or 0.4-style `properties/cube:dimensions`)
    cube_dimensions = (
            deep_get(spec, 'cube:dimensions', default=None)
            or deep_get(spec, 'properties', 'cube:dimensions', default=None)
            or {}
    )
    if not cube_dimensions:
        complain("No cube:dimensions metadata")
    dimensions: List[Dimension] = []
    for name, info in cube_dimensions.items():
        dim_type = info.get("type")
        if dim_type == "spatial":
            dimensions.append(SpatialDimension(
                name=name,
                extent=info.get("extent"),
                crs=info.get("reference_system", SpatialDimension.DEFAULT_CRS)
            ))
        elif dim_type == "temporal":
            dimensions.append(TemporalDimension(name=name, extent=info.get("extent")))
        elif dim_type == "bands":
            # Band names come from 'values'; common name/wavelength filled in later from 'eo:bands'.
            bands = [Band(b, None, None) for b in info.get("values", [])]
            if not bands:
                complain("No band names in dimension {d!r}".format(d=name))
            dimensions.append(BandDimension(name=name, bands=bands))
        else:
            # Unknown dimension type: warn, but keep a generic Dimension entry.
            complain("Unknown dimension type {t!r}".format(t=dim_type))
            dimensions.append(Dimension(name=name, type=dim_type))

    # Detailed band information: `summaries/eo:bands` (and 0.4 style `properties/eo:bands`)
    eo_bands = (
            deep_get(spec, "summaries", "eo:bands", default=None)
            or deep_get(spec, "properties", "eo:bands", default=None)
    )
    if eo_bands:
        # center_wavelength is in micrometer according to spec
        bands_detailed = [
            Band(b['name'], b.get('common_name'), b.get('center_wavelength'))
            for b in eo_bands
        ]
        # Update band dimension with more detailed info
        band_dimensions = [d for d in dimensions if d.type == "bands"]
        if len(band_dimensions) == 1:
            dim = band_dimensions[0]
            # Update band values from 'cube:dimensions' with more detailed 'eo:bands' info,
            # but only when the band name lists match exactly (same names, same order).
            eo_band_names = [b.name for b in bands_detailed]
            cube_dimension_band_names = [b.name for b in dim.bands]
            if eo_band_names == cube_dimension_band_names:
                dim.bands = bands_detailed
            else:
                complain("Band name mismatch: {a} != {b}".format(
                    a=cube_dimension_band_names, b=eo_band_names))
        elif len(band_dimensions) == 0:
            if len(dimensions) == 0:
                # No dimension metadata at all: fall back to a band dimension named "bands".
                complain("Assuming name 'bands' for anonymous band dimension.")
                dimensions.append(BandDimension(name="bands", bands=bands_detailed))
            else:
                complain("No 'bands' dimension in 'cube:dimensions' while having 'eo:bands'")
        else:
            complain("Multiple dimensions of type 'bands'")

    return dimensions
def test_deep_get_mixed():
    """deep_get on a structure mixing dicts, tuples and lists (str and int keys)."""
    data = {
        "foo": (11, [222, 33], {"z": 42, -4: 44}),
        "bar": [{"a": [5, 8]}, {"b": ("ar", 6, 8)}]
    }
    # Successful lookups: (key path, expected value).
    for keys, expected in [
        (("foo", 0), 11),
        (("foo", 1), [222, 33]),
        (("foo", 1, 0), 222),
        (("foo", 1, 1), 33),
        (("foo", 2, "z"), 42),
        (("foo", 2, -4), 44),
        (("bar", 0, "a", 1), 8),
        (("bar", 1, "b", 0), "ar"),
    ]:
        assert deep_get(data, *keys) == expected
    # Invalid paths raise DeepKeyError naming the failing key and the full path.
    for keys, message in [
        (("foo", -4), "-4 (from deep key ('foo', -4))"),
        (("foo", 10), "10 (from deep key ('foo', 10))"),
        (("bar", 2, 22, 222), "2 (from deep key ('bar', 2, 22, 222))"),
    ]:
        with pytest.raises(DeepKeyError, match=re.escape(message)):
            deep_get(data, *keys)
def test_deep_get_dict():
    """deep_get on nested dicts with both string and integer keys."""
    data = {
        "foo": "bar",
        "dims": {"x": 5, "y": {"amount": 3, "unit": "cm"}},
        "conversions": {4: 2, 6: {9: 3, 99: 7}},
    }
    # Successful lookups at various depths.
    for keys, expected in [
        (("foo",), "bar"),
        (("dims",), {"x": 5, "y": {"amount": 3, "unit": "cm"}}),
        (("dims", "x"), 5),
        (("dims", "y", "amount"), 3),
        (("dims", "y", "unit"), "cm"),
        (("conversions", 4), 2),
        (("conversions", 6, 99), 7),
    ]:
        assert deep_get(data, *keys) == expected
    # A provided default suppresses the failure on a bad path.
    assert deep_get(data, "dims", "x", "unit", default="cm") == "cm"
    # Without a default, bad paths raise DeepKeyError naming the failing key.
    for keys, message in [
        (("foo", 1), "1 (from deep key ('foo', 1))"),
        (("bar",), "'bar' (from deep key ('bar',))"),
        (("dims", "x", "unit"), "'unit' (from deep key ('dims', 'x', 'unit'))"),
    ]:
        with pytest.raises(DeepKeyError, match=re.escape(message)):
            deep_get(data, *keys)
def _normalize_collection_metadata(metadata: dict, api_version: ComparableVersion, full=False) -> dict:
    """
    Make sure the given collection metadata roughly complies to desired version of OpenEO spec.

    :param metadata: collection metadata dict (not mutated: a deep copy is made)
    :param api_version: target OpenEO API version; fields are moved between
        1.0-style locations (top-level `cube:dimensions`, `summaries/eo:bands`)
        and 0.4-style locations (under `properties`) accordingly
    :param full: whether to produce the "full" collection view (with dimension/band
        metadata and fallbacks) or the basic listing view (whitelisted keys only)
    :raises KeyError: when the metadata has no "id" field
    """
    # Make copy and remove all "private" fields
    metadata = copy.deepcopy(metadata)
    metadata = {k: v for (k, v) in metadata.items() if not k.startswith('_')}

    # Metadata should at least contain an id.
    if "id" not in metadata:
        _log.error("Collection metadata should have 'id' field: {m!r}".format(m=metadata))
        raise KeyError("id")
    collection_id = metadata["id"]

    # Version dependent metadata conversions
    cube_dims_100 = deep_get(metadata, "cube:dimensions", default=None)
    cube_dims_040 = deep_get(metadata, "properties", "cube:dimensions", default=None)
    eo_bands_100 = deep_get(metadata, "summaries", "eo:bands", default=None)
    eo_bands_040 = deep_get(metadata, "properties", "eo:bands", default=None)
    if api_version.below("1.0.0"):
        # Target is 0.4 style: copy 1.0-style fields under "properties" when missing there.
        if full and not cube_dims_040 and cube_dims_100:
            metadata.setdefault("properties", {})
            metadata["properties"]["cube:dimensions"] = cube_dims_100
        if full and not eo_bands_040 and eo_bands_100:
            metadata.setdefault("properties", {})
            metadata["properties"]["eo:bands"] = eo_bands_100
    else:
        # Target is 1.0 style: lift 0.4-style fields out of "properties" when needed.
        if full and not cube_dims_100 and cube_dims_040:
            _log.warning("Collection metadata 'cube:dimensions' in API 0.4 style instead of 1.0 style")
            metadata["cube:dimensions"] = cube_dims_040
        if full and not eo_bands_100 and eo_bands_040:
            _log.warning("Collection metadata 'eo:bands' in API 0.4 style instead of 1.0 style")
            metadata.setdefault("summaries", {})
            metadata["summaries"]["eo:bands"] = eo_bands_040

    # Make sure some required fields are set.
    metadata.setdefault("stac_version", "0.9.0" if api_version.at_least("1.0.0") else "0.6.2")
    metadata.setdefault("links", [])
    metadata.setdefault("description", collection_id)
    metadata.setdefault("license", "proprietary")
    # Warn about missing fields where simple defaults are not feasible.
    fallbacks = {
        "extent": {"spatial": [0, 0, 0, 0], "temporal": [None, None]},
    }
    if full:
        if api_version.at_least("1.0.0"):
            fallbacks["cube:dimensions"] = {}
            fallbacks["summaries"] = {}
        else:
            fallbacks["properties"] = {}
            fallbacks["other_properties"] = {}
    for key, value in fallbacks.items():
        if key not in metadata:
            _log.warning("Collection {c!r} metadata does not have field {k!r}.".format(c=collection_id, k=key))
            metadata[key] = value

    if not full:
        # Basic listing view: keep only the whitelisted top-level keys.
        basic_keys = [
            "stac_version", "stac_extensions", "id", "title", "description", "keywords", "version",
            "deprecated", "license", "providers", "extent", "links"
        ]
        metadata = {k: v for k, v in metadata.items() if k in basic_keys}

    return metadata
def _merge_layers_with_common_name(metadata):
    """
    Merge collection metadata entries that share a "common_name" into a single
    virtual collection per common name (added to `metadata` under that name).

    The entry flagged with `_vito/data_source/default_provider:backend` (or the
    first one, as fallback) is taken as base; band values/aliases, providers,
    links and spatial/temporal extents of the others are merged into it.

    :param metadata: dict mapping collection id to collection metadata dict;
        mutated in place (merged entries are added) and also returned.
    """
    common_names = set(
        map(lambda f: f["common_name"],
            filter(lambda m: m.get("common_name"), metadata.values())))
    for common_name in common_names:
        common_name_metadatas = list(
            filter(lambda c: c.get("common_name") == common_name, metadata.values()))
        # FIX: pass `None` as `next()` default: without it a StopIteration was raised
        # when no entry is flagged as default provider, making the
        # `or common_name_metadatas[0]` fallback below unreachable.
        default_metadata = next(
            filter(
                lambda m: deep_get(m, "_vito", "data_source", "default_provider:backend", default=False),
                common_name_metadatas),
            None)
        default_metadata = default_metadata or common_name_metadatas[0]
        new_metadata = deepcopy(default_metadata)
        # Drop the flag from the original entry (note: mutates the input metadata).
        default_metadata["_vito"]["data_source"].pop("default_provider:backend", None)
        # The merged entry lists its providers as a list of backends.
        new_metadata["_vito"]["data_source"]["provider:backend"] = [
            new_metadata["_vito"]["data_source"]["provider:backend"]
        ]
        for common_name_metadata in common_name_metadatas:
            if not common_name_metadata["id"] == new_metadata["id"]:
                new_metadata["_vito"]["data_source"]["provider:backend"] += [
                    common_name_metadata["_vito"]["data_source"]["provider:backend"]
                ]
                new_metadata["providers"] += common_name_metadata["providers"]
                new_metadata["links"] += common_name_metadata["links"]
                # Merge band values and their detailed 'eo:bands' entries/aliases.
                for b in common_name_metadata["cube:dimensions"]["bands"]["values"]:
                    if b not in new_metadata["cube:dimensions"]["bands"]["values"]:
                        new_metadata["cube:dimensions"]["bands"]["values"] += [b]
                        new_metadata["summaries"]["eo:bands"] += list(
                            filter(lambda m: m["name"] == b,
                                   common_name_metadata["summaries"]["eo:bands"]))
                    else:
                        new_metadata_band = next(
                            filter(lambda m: m["name"] == b,
                                   new_metadata["summaries"]["eo:bands"]))
                        common_metadata_band = next(
                            filter(lambda m: m["name"] == b,
                                   common_name_metadata["summaries"]["eo:bands"]))
                        new_metadata_band["aliases"] = (new_metadata_band.get("aliases") or []) + \
                                                       (common_metadata_band.get("aliases") or [])
                # Merge spatial extents: union of the first bboxes.
                new_metadata_spatial_extent = new_metadata["extent"]["spatial"]["bbox"]
                common_name_metadata_spatial_extent = common_name_metadata["extent"]["spatial"]["bbox"]
                new_metadata["extent"]["spatial"]["bbox"] = [[
                    min(new_metadata_spatial_extent[0][0], common_name_metadata_spatial_extent[0][0]),
                    min(new_metadata_spatial_extent[0][1], common_name_metadata_spatial_extent[0][1]),
                    max(new_metadata_spatial_extent[0][2], common_name_metadata_spatial_extent[0][2]),
                    max(new_metadata_spatial_extent[0][3], common_name_metadata_spatial_extent[0][3])
                ]]
                # Merge temporal extents: earliest start; open (None) end wins.
                new_metadata_temporal_extent = new_metadata["extent"]["temporal"]["interval"]
                common_name_metadata_temporal_extent = common_name_metadata["extent"]["temporal"]["interval"]
                default_date = datetime(2017, 1, 1, tzinfo=tzutc())
                new_start = min(
                    dp.parse(new_metadata_temporal_extent[0][0], default=default_date),
                    dp.parse(common_name_metadata_temporal_extent[0][0], default=default_date)).isoformat()
                if not new_metadata_temporal_extent[0][1]:
                    new_end = common_name_metadata_temporal_extent[0][1]
                elif not common_name_metadata_temporal_extent[0][1]:
                    new_end = new_metadata_temporal_extent[0][1]
                else:
                    new_end = max(
                        dp.parse(new_metadata_temporal_extent[0][1], default=default_date),
                        dp.parse(common_name_metadata_temporal_extent[0][1], default=default_date))
                # NOTE(review): this guard assumes `new_end` is a datetime (or falsy) here;
                # if the first two branches yield a plain string this would raise — confirm
                # the interval end values' type against actual catalog data.
                if new_end:
                    new_end = new_end.isoformat()
                new_metadata["extent"]["temporal"]["interval"] = [[new_start, new_end]]
        new_metadata["id"] = common_name
        metadata[common_name] = new_metadata
    return metadata
def get_layer_catalog(opensearch_enrich=False) -> GeoPySparkLayerCatalog:
    """
    Get layer catalog (from JSON files), optionally enriched with metadata
    fetched from OpenSearch endpoints or the Sentinel Hub STAC index.

    :param opensearch_enrich: when True, update each collection's metadata from
        its configured opensearch endpoint (or the Sentinel Hub STAC catalog for
        "sentinel-hub" data sources); failures are logged and skipped.
    """
    metadata: Dict[str, dict] = {}

    def read_catalog_file(catalog_file) -> Dict[str, dict]:
        return {coll["id"]: coll for coll in read_json(catalog_file)}

    # Later catalog files override earlier ones (recursive merge).
    catalog_files = ConfigParams().layer_catalog_metadata_files
    for path in catalog_files:
        logger.info(f"Reading layer catalog metadata from {path}")
        metadata = dict_merge_recursive(metadata, read_catalog_file(path), overwrite=True)

    if opensearch_enrich:
        opensearch_metadata = {}
        sh_collection_metadatas = None  # lazily fetched Sentinel Hub STAC collections
        opensearch_instances = {}

        def opensearch_instance(endpoint: str) -> OpenSearch:
            """Get (cached) OpenSearch client for given endpoint."""
            endpoint = endpoint.lower()
            # FIX: look up the cache with the normalized `endpoint` parameter;
            # previously this read the enclosing loop's `os_endpoint` variable,
            # so the key written and the key read could disagree.
            opensearch = opensearch_instances.get(endpoint)
            if opensearch is not None:
                return opensearch
            if "oscars" in endpoint or "terrascope" in endpoint or "vito.be" in endpoint:
                opensearch = OpenSearchOscars(endpoint=endpoint)
            elif "creodias" in endpoint:
                opensearch = OpenSearchCreodias(endpoint=endpoint)
            else:
                raise ValueError(endpoint)
            opensearch_instances[endpoint] = opensearch
            return opensearch

        for cid, collection_metadata in metadata.items():
            data_source = deep_get(collection_metadata, "_vito", "data_source", default={})

            os_cid = data_source.get("opensearch_collection_id")
            if os_cid:
                os_endpoint = data_source.get("opensearch_endpoint") or ConfigParams().default_opensearch_endpoint
                logger.info(f"Updating {cid} metadata from {os_endpoint}:{os_cid}")
                try:
                    opensearch_metadata[cid] = opensearch_instance(os_endpoint).get_metadata(collection_id=os_cid)
                except Exception:
                    # Enrichment is best-effort: log and keep the static metadata.
                    logger.warning(traceback.format_exc())
            elif data_source.get("type") == "sentinel-hub":
                sh_cid = data_source.get("collection_id")
                if sh_cid is None:
                    continue
                try:
                    sh_stac_endpoint = "https://collections.eurodatacube.com/stac/index.json"
                    if sh_collection_metadatas is None:
                        # Fetch the full STAC index once and reuse it for all collections.
                        sh_collections = requests.get(sh_stac_endpoint).json()
                        sh_collection_metadatas = [requests.get(c["link"]).json() for c in sh_collections]
                    sh_metadata = next(filter(lambda m: m["datasource_type"] == sh_cid, sh_collection_metadatas))
                    logger.info(f"Updating {cid} metadata from {sh_stac_endpoint}:{sh_metadata['id']}")
                    opensearch_metadata[cid] = sh_metadata
                    if not data_source.get("endpoint"):
                        endpoint = opensearch_metadata[cid]["providers"][0]["url"]
                        endpoint = endpoint if endpoint.startswith("http") else "https://{}".format(endpoint)
                        data_source["endpoint"] = endpoint
                    data_source["dataset_id"] = data_source.get("dataset_id") \
                        or opensearch_metadata[cid]["datasource_type"]
                except StopIteration:
                    logger.warning(f"No STAC data available for collection with id {sh_cid}")

        if opensearch_metadata:
            metadata = dict_merge_recursive(opensearch_metadata, metadata, overwrite=True)

    metadata = _merge_layers_with_common_name(metadata)

    return GeoPySparkLayerCatalog(all_metadata=list(metadata.values()))