def test_merge_recursive_preserve_input():
    a = {1: {2: 3}}
    b = {1: {4: 5}}
    result = dict_merge_recursive(a, b)
    assert result == {1: {2: 3, 4: 5}}
    # The merge must not mutate its inputs.
    assert a == {1: {2: 3}}
    assert b == {1: {4: 5}}
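# For reference, a minimal sketch of the merge semantics these tests assume
# (not the actual dict_merge_recursive implementation): merge `b` into `a`
# recursively without mutating either input; on a conflicting leaf value,
# raise ValueError unless overwrite=True.
def _dict_merge_recursive_sketch(a: dict, b: dict, overwrite: bool = False) -> dict:
    result = dict(a)  # shallow copy, so `a` itself is never mutated
    for key, value in b.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            # Both sides are dicts: recurse (the recursive call returns a fresh dict).
            result[key] = _dict_merge_recursive_sketch(result[key], value, overwrite=overwrite)
        elif key in result and result[key] != value and not overwrite:
            raise ValueError("Conflict for key {k!r}".format(k=key))
        else:
            result[key] = value
    return result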
def get_layer_catalog(service_registry: AbstractServiceRegistry = None) -> GeoPySparkLayerCatalog:
    """Get layer catalog (from JSON files)."""
    catalog_files = ConfigParams().layer_catalog_metadata_files
    logger.info("Reading layer catalog metadata from {f!r}".format(f=catalog_files[0]))
    metadata = read_json(catalog_files[0])
    if len(catalog_files) > 1:
        # Merge metadata recursively: index the collections by id,
        # then apply each subsequent file as an update.
        metadata = {layer["id"]: layer for layer in metadata}
        for path in catalog_files[1:]:
            logger.info("Updating layer catalog metadata from {f!r}".format(f=path))
            updates = {layer["id"]: layer for layer in read_json(path)}
            metadata = dict_merge_recursive(metadata, updates, overwrite=True)
        metadata = list(metadata.values())

    return GeoPySparkLayerCatalog(
        all_metadata=metadata,
        service_registry=service_registry or InMemoryServiceRegistry(),
    )
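# Illustration with made-up catalog contents: given a base file
#     [{"id": "S2", "title": "Sentinel-2"}]
# and an override file
#     [{"id": "S2", "_vito": {"data_source": {"type": "file"}}}]
# the recursive merge keyed on "id" yields a single "S2" entry carrying both
# the "title" and the "_vito" block, with later files winning on conflicts.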
# The parametrize cases here are illustrative stand-ins; the original
# test data was not preserved in this excerpt.
@pytest.mark.parametrize(["a", "b", "expected"], [
    ({}, {}, {}),
    ({1: 2}, {3: 4}, {1: 2, 3: 4}),
    ({1: {2: 3}}, {1: {4: 5}}, {1: {2: 3, 4: 5}}),
])
def test_merge_recursive_default(a, b, expected):
    assert dict_merge_recursive(a, b) == expected
# Illustrative conflict cases (assumed, not the original data): same key,
# different leaf values on both sides.
@pytest.mark.parametrize(["a", "b", "expected"], [
    ({1: 2}, {1: 3}, {1: 3}),
    ({1: {2: 3}}, {1: {2: 4}}, {1: {2: 4}}),
])
def test_merge_recursive_overwrite_conflict(a, b, expected):
    # Without overwrite, a conflicting leaf value must raise.
    with pytest.raises(ValueError):
        dict_merge_recursive(a, b)
    # With overwrite=True, the value from `b` wins.
    result = dict_merge_recursive(a, b, overwrite=True)
    assert result == expected
# Illustrative cases (assumed, not the original data).
@pytest.mark.parametrize(["a", "b", "expected"], [
    ({1: 2}, {1: 3}, {1: 3}),
    ({1: {2: 3}}, {1: {2: 4}, 5: 6}, {1: {2: 4}, 5: 6}),
])
def test_merge_recursive_overwrite(a, b, expected):
    result = dict_merge_recursive(a, b, overwrite=True)
    assert result == expected
def get_layer_catalog(opensearch_enrich=False) -> GeoPySparkLayerCatalog:
    """Get layer catalog (from JSON files)."""
    metadata: Dict[str, dict] = {}

    def read_catalog_file(catalog_file) -> Dict[str, dict]:
        return {coll["id"]: coll for coll in read_json(catalog_file)}

    catalog_files = ConfigParams().layer_catalog_metadata_files
    for path in catalog_files:
        logger.info(f"Reading layer catalog metadata from {path}")
        metadata = dict_merge_recursive(metadata, read_catalog_file(path), overwrite=True)

    if opensearch_enrich:
        opensearch_metadata = {}
        sh_collection_metadatas = None
        opensearch_instances = {}

        def opensearch_instance(endpoint: str) -> OpenSearch:
            # Cache one OpenSearch client per (lowercased) endpoint.
            endpoint = endpoint.lower()
            opensearch = opensearch_instances.get(endpoint)
            if opensearch is not None:
                return opensearch
            if "oscars" in endpoint or "terrascope" in endpoint or "vito.be" in endpoint:
                opensearch = OpenSearchOscars(endpoint=endpoint)
            elif "creodias" in endpoint:
                opensearch = OpenSearchCreodias(endpoint=endpoint)
            else:
                raise ValueError(endpoint)
            opensearch_instances[endpoint] = opensearch
            return opensearch

        for cid, collection_metadata in metadata.items():
            data_source = deep_get(collection_metadata, "_vito", "data_source", default={})

            os_cid = data_source.get("opensearch_collection_id")
            if os_cid:
                # Enrich from an OpenSearch endpoint (Oscars/Terrascope or Creodias).
                os_endpoint = data_source.get("opensearch_endpoint") or ConfigParams().default_opensearch_endpoint
                logger.info(f"Updating {cid} metadata from {os_endpoint}:{os_cid}")
                try:
                    opensearch_metadata[cid] = opensearch_instance(os_endpoint).get_metadata(collection_id=os_cid)
                except Exception:
                    logger.warning(traceback.format_exc())
            elif data_source.get("type") == "sentinel-hub":
                # Enrich from the Sentinel Hub collections STAC index.
                sh_cid = data_source.get("collection_id")
                if sh_cid is None:
                    continue
                try:
                    sh_stac_endpoint = "https://collections.eurodatacube.com/stac/index.json"
                    if sh_collection_metadatas is None:
                        # Fetch the STAC index (and each collection document) lazily, once.
                        sh_collections = requests.get(sh_stac_endpoint).json()
                        sh_collection_metadatas = [requests.get(c["link"]).json() for c in sh_collections]
                    sh_metadata = next(filter(lambda m: m["datasource_type"] == sh_cid, sh_collection_metadatas))
                    logger.info(f"Updating {cid} metadata from {sh_stac_endpoint}:{sh_metadata['id']}")
                    opensearch_metadata[cid] = sh_metadata
                    if not data_source.get("endpoint"):
                        endpoint = opensearch_metadata[cid]["providers"][0]["url"]
                        endpoint = endpoint if endpoint.startswith("http") else "https://{}".format(endpoint)
                        data_source["endpoint"] = endpoint
                    data_source["dataset_id"] = data_source.get("dataset_id") or opensearch_metadata[cid]["datasource_type"]
                except StopIteration:
                    logger.warning(f"No STAC data available for collection with id {sh_cid}")

        if opensearch_metadata:
            # The JSON file metadata takes precedence over the enrichment data.
            metadata = dict_merge_recursive(opensearch_metadata, metadata, overwrite=True)

    metadata = _merge_layers_with_common_name(metadata)

    return GeoPySparkLayerCatalog(all_metadata=list(metadata.values()))
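# Usage sketch: enrichment is opt-in since it triggers network requests to the
# OpenSearch / Sentinel Hub endpoints at catalog build time, e.g.
#
#     catalog = get_layer_catalog(opensearch_enrich=True)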