def test_merge_recursive_preserve_input():
    a = {1: {2: 3}}
    b = {1: {4: 5}}
    result = dict_merge_recursive(a, b)
    assert result == {1: {2: 3, 4: 5}}
    assert a == {1: {2: 3}}
    assert b == {1: {4: 5}}
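For context, a minimal sketch of a recursive merge that would satisfy the assertions above (inputs left untouched, nested dicts merged key by key). The actual dict_merge_recursive in the source may differ; the overwrite/conflict handling shown here is inferred from the overwrite tests further below.

def dict_merge_recursive(a: dict, b: dict, overwrite: bool = False) -> dict:
    """Recursively merge ``b`` into ``a`` without mutating either input (sketch)."""
    result = dict(a)  # shallow copy; nested dicts are rebuilt below
    for key, value in b.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            # Both sides are dicts: merge them recursively.
            result[key] = dict_merge_recursive(result[key], value, overwrite=overwrite)
        elif key in result and result[key] != value and not overwrite:
            raise ValueError(f"Conflicting values for key {key!r}")
        else:
            result[key] = value
    return result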
Example #2
def get_layer_catalog(
    service_registry: AbstractServiceRegistry = None
) -> GeoPySparkLayerCatalog:
    """
    Get layer catalog (from JSON files)
    """
    catalog_files = ConfigParams().layer_catalog_metadata_files
    logger.info(
        "Reading layer catalog metadata from {f!r}".format(f=catalog_files[0]))
    metadata = read_json(catalog_files[0])
    if len(catalog_files) > 1:
        # Merge metadata recursively
        metadata = {l["id"]: l for l in metadata}
        for path in catalog_files[1:]:
            logger.info(
                "Updating layer catalog metadata from {f!r}".format(f=path))
            updates = {l["id"]: l for l in read_json(path)}
            metadata = dict_merge_recursive(metadata, updates, overwrite=True)
        metadata = list(metadata.values())

    return GeoPySparkLayerCatalog(
        all_metadata=metadata,
        service_registry=service_registry or InMemoryServiceRegistry(),
    )
def test_merge_recursive_default(a, b, expected):
    assert dict_merge_recursive(a, b) == expected
def test_merge_recursive_overwrite_conflict(a, b, expected):
    with pytest.raises(ValueError):
        result = dict_merge_recursive(a, b)
    result = dict_merge_recursive(a, b, overwrite=True)
    assert result == expected
def test_merge_recursive_overwrite(a, b, expected):
    result = dict_merge_recursive(a, b, overwrite=True)
    assert result == expected
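The (a, b, expected) signatures suggest these tests are driven by pytest.mark.parametrize in the original module. A hypothetical parametrization sketch (the concrete cases here are illustrative, not from the source):

import pytest

@pytest.mark.parametrize(["a", "b", "expected"], [
    ({}, {}, {}),
    ({1: 2}, {3: 4}, {1: 2, 3: 4}),
    ({1: {2: 3}}, {1: {4: 5}}, {1: {2: 3, 4: 5}}),
])
def test_merge_recursive_default(a, b, expected):
    assert dict_merge_recursive(a, b) == expected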
Example #6
def get_layer_catalog(opensearch_enrich=False) -> GeoPySparkLayerCatalog:
    """
    Get layer catalog (from JSON files)
    """
    metadata: Dict[str, dict] = {}

    def read_catalog_file(catalog_file) -> Dict[str, dict]:
        return {coll["id"]: coll for coll in read_json(catalog_file)}

    catalog_files = ConfigParams().layer_catalog_metadata_files
    for path in catalog_files:
        logger.info(f"Reading layer catalog metadata from {path}")
        metadata = dict_merge_recursive(metadata,
                                        read_catalog_file(path),
                                        overwrite=True)

    # Optionally enrich the collected collection metadata with details fetched
    # from OpenSearch (Oscars / Creodias) or the Sentinel Hub STAC index.
    if opensearch_enrich:
        opensearch_metadata = {}
        sh_collection_metadatas = None
        opensearch_instances = {}

        def opensearch_instance(endpoint: str) -> OpenSearch:
            # Cache one OpenSearch client per (lower-cased) endpoint.
            endpoint = endpoint.lower()
            opensearch = opensearch_instances.get(endpoint)

            if opensearch is not None:
                return opensearch

            if "oscars" in endpoint or "terrascope" in endpoint or "vito.be" in endpoint:
                opensearch = OpenSearchOscars(endpoint=endpoint)
            elif "creodias" in endpoint:
                opensearch = OpenSearchCreodias(endpoint=endpoint)
            else:
                raise ValueError(endpoint)

            opensearch_instances[endpoint] = opensearch
            return opensearch

        # Look up each collection's data source and fetch richer metadata for it.
        for cid, collection_metadata in metadata.items():
            data_source = deep_get(collection_metadata,
                                   "_vito",
                                   "data_source",
                                   default={})
            os_cid = data_source.get("opensearch_collection_id")
            if os_cid:
                os_endpoint = (
                    data_source.get("opensearch_endpoint")
                    or ConfigParams().default_opensearch_endpoint
                )
                logger.info(
                    f"Updating {cid} metadata from {os_endpoint}:{os_cid}")
                try:
                    opensearch_metadata[cid] = opensearch_instance(
                        os_endpoint).get_metadata(collection_id=os_cid)
                except Exception:
                    logger.warning(traceback.format_exc())
            elif data_source.get("type") == "sentinel-hub":
                sh_cid = data_source.get("collection_id")

                if sh_cid is None:
                    continue

                try:
                    sh_stac_endpoint = "https://collections.eurodatacube.com/stac/index.json"

                    if sh_collection_metadatas is None:
                        sh_collections = requests.get(sh_stac_endpoint).json()
                        sh_collection_metadatas = [
                            requests.get(c["link"]).json()
                            for c in sh_collections
                        ]

                    sh_metadata = next(
                        filter(lambda m: m["datasource_type"] == sh_cid,
                               sh_collection_metadatas))
                    logger.info(
                        f"Updating {cid} metadata from {sh_stac_endpoint}:{sh_metadata['id']}"
                    )
                    opensearch_metadata[cid] = sh_metadata
                    if not data_source.get("endpoint"):
                        endpoint = opensearch_metadata[cid]["providers"][0]["url"]
                        if not endpoint.startswith("http"):
                            endpoint = "https://{}".format(endpoint)
                        data_source["endpoint"] = endpoint
                    data_source["dataset_id"] = data_source.get(
                        "dataset_id"
                    ) or opensearch_metadata[cid]["datasource_type"]
                except StopIteration:
                    logger.warning(
                        f"No STAC data available for collection with id {sh_cid}"
                    )

        if opensearch_metadata:
            metadata = dict_merge_recursive(opensearch_metadata,
                                            metadata,
                                            overwrite=True)

    metadata = _merge_layers_with_common_name(metadata)

    return GeoPySparkLayerCatalog(all_metadata=list(metadata.values()))
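Note the argument order in the final dict_merge_recursive call: the locally configured catalog metadata is passed second with overwrite=True, so its values take precedence over the OpenSearch/STAC-derived fields. A small illustration, assuming the second argument wins on conflicts as the overwrite tests above imply:

opensearch_metadata = {"S2": {"title": "Title from OpenSearch", "license": "proprietary"}}
local_metadata = {"S2": {"title": "Sentinel-2 L2A"}}
merged = dict_merge_recursive(opensearch_metadata, local_metadata, overwrite=True)
# merged == {"S2": {"title": "Sentinel-2 L2A", "license": "proprietary"}}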