Beispiel #1
0
def test_dataset_measurement_paths():
    """Exercise ``measurement_paths`` on a sample dataset.

    Builds a three-band dataset whose measurement paths are relative
    file names, checks the dataset's URI-related attributes, and verifies
    that every path returned by ``measurement_paths`` is the expected
    ``file:///tmp/<band>.tiff`` URI.  Finally asserts that, once the
    dataset's ``uris`` are cleared, ``local_uri`` is ``None`` and
    ``measurement_paths`` raises ``ValueError``.
    """
    fmt = 'GeoTiff'
    band_names = ['a', 'b', 'c']
    band_specs = [{'name': band, 'path': f'{band}.tiff'} for band in band_names]

    ds = mk_sample_dataset(band_specs,
                           uri='file:///tmp/datataset.yml',
                           format=fmt)

    assert ds.local_uri == ds.uris[0]
    assert ds.uri_scheme == 'file'
    assert ds.format == fmt

    resolved = measurement_paths(ds)
    for band, location in resolved.items():
        assert location == f'file:///tmp/{band}.tiff'

    # With the URIs removed the dataset has no location left to report.
    ds.uris = None
    assert ds.local_uri is None
    with pytest.raises(ValueError):
        measurement_paths(ds)
Beispiel #2
0
def _require_fusable(condition: bool, message: str) -> None:
    """Raise ``AssertionError(message)`` unless *condition* is true."""
    # An explicit raise (rather than an ``assert`` statement) keeps the check
    # active when Python runs with ``-O``; AssertionError is kept as the
    # exception type so existing callers that catch it keep working.
    if not condition:
        raise AssertionError(message)


def fuse_ds(ds_1: Dataset,
            ds_2: Dataset,
            product: Optional[DatasetType] = None) -> Dataset:
    """
    Fuse two datasets into a single new dataset.

    It requires that:
      - the products are fusable
      - grids with the same name are identical
      - labels are in the format 'product_suffix' with identical suffixes
      - CRSs are identical
      - datetimes are identical
      - $schemas are identical

    The fused metadata document contains the union of both datasets' grids
    and measurements (entries of ``ds_2`` win on a name clash), the
    properties that are present with equal values in both datasets, and a
    lineage section listing both source dataset ids.  Measurement entries
    are copied before their ``path`` is overwritten with the value reported
    by ``measurement_paths``, so the measurement sections of the input
    metadata documents are not written to by this function.  Grid entries
    are shared by reference with the inputs.

    :param ds_1: first dataset to fuse
    :param ds_2: second dataset to fuse
    :param product: product of the fused dataset; when ``None`` it is
        obtained from ``fuse_products(ds_1.type, ds_2.type)``
    :return: a new ``Dataset`` built from the fused document, with
        ``uris=[""]``
    :raises AssertionError: if any of the requirements on grids, labels,
        ``$schema``, ``crs`` or ``datetime`` listed above is violated
    """

    doc_1, doc_2 = ds_1.metadata_doc, ds_2.metadata_doc

    if product is None:
        product = fuse_products(ds_1.type, ds_2.type)

    fused_doc = dict()

    fused_doc["id"] = str(
        odc_uuid(product.name, "0.0.0", sources=[doc_1["id"], doc_2["id"]]))
    fused_doc["lineage"] = {"source_datasets": [doc_1["id"], doc_2["id"]]}

    # check that all grids with the same name are identical
    grids_1, grids_2 = doc_1["grids"], doc_2["grids"]
    conflicting_grids = [
        name for name in grids_1
        if name in grids_2 and grids_1[name] != grids_2[name]
    ]
    _require_fusable(
        not conflicting_grids,
        f"grids with the same name differ between datasets: {conflicting_grids}")

    # TODO: handle the case that grids have conflicts in a separate function
    fused_doc["grids"] = {**grids_1, **grids_2}

    # The label suffix is whatever remains of the label once the product
    # name has been removed from it.
    label_suffix = doc_1["label"].replace(doc_1["product"]["name"], "")
    other_suffix = doc_2["label"].replace(doc_2["product"]["name"], "")
    _require_fusable(
        label_suffix == other_suffix,
        f"label suffixes differ: {label_suffix!r} != {other_suffix!r}")
    fused_doc["label"] = f"{product.name}{label_suffix}"

    equal_keys = ["$schema", "crs"]
    for key in equal_keys:
        _require_fusable(
            doc_1[key] == doc_2[key],
            f"{key!r} differs between datasets: "
            f"{doc_1[key]!r} != {doc_2[key]!r}")
        fused_doc[key] = doc_1[key]

    props_1, props_2 = doc_1["properties"], doc_2["properties"]
    # datetime is the only mandatory property
    _require_fusable(
        props_1["datetime"] == props_2["datetime"],
        f"datetimes differ: {props_1['datetime']!r} != {props_2['datetime']!r}")

    # copy over all identical properties; a key must be present in both
    # documents, so a None value in one is not matched by absence in the other
    fused_doc["properties"] = {
        key: val
        for key, val in props_1.items()
        if key in props_2 and val == props_2[key]
    }

    # Copy every measurement entry: the paths are overwritten below and the
    # entries would otherwise still be the dicts owned by the input documents.
    fused_doc["measurements"] = {
        name: dict(spec)
        for name, spec in {
            **doc_1["measurements"],
            **doc_2["measurements"]
        }.items()
    }
    for key, path in {
            **measurement_paths(ds_1),
            **measurement_paths(ds_2)
    }.items():
        fused_doc["measurements"][key]["path"] = path

    fused_ds = Dataset(product, prep_eo3(fused_doc), uris=[""])
    return fused_ds