def upload_metadata(granule_id):
    """
    Creates and uploads metadata in stac and eo3 formats.
    :param granule_id: the id of the granule in format 'date/tile_id'
    :return: serialized stac metadata
    """

    # Local (NCI) source path and the destination prefix on S3.
    local_path = Path(NCI_DIR) / granule_id
    granule_s3_path = get_granule_s3_path(granule_id)

    s3_path = f"s3://{S3_BUCKET}/{granule_s3_path}/"
    s3_eo3_path = f"{s3_path}eo3-ARD-METADATA.yaml"
    s3_stac_path = f"{s3_path}stac-ARD-METADATA.json"

    # Build the EO3 dataset document, then derive a STAC item that links
    # back to the EO3 document's destination on S3.
    eo3 = create_eo3(local_path, granule_id)
    stac = to_stac_item(
        eo3,
        stac_item_destination_url=s3_stac_path,
        odc_dataset_metadata_url=s3_eo3_path,
        dataset_location=s3_path,
    )
    stac_dump = json.dumps(stac, default=json_fallback, indent=4)

    # Upload only the EO3 document here; the STAC document is returned.
    s3_dump(
        yaml.safe_dump(serialise.to_doc(eo3), default_flow_style=False),
        s3_eo3_path,
        ACL="bucket-owner-full-control",
        ContentType="text/vnd.yaml",
    )

    return stac_dump, s3_stac_path
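
A minimal usage sketch (the granule id and the surrounding setup are illustrative assumptions, not part of the example): upload_metadata() uploads the EO3 document itself and returns the STAC document for the caller to upload.

stac_dump, s3_stac_path = upload_metadata("2021-01-01/S2A_OPER_MSI_ARD_EXAMPLE_TILE")

# Upload the returned STAC document to its destination, mirroring the
# EO3 upload inside upload_metadata().
s3_dump(
    stac_dump,
    s3_stac_path,
    ACL="bucket-owner-full-control",
    ContentType="application/json",
)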
Example 2
def process_dataset(s3_obj):
    """
    Backfill an EO3 metadata document on S3 with properties from the
    original ARD-METADATA.yaml on the NCI filesystem, then upload the
    updated EO3 document and a derived STAC document.
    """
    s3_eo3_path = s3_obj.url
    s3_stac_path = s3_eo3_path.replace("eo3", "stac").replace("yaml", "json")
    s3_path = s3_eo3_path.replace("eo3-ARD-METADATA.yaml", "")

    # Segments 0-4 of the split URL are 's3:', '', the bucket and the first
    # two key prefixes; the granule id is everything after those, minus the
    # trailing filename.
    granule = os.path.join(*s3_eo3_path.split("/")[5:-1])
    nci_path = os.path.join(NCI_DIR, *s3_eo3_path.split("/")[5:-1], "ARD-METADATA.yaml")

    if "S2A_OPER_MSI_ARD" in granule:
        platform = "SENTINEL_2A"
    elif "S2B_OPER_MSI_ARD" in granule:
        platform = "SENTINEL_2B"
    else:
        raise ValueError(
            f"Expected granule id to contain either 'S2A_OPER_MSI_ARD' or 'S2B_OPER_MSI_ARD', found '{granule}'"
        )
    
    # The original (pre-EO3) metadata holds the region code and GQA fields
    # that are being backfilled into the EO3 document.
    with open(nci_path) as fin:
        eo_metadata = yaml.safe_load(fin)

    eo3_metadata = yaml.safe_load(s3_obj.data)

    eo3_metadata["properties"]["odc:region_code"] = eo_metadata["provider"]["reference_code"]
    eo3_metadata["properties"]["gqa:cep90"] = eo_metadata["gqa"]["residual"]["cep90"]
    eo3_metadata["properties"]["gqa:error_message"] = eo_metadata["gqa"]["error_message"]
    eo3_metadata["properties"]["gqa:final_gcp_count"] = eo_metadata["gqa"]["final_gcp_count"]
    eo3_metadata["properties"]["gqa:ref_source"] = eo_metadata["gqa"]["ref_source"]
    # The datatake start timestamp is the fourth-from-last '_'-separated
    # field of the granule id.
    eo3_metadata["properties"]["sentinel:datatake_start_datetime"] = granule.split("_")[-4]
    eo3_metadata["properties"]["eo:platform"] = platform
    eo3_metadata["properties"]["eo:instrument"] = "MSI"

    # Copy the per-axis GQA residual statistics across.
    for key in ["abs_iterative_mean", "abs", "iterative_mean", "iterative_stddev", "mean", "stddev"]:
        eo3_metadata["properties"][f"gqa:{key}_xy"] = eo_metadata["gqa"]["residual"][key]["xy"]

    eo3 = serialise.from_doc(eo3_metadata)
    stac = to_stac_item(
        eo3,
        stac_item_destination_url=s3_stac_path,
        odc_dataset_metadata_url=s3_eo3_path,
        dataset_location=s3_path,
    )
    stac_dump = json.dumps(stac, default=json_fallback, indent=4)
    eo3_dump = yaml.safe_dump(eo3_metadata, default_flow_style=False)

    # Upload the updated EO3 document and the derived STAC document.
    s3_dump(
        eo3_dump,
        s3_eo3_path,
        ACL="bucket-owner-full-control",
        ContentType="text/vnd.yaml",
    )

    s3_dump(
        stac_dump,
        s3_stac_path,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
    )
Example 3
def dc_to_stac(
    dataset: DatasetDoc,
    input_metadata: Path,
    output_path: Path,
    stac_base_url: str,
    explorer_base_url: str,
    do_validate: bool,
) -> dict:
    """
    Backwards compatibility wrapper as some other projects started using this
    method of the script.

    It's better to call eodatasets3.stac.to_stac_item() directly.
    """

    stac_destination_url = urljoin(stac_base_url, output_path.name)

    # Following previous behaviour: fall back to the STAC destination path.
    dataset_location = (
        dataset.locations[0] if dataset.locations else stac_destination_url
    )

    doc = eo3stac.to_stac_item(
        dataset,
        stac_item_destination_url=stac_destination_url,
        # This is potentially surprising:
        #     we just assume the caller uploads the ODC document to the
        #     same public folder (and with the same name),
        #     but we keep it for backwards compatibility.
        odc_dataset_metadata_url=urljoin(stac_base_url, input_metadata.name),
        explorer_base_url=explorer_base_url,
        dataset_location=dataset_location,
    )
    if do_validate:
        eo3stac.validate_item(doc)

    return doc
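
A minimal calling sketch, with illustrative paths and URLs (the loader call and every value here are assumptions, not part of the wrapper above):

from pathlib import Path

input_metadata = Path("scene.odc-metadata.yaml")
dataset = serialise.from_path(input_metadata)  # eodatasets3 document loader

item = dc_to_stac(
    dataset,
    input_metadata=input_metadata,
    output_path=Path("scene.stac-item.json"),
    stac_base_url="https://example.com/stac/",
    explorer_base_url="https://explorer.example.com/",
    do_validate=True,
)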
Example 4
def as_stac_item(dataset: DatasetItem):
    """
    Get a dict corresponding to a stac item
    """
    ds: Dataset = dataset.odc_dataset

    if ds is not None and is_doc_eo3(ds.metadata_doc):
        dataset_doc = serialise.from_doc(ds.metadata_doc, skip_validation=True)
        dataset_doc.locations = ds.uris

        # Geometry is optional in eo3, and needs to be calculated from grids if missing.
        # We can use ODC's own calculation that happens on index.
        if dataset_doc.geometry is None:
            fallback_extent = ds.extent
            if fallback_extent is not None:
                dataset_doc.geometry = fallback_extent.geom
                dataset_doc.crs = str(ds.crs)

        if ds.sources:
            dataset_doc.lineage = {
                classifier: [d.id] for classifier, d in ds.sources.items()
            }
        # Does ODC still put legacy lineage into indexed documents?
        elif ("source_datasets" in dataset_doc.lineage) and len(dataset_doc.lineage) == 1:
            # Convert from the old lineage format to the new one.
            dataset_doc.lineage = {
                classifier: [dataset["id"]]
                for classifier, dataset in dataset_doc.lineage["source_datasets"].items()
            }

    else:
        # Legacy (eo1) dataset: build an eo3 DatasetDoc from the indexed fields.

        dataset_doc = DatasetDoc(
            id=dataset.dataset_id,
            # Filled-in below.
            label=None,
            product=ProductDoc(dataset.product_name),
            locations=ds.uris if ds is not None else None,
            crs=str(dataset.geometry.crs),
            geometry=dataset.geometry.geom,
            grids=None,
            # TODO: Convert these from stac to eo3
            properties=Eo3Dict({
                "datetime": utc(dataset.center_time),
                **(dict(_build_properties(ds.metadata)) if ds else {}),
                "odc:processing_datetime": utc(dataset.creation_time),
            }),
            measurements={
                name: _band_to_measurement(
                    b,
                    dataset_location=ds.uris[0] if ds is not None and ds.uris else None,
                )
                for name, b in ds.measurements.items()
            } if ds is not None else {},
            accessories=_accessories_from_eo1(ds.metadata_doc) if ds is not None else {},
            # TODO: Fill in lineage. The datacube API only gives us full datasets, which is
            #       expensive. We only need a list of IDs here.
            lineage={},
        )

    if dataset_doc.label is None and ds is not None:
        dataset_doc.label = _utils.dataset_label(ds)

    item_doc = eo3stac.to_stac_item(
        dataset=dataset_doc,
        stac_item_destination_url=url_for(
            ".item",
            collection=dataset.product_name,
            dataset_id=dataset.dataset_id,
        ),
        odc_dataset_metadata_url=url_for("dataset.raw_doc",
                                         id_=dataset.dataset_id),
        explorer_base_url=url_for("default_redirect"),
    )
    # Add the region code that Explorer inferred.
    # (Explorer's region codes predate ODC's and support many more products.)
    item_doc["properties"]["cubedash:region_code"] = dataset.region_code

    return item_doc
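
The eo1-to-eo3 branch above calls a utc() helper that is not shown. A plausible sketch, for illustration only (the real helper may differ): it normalises timestamps to timezone-aware UTC before they go into the STAC properties.

from datetime import datetime, timezone

def utc(value: datetime) -> datetime:
    # Treat naive datetimes as UTC; convert aware ones to UTC.
    if value.tzinfo is None:
        return value.replace(tzinfo=timezone.utc)
    return value.astimezone(timezone.utc)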