def upload_metadata(granule_id):
    """
    Create eo3 and stac metadata for a granule and upload the eo3 document to S3.

    :param granule_id: the id of the granule in format 'date/tile_id'
    :return: tuple of (serialised stac item json, destination s3 url for the stac item)
    """
    source_path = Path(NCI_DIR) / granule_id
    base_url = f"s3://{S3_BUCKET}/{get_granule_s3_path(granule_id)}/"
    eo3_url = f"{base_url}eo3-ARD-METADATA.yaml"
    stac_url = f"{base_url}stac-ARD-METADATA.json"

    eo3_doc = create_eo3(source_path, granule_id)
    stac_item = to_stac_item(
        eo3_doc,
        stac_item_destination_url=stac_url,
        odc_dataset_metadata_url=eo3_url,
        dataset_location=base_url,
    )
    serialised_stac = json.dumps(stac_item, default=json_fallback, indent=4)

    # Only the eo3 yaml is uploaded here; the stac dump is handed back for the
    # caller to upload to `stac_url`.
    s3_dump(
        yaml.safe_dump(serialise.to_doc(eo3_doc), default_flow_style=False),
        eo3_url,
        ACL="bucket-owner-full-control",
        ContentType="text/vnd.yaml",
    )
    return serialised_stac, stac_url
def process_dataset(s3_obj):
    """
    Enrich an eo3 metadata document on S3 with properties taken from the
    matching NCI 'ARD-METADATA.yaml' (legacy eo) document, then re-upload the
    eo3 yaml and write the corresponding stac item json beside it.

    :param s3_obj: object exposing `.url` (s3 url of the eo3 yaml) and `.data`
        (the yaml document contents) — e.g. an item from an s3 listing helper.
    :raises ValueError: if the granule id does not identify an S2A/S2B platform
    """
    s3_eo3_path = s3_obj.url
    # Derive the sibling stac path by swapping the exact, known filename.
    # (Previously this used two global substring replaces — "eo3"->"stac" and
    # "yaml"->"json" — which would corrupt the url if either substring
    # happened to appear in the bucket name or key prefix.)
    s3_stac_path = s3_eo3_path.replace("eo3-ARD-METADATA.yaml", "stac-ARD-METADATA.json")
    s3_path = s3_eo3_path.replace("eo3-ARD-METADATA.yaml", "")
    # NOTE(review): assumes urls shaped like s3://bucket/a/b/<granule.../>file,
    # where components [5:-1] form the granule path — confirm against the lister.
    granule = os.path.join(*s3_eo3_path.split('/')[5:-1])
    nci_path = os.path.join(NCI_DIR, *s3_eo3_path.split('/')[5:-1], "ARD-METADATA.yaml")

    if "S2A_OPER_MSI_ARD" in granule:
        platform = "SENTINEL_2A"
    elif "S2B_OPER_MSI_ARD" in granule:
        platform = "SENTINEL_2B"
    else:
        raise ValueError(
            f"Expected granule id to contain either 'S2A_OPER_MSI_ARD' or 'S2B_OPER_MSI_ARD', found '{granule}'"
        )

    with open(nci_path) as fin:
        eo_metadata = yaml.safe_load(fin)
    eo3_metadata = yaml.safe_load(s3_obj.data)

    # Copy across the properties that only exist in the legacy eo document.
    eo3_metadata["properties"]["odc:region_code"] = eo_metadata["provider"]["reference_code"]
    eo3_metadata["properties"]["gqa:cep90"] = eo_metadata["gqa"]["residual"]["cep90"]
    eo3_metadata["properties"]["gqa:error_message"] = eo_metadata["gqa"]["error_message"]
    eo3_metadata["properties"]["gqa:final_gcp_count"] = eo_metadata["gqa"]["final_gcp_count"]
    eo3_metadata["properties"]["gqa:ref_source"] = eo_metadata["gqa"]["ref_source"]
    # The datatake start timestamp is embedded in the granule id (4th field
    # from the end) — presumably per the ESA naming convention; verify.
    eo3_metadata["properties"]["sentinel:datatake_start_datetime"] = granule.split("_")[-4]
    eo3_metadata["properties"]["eo:platform"] = platform
    eo3_metadata["properties"]["eo:instrument"] = "MSI"
    for key in ["abs_iterative_mean", "abs", "iterative_mean", "iterative_stddev", "mean", "stddev"]:
        eo3_metadata["properties"][f"gqa:{key}_xy"] = eo_metadata["gqa"]["residual"][key]["xy"]

    eo3 = serialise.from_doc(eo3_metadata)
    stac = to_stac_item(
        eo3,
        stac_item_destination_url=s3_stac_path,
        odc_dataset_metadata_url=s3_eo3_path,
        dataset_location=s3_path,
    )
    stac_dump = json.dumps(stac, default=json_fallback, indent=4)
    eo3_dump = yaml.safe_dump(eo3_metadata, default_flow_style=False)

    # Overwrite the eo3 yaml in place, then publish the stac item beside it.
    s3_dump(
        eo3_dump,
        s3_eo3_path,
        ACL="bucket-owner-full-control",
        ContentType="text/vnd.yaml",
    )
    s3_dump(
        stac_dump,
        s3_stac_path,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
    )
def dc_to_stac(
    dataset: DatasetDoc,
    input_metadata: Path,
    output_path: Path,
    stac_base_url: str,
    explorer_base_url: str,
    do_validate: bool,
) -> dict:
    """
    Backwards compatibility wrapper as some other projects started using this
    method of the script.

    It's better to call eodatasets3.stac.to_stac_item() directly.
    """
    destination_url = urljoin(stac_base_url, output_path.name)

    # Following previous behaviour -- fall back to the stac destination path
    # when the dataset carries no location of its own.
    if dataset.locations:
        dataset_location = dataset.locations[0]
    else:
        dataset_location = destination_url

    item = eo3stac.to_stac_item(
        dataset,
        stac_item_destination_url=destination_url,
        # This is potentially surprising: we simply assume the odc document is
        # uploaded to the same public folder (and with the same name).
        # Kept for backwards compatibility.
        odc_dataset_metadata_url=urljoin(stac_base_url, input_metadata.name),
        explorer_base_url=explorer_base_url,
        dataset_location=dataset_location,
    )
    if do_validate:
        eo3stac.validate_item(item)
    return item
def as_stac_item(dataset: DatasetItem):
    """
    Get a dict corresponding to a stac item for the given Explorer dataset.

    Handles both eo3 documents (deserialised directly) and legacy eo1
    documents (converted field-by-field into a minimal eo3 DatasetDoc).
    """
    ds: Dataset = dataset.odc_dataset
    if ds is not None and is_doc_eo3(ds.metadata_doc):
        dataset_doc = serialise.from_doc(ds.metadata_doc, skip_validation=True)
        dataset_doc.locations = ds.uris

        # Geometry is optional in eo3, and needs to be calculated from grids if missing.
        # We can use ODC's own calculation that happens on index.
        if dataset_doc.geometry is None:
            fallback_extent = ds.extent
            if fallback_extent is not None:
                dataset_doc.geometry = fallback_extent.geom
                dataset_doc.crs = str(ds.crs)

        if ds.sources:
            # BUG FIX: iterate the mapping's items. Iterating the dict directly
            # yields only the classifier keys, so the 2-tuple unpack would
            # raise at runtime.
            dataset_doc.lineage = {
                classifier: [source.id] for classifier, source in ds.sources.items()
            }
        # Does ODC still put legacy lineage into indexed documents?
        elif ("source_datasets" in dataset_doc.lineage) and len(dataset_doc.lineage) == 1:
            # From old to new lineage type.
            # BUG FIX: `.items()` needed here too, same reason as above.
            # (Also renamed the loop variable: it shadowed the `dataset`
            # parameter of this function.)
            dataset_doc.lineage = {
                classifier: [source["id"]]
                for classifier, source in dataset_doc.lineage["source_datasets"].items()
            }
    else:
        # eo1 to eo3
        dataset_doc = DatasetDoc(
            id=dataset.dataset_id,
            # Filled-in below.
            label=None,
            product=ProductDoc(dataset.product_name),
            locations=ds.uris if ds is not None else None,
            crs=str(dataset.geometry.crs),
            geometry=dataset.geometry.geom,
            grids=None,
            # TODO: Convert these from stac to eo3
            properties=Eo3Dict(
                {
                    "datetime": utc(dataset.center_time),
                    **(dict(_build_properties(ds.metadata)) if ds else {}),
                    "odc:processing_datetime": utc(dataset.creation_time),
                }
            ),
            measurements={
                name: _band_to_measurement(
                    b,
                    dataset_location=ds.uris[0] if ds is not None and ds.uris else None,
                )
                for name, b in ds.measurements.items()
            }
            if ds is not None
            else {},
            accessories=_accessories_from_eo1(ds.metadata_doc) if ds is not None else {},
            # TODO: Fill in lineage. The datacube API only gives us full datasets,
            #       which is expensive. We only need a list of IDs here.
            lineage={},
        )

    if dataset_doc.label is None and ds is not None:
        dataset_doc.label = _utils.dataset_label(ds)

    item_doc = eo3stac.to_stac_item(
        dataset=dataset_doc,
        stac_item_destination_url=url_for(
            ".item",
            collection=dataset.product_name,
            dataset_id=dataset.dataset_id,
        ),
        odc_dataset_metadata_url=url_for("dataset.raw_doc", id_=dataset.dataset_id),
        explorer_base_url=url_for("default_redirect"),
    )
    # Add the region code that Explorer inferred.
    # (Explorer's region codes predate ODC's and support many more products.)
    item_doc["properties"]["cubedash:region_code"] = dataset.region_code
    return item_doc