def _read_wagl_metadata(p: DatasetAssembler, granule_group: h5py.Group):
    """
    Load WAGL's scalar metadata documents from the granule and record them
    on the dataset assembler (processed time, maturity, software versions,
    and the full wagl document as user metadata).
    """
    metadata_paths = [
        path
        for path in _find_h5_paths(granule_group, "SCALAR")
        if "METADATA" in path
    ]
    try:
        # Unpacking an empty list raises ValueError — reuse that as our check.
        wagl_path, *ancil_paths = metadata_paths
    except ValueError:
        raise ValueError("No nbar metadata found in granule")

    [wagl_doc] = loads_yaml(granule_group[wagl_path][()])

    try:
        p.processed = get_path(wagl_doc, ("system_information", "time_processed"))
    except PathAccessError:
        raise ValueError(f"WAGL dataset contains no time processed. Path {wagl_path}")

    # Fold each extra ancillary document into the wagl doc under a numbered key.
    for index, ancil_path in enumerate(ancil_paths, start=2):
        ancil_doc = next(iter(loads_yaml(granule_group[ancil_path][()])))
        wagl_doc.setdefault(f"wagl_{index}", {}).update(ancil_doc["ancillary"])

    p.properties["dea:dataset_maturity"] = _determine_maturity(
        p.datetime, p.processed, wagl_doc
    )

    _take_software_versions(p, wagl_doc)
    p.extend_user_metadata("wagl", wagl_doc)
def test_dea_c3_naming_conventions(tmp_path: Path):
    """
    A sample scene for Alchemist C3 processing that tests the naming conventions.
    """
    p = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    p.platform = "landsat-7"
    p.datetime = datetime(1998, 7, 30)
    p.product_family = "wo"
    p.processed = "1998-07-30T12:23:23"
    p.maturity = "interim"
    p.producer = "ga.gov.au"
    p.region_code = "090081"

    # With version and collection number still missing, done() must refuse.
    with pytest.raises(
        ValueError, match="Need more properties to fulfill naming conventions."
    ):
        p.done()

    # Supply the missing properties.
    p.dataset_version = "1.6.0"
    p.collection_number = "3"

    # Collection number returned as integer via the getter.
    assert p.collection_number == 3

    # Naming now succeeds.
    dataset_id, metadata_path = p.done()

    relative_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert relative_offset == (
        "ga_ls_wo_3/1-6-0/090/081/1998/07/30/"
        "ga_ls_wo_3_090081_1998-07-30_interim.odc-metadata.yaml"
    )
def test_add_source_dataset(tmp_path: Path, inherit_geom):
    """
    Adding a source dataset should auto-inherit its properties, and — when
    ``inherit_geom`` is set — its geometry too; otherwise the geometry comes
    from the written measurement.
    """
    from eodatasets3 import serialise

    p = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    source_dataset = serialise.from_path(
        Path(__file__).parent / "data/LC08_L1TP_089080_20160302_20170328_01_T1.yaml"
    )
    p.add_source_dataset(
        source_dataset, auto_inherit_properties=True, inherit_geometry=inherit_geom
    )

    p.maturity = "interim"
    p.collection_number = "3"
    p.dataset_version = "1.6.0"
    p.producer = "ga.gov.au"
    p.processed = "1998-07-30T12:23:23"
    p.product_family = "wofs"

    p.write_measurement(
        "water",
        Path(__file__).parent
        / "data/wofs/ga_ls_wofs_3_099081_2020-07-26_interim_water_clipped.tif",
    )
    # Renamed from `id`/`path`: don't shadow the `id` builtin (and match the
    # `dataset_id, metadata_path` naming used by the sibling tests).
    dataset_id, metadata_path = p.done()

    output = serialise.from_path(metadata_path)
    if inherit_geom:
        # POLYGON((609615 -3077085, 378285 -3077085, 378285 -3310515, 609615 -3310515, 609615 -3077085))
        assert output.geometry == source_dataset.geometry
    else:
        # POLYGON((684285 -3439275, 684285 -3444495, 689925 -3444495, 689925 -3439275, 684285 -3439275))
        # Geometry is not set from the source dataset, but instead from the added wofs measurement
        assert output.geometry != source_dataset.geometry
def _apply_wagl_metadata(p: DatasetAssembler, wagl_doc: Dict):
    """
    Copy acquisition and provenance fields from a WAGL metadata document
    onto the dataset assembler.
    """
    acquisition = wagl_doc["source_datasets"]

    p.datetime = acquisition["acquisition_datetime"]
    p.platform = acquisition["platform_id"]
    p.instrument = acquisition["sensor_id"]

    try:
        p.processed = get_path(wagl_doc, ("system_information", "time_processed"))
    except PathAccessError:
        raise RuntimeError("WAGL dataset contains no processed time.")

    _take_software_versions(p, wagl_doc)
    p.extend_user_metadata("wagl", wagl_doc)
def test_s2_naming_conventions(tmp_path: Path):
    """A minimal dataset with sentinel platform/instrument"""
    p = DatasetAssembler(tmp_path, naming_conventions="dea_s2")
    p.platform = "sentinel-2a"
    p.instrument = "msi"
    p.datetime = datetime(2018, 11, 4)
    p.product_family = "blueberries"
    p.processed = "2018-11-05T12:23:23"
    p.producer = "ga.gov.au"
    p.dataset_version = "1.0.0"
    p.region_code = "Oz"
    p.properties["odc:file_format"] = "GeoTIFF"
    p.properties["sentinel:sentinel_tile_id"] = (
        "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
    )

    p.note_source_datasets(
        "telemetry",
        # Accepts multiple, and they can be strings or UUIDs:
        "ca705033-0fc4-4f38-a47e-f425dfb4d0c7",
        uuid.UUID("3781e90f-b677-40af-9439-b40f6e4dfadd"),
    )

    # The property normaliser should have extracted inner fields
    assert p.properties["sentinel:datatake_start_datetime"] == datetime(
        2017, 8, 22, 1, 56, 26, tzinfo=timezone.utc
    )

    dataset_id, metadata_path = p.done()

    # The s2 naming conventions have an extra subfolder of the datatake start time.
    offset = metadata_path.relative_to(tmp_path).as_posix()
    assert offset == (
        "ga_s2am_blueberries_1/Oz/2018/11/04/20170822T015626/"
        "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.odc-metadata.yaml"
    )

    assert_expected_eo3_path(
        {
            "$schema": "https://schemas.opendatacube.org/dataset",
            "accessories": {},
            "id": dataset_id,
            "label": "ga_s2am_blueberries_1-0-0_Oz_2018-11-04",
            "product": {
                "href": "https://collections.dea.ga.gov.au/product/ga_s2am_blueberries_1",
                "name": "ga_s2am_blueberries_1",
            },
            "properties": {
                "datetime": datetime(2018, 11, 4, 0, 0),
                "eo:instrument": "msi",
                "eo:platform": "sentinel-2a",
                "odc:dataset_version": "1.0.0",
                "odc:file_format": "GeoTIFF",
                "odc:processing_datetime": datetime(2018, 11, 5, 12, 23, 23),
                "odc:producer": "ga.gov.au",
                "odc:product_family": "blueberries",
                "odc:region_code": "Oz",
                "sentinel:datatake_start_datetime": datetime(2017, 8, 22, 1, 56, 26),
                "sentinel:sentinel_tile_id": (
                    "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
                ),
            },
            "lineage": {
                "telemetry": [
                    "ca705033-0fc4-4f38-a47e-f425dfb4d0c7",
                    "3781e90f-b677-40af-9439-b40f6e4dfadd",
                ]
            },
        },
        expected_path=metadata_path,
    )
def test_s2_naming_conventions(tmp_path: Path):
    """A minimal dataset with sentinel platform/instrument"""
    # NOTE(review): an earlier chunk defines a test with this same name. If both
    # actually live in one module, this later definition shadows the earlier one
    # under pytest — confirm they belong to separate test files.
    assembler = DatasetAssembler(tmp_path, naming_conventions="dea_s2")
    assembler.platform = "sentinel-2a"
    assembler.instrument = "msi"
    assembler.datetime = datetime(2018, 11, 4)
    assembler.product_family = "blueberries"
    assembler.processed = "2018-11-05T12:23:23"
    assembler.producer = "ga.gov.au"
    assembler.dataset_version = "1.0.0"
    assembler.region_code = "Oz"
    assembler.properties["odc:file_format"] = "GeoTIFF"
    assembler.properties["sentinel:sentinel_tile_id"] = (
        "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
    )

    # The property normaliser should have extracted inner fields
    assert assembler.properties["sentinel:datatake_start_datetime"] == datetime(
        2017, 8, 22, 1, 56, 26, tzinfo=timezone.utc
    )

    dataset_id, metadata_path = assembler.done()

    # The s2 naming conventions have an extra subfolder of the datatake start time.
    offset = metadata_path.relative_to(tmp_path).as_posix()
    assert offset == (
        "ga_s2am_blueberries_1/Oz/2018/11/04/20170822T015626/"
        "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.odc-metadata.yaml"
    )

    assert_same_as_file(
        {
            "$schema": "https://schemas.opendatacube.org/dataset",
            "accessories": {
                "checksum:sha1": {
                    "path": "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.sha1"
                },
                "metadata:processor": {
                    "path": "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.proc-info.yaml"
                },
            },
            "id": dataset_id,
            "label": "ga_s2am_blueberries_1-0-0_Oz_2018-11-04",
            "lineage": {},
            "product": {
                "href": "https://collections.dea.ga.gov.au/product/ga_s2am_blueberries_1",
                "name": "ga_s2am_blueberries_1",
            },
            "properties": {
                "datetime": datetime(2018, 11, 4, 0, 0),
                "eo:instrument": "msi",
                "eo:platform": "sentinel-2a",
                "odc:dataset_version": "1.0.0",
                "odc:file_format": "GeoTIFF",
                "odc:processing_datetime": datetime(2018, 11, 5, 12, 23, 23),
                "odc:producer": "ga.gov.au",
                "odc:product_family": "blueberries",
                "odc:region_code": "Oz",
                "sentinel:datatake_start_datetime": datetime(2017, 8, 22, 1, 56, 26),
                "sentinel:sentinel_tile_id": (
                    "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
                ),
            },
        },
        generated_file=metadata_path,
    )