def test_package():
    """
    Package the PQA test dataset through the CLI and check the result.

    Checks, in order: the output folder layout, the generated
    ``ga-metadata.yaml`` (compared against ``EXPECTED_METADATA``) and the
    list of files recorded in ``package.sha1``.
    """
    dataset_name = 'LS8_OLITIRS_PQ_P55_GAPQ01-032_090_081_20140726'
    output_path = temp_dir()

    run_packaging_cli([
        hardlink_arg(output_path, source_dataset),
        'pqa',
        '--parent', str(parent_dataset),
        str(source_dataset), str(output_path)
    ])

    output_dataset = output_path.joinpath(dataset_name)

    assert_file_structure(output_path, {
        dataset_name: {
            'browse.jpg': '',
            'browse.fr.jpg': '',
            'product': {
                dataset_name + '.tif': '',
            },
            'ga-metadata.yaml': '',
            'package.sha1': ''
        }
    })

    # Load metadata file and compare it to expected.
    output_metadata_path = output_dataset.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the file deterministically rather than leaking the handle.
    # An explicit Loader is passed because newer PyYAML releases require one;
    # yaml.Loader matches the historical default. The document was written by
    # the packaging run above, so it is not untrusted input.
    with output_metadata_path.open('r') as f:
        md = yaml.load(f, Loader=yaml.Loader)

    # ID is different every time: check not none, and clear it.
    assert md['id'] is not None
    md['id'] = None

    # Check metadata is as expected.
    # NOTE: this updates the EXPECTED_METADATA object defined outside this
    # test in place (behaviour kept from the original).
    EXPECTED_METADATA['size_bytes'] = directory_size(output_dataset / 'product')
    add_default_software_versions(EXPECTED_METADATA)
    assert_same(md, EXPECTED_METADATA)

    # TODO: Assert correct checksums? They shouldn't change in theory.
    # But they may with gdal versions etc.

    # Check all files are listed in checksum file.
    output_checksum_path = output_dataset.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)
    assert checksummed_filenames == [
        'browse.fr.jpg',
        'browse.jpg',
        'ga-metadata.yaml',
        'product/' + dataset_name + '.tif',
    ]
def test_minimal_generated_naming_package(tmp_path: Path, l1_ls8_folder: Path):
    """
    Find the smallest set of fields that still lets the assembler generate
    file and product names for a package.
    """
    out = tmp_path / "out"
    out.mkdir()

    # Exactly one blue-band GeoTIFF is expected in the level 1 folder.
    (blue_tif,) = l1_ls8_folder.rglob("L*_B2.TIF")

    with DatasetAssembler(out) as assembler:
        assembler.datetime = datetime(2019, 7, 4, 13, 7, 5)
        assembler.product_family = "quaternarius"
        assembler.processed_now()

        assembler.write_measurement("blue", blue_tif)

        # Notebook/terminal users get a readable summary from __str__.
        expected_summary = dedent(
            f"""
            Assembling quaternarius (unfinished)
            - 1 measurements: blue
            - 4 properties: datetime, odc:file_format, odc:processing_datetime, odc:prod...
            Writing to location: {out}/quaternarius/2019/07/04/quaternarius_2019-07-04.odc-metadata.yaml
        """
        )
        assert str(assembler) == expected_summary

        # done() validates the dataset and writes it to the destination atomically.
        dataset_id, metadata_path = assembler.done()

    assert dataset_id is not None

    # Files expected in the dated leaf folder.
    # (Set a dataset version to get rid of the 'beta' label.)
    day_files = {
        "quaternarius_2019-07-04.odc-metadata.yaml": "",
        "quaternarius_2019-07-04.proc-info.yaml": "",
        "quaternarius_2019-07-04_blue.tif": "",
        "quaternarius_2019-07-04.sha1": "",
    }
    expected_layout = {"quaternarius": {"2019": {"07": {"04": day_files}}}}
    assert_file_structure(out, expected_layout)
def test_copy_callbacks_called(self):
    """
    prepare_target_imagery() should invoke after_file_copy once per copied
    file, write the translated file name, and leave the source untouched.
    """
    test_path = write_files({
        'source_dir': {
            'LC81010782014285LGN00_B6.img': 'test',
            'LC81010782014285LGN00_B6.swamp': 'test'
        }
    })
    source_path = test_path.joinpath('source_dir')
    dest_path = test_path.joinpath('dest_dir')

    called_back = []
    package.prepare_target_imagery(
        source_path,
        dest_path,
        include_path=lambda p: p.suffix == '.img',
        translate_path=lambda p: p.with_suffix('.tif'),
        after_file_copy=lambda source, dest: called_back.append((source, dest)),
        compress_imagery=False
    )

    dest_file = dest_path.joinpath('LC81010782014285LGN00_B6.tif')

    # The after_file_copy() callback should be called for each copied file.
    # *.swamp is expected to be absent: our include_path filter only accepts
    # '.img' files.
    self.assertEqual(
        [
            (source_path.joinpath('LC81010782014285LGN00_B6.img'), [dest_file])
        ],
        called_back
    )

    assert_file_structure(
        test_path,
        {
            'source_dir': {
                'LC81010782014285LGN00_B6.img': 'test',
                'LC81010782014285LGN00_B6.swamp': 'test'
            },
            'dest_dir': {
                'LC81010782014285LGN00_B6.tif': 'test',
            }
        }
    )

    # The fixture content 'test' is four bytes long.
    # (Previously assertTrue(size, 4) was used, which treats 4 as the failure
    # message and never compares the size.)
    self.assertEqual(4, dest_file.stat().st_size)

    # Ensure source path was not touched.
    source_file = source_path.joinpath('LC81010782014285LGN00_B6.img')
    self.assertEqual(4, source_file.stat().st_size)
def test_minimal_package_with_product_name(tmp_path: Path, l1_ls8_folder: Path):
    """
    Giving an ODC product name by hand sidesteps most of the name generation.
    """
    out = tmp_path / "out"
    out.mkdir()

    # Exactly one blue-band GeoTIFF is expected in the level 1 folder.
    (blue_tif,) = l1_ls8_folder.rglob("L*_B2.TIF")

    with DatasetAssembler(out) as assembler:
        assembler.datetime = datetime(2019, 7, 4, 13, 7, 5)
        assembler.product_name = "loch_ness_sightings"
        assembler.processed = datetime(2019, 7, 4, 13, 8, 7)

        assembler.write_measurement("blue", blue_tif)

        dataset_id, metadata_path = assembler.done()

    assert dataset_id is not None

    # Files expected in the dated leaf folder.
    # (Set a dataset version to get rid of the 'beta' label.)
    day_files = {
        "loch_ness_sightings_2019-07-04.odc-metadata.yaml": "",
        "loch_ness_sightings_2019-07-04.proc-info.yaml": "",
        "loch_ness_sightings_2019-07-04_blue.tif": "",
        "loch_ness_sightings_2019-07-04.sha1": "",
    }
    expected_layout = {"loch_ness_sightings": {"2019": {"07": {"04": day_files}}}}
    assert_file_structure(out, expected_layout)
def test_package():
    """
    Package the level 1 test dataset through the CLI and check the result.

    Checks the output folder layout (including the added work order and
    additional files), the generated ``ga-metadata.yaml`` and that
    ``package.sha1`` lists every other file in the package.
    """
    dataset_name = 'LS8_OLITIRS_OTH_P51_GALPGS01-002_112_079_20140126'

    work_path = temp_dir()
    output_path = work_path.joinpath('out')
    output_path.mkdir(parents=True)
    ancil_path = work_path.joinpath('ancil')

    # We have to override the ancillary directory lookup as they won't exist on test systems.
    ancil_files = (
        FakeAncilFile(ancil_path, 'cpf', 'L8CPF20140101_20140331.05'),
        FakeAncilFile(ancil_path, 'bpf_oli', 'LO8BPF20140127130115_20140127144056.01'),
        FakeAncilFile(ancil_path, 'bpf_tirs', 'LT8BPF20140116023714_20140116032836.02'),
        FakeAncilFile(ancil_path, 'tirs_ssm_position', '20160529.l8_tirs_estimated_ssm_position.txt'),
        FakeAncilFile(ancil_path, 'rlut', 'L8RLUT20130211_20431231v09.h5', folder_offset=('2013',)),
    )
    work_order_path = prepare_work_order(ancil_files, work_order_template_path)

    # Run!
    args = [
        hardlink_arg(output_path, source_dataset),
        'level1',
        '--newly-processed',
        '--parent', str(parent_dataset),
        '--add-file', str(work_order_path)
    ]
    for additional_file in additional_files.iterdir():
        args.extend(['--add-file', str(additional_file)])
    args.extend([
        str(source_dataset),
        str(output_path)
    ])
    run_packaging_cli(args)

    output_dataset = output_path.joinpath(dataset_name)

    expected_product_files = {
        'LC81120792014026ASA00_GCP.txt': '',
        'LC81120792014026ASA00_MTL.txt': '',
        'LO8_20140126_112_079_L1T.xml': '',
    }
    bands = (
        'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9',
        'B10', 'B11', 'BQA',
    )
    for band in bands:
        expected_product_files['LC81120792014026ASA00_{}.TIF'.format(band)] = ''
        # Newer versions of GDAL create an IMD file with some of the embedded metadata.
        expected_product_files['LC81120792014026ASA00_{}.IMD'.format(band)] = 'optional'

    assert_file_structure(output_path, {
        dataset_name: {
            'browse.jpg': '',
            'browse.fr.jpg': '',
            'product': expected_product_files,
            'additional': {
                'work_order.xml': '',
                'lpgs_out.xml': '',
                '20141201_20010425_B6_gqa_results.yaml': ''
            },
            'ga-metadata.yaml': '',
            'package.sha1': ''
        }
    })

    # TODO: Check metadata fields are sensible.
    output_metadata_path = output_dataset.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the file deterministically rather than leaking the handle.
    # An explicit Loader is passed because newer PyYAML releases require one;
    # yaml.Loader matches the historical default. The document was written by
    # the packaging run above, so it is not untrusted input.
    with output_metadata_path.open('r') as f:
        md = yaml.load(f, Loader=yaml.Loader)

    prepare_datasets_for_comparison(
        EXPECTED_METADATA,
        md,
        ancil_files,
        output_dataset.joinpath('product')
    )
    assert_same(md, EXPECTED_METADATA)

    # TODO: Assert all files are checksummed.
    output_checksum_path = output_dataset.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)

    expected_filenames = sorted(
        f for f in as_file_list(output_dataset) if f != 'package.sha1'
    )
    # Compare as sets first so a missing/extra file produces a readable diff,
    # then compare the lists to also check the ordering.
    assert set(checksummed_filenames) == set(expected_filenames)
    assert checksummed_filenames == expected_filenames
def test_dea_style_package(l1_ls8_dataset: DatasetDoc, l1_ls8_dataset_path: Path, tmp_path: Path):
    """
    Assemble a dataset using the 'dea' naming conventions and check both the
    files written and the generated metadata document.
    """
    # Exactly one blue-band GeoTIFF is expected in the level 1 dataset folder.
    (blue_tif,) = l1_ls8_dataset_path.rglob("L*_B2.TIF")

    with DatasetAssembler(tmp_path, naming_conventions="dea") as assembler:
        # Register the source dataset and inherit its common properties
        # (eg. platform, instrument, datetime).
        assembler.add_source_path(l1_ls8_dataset_path, auto_inherit_properties=True)

        # A GA product of "numerus-unus" ("the number one").
        assembler.producer = "ga.gov.au"
        assembler.product_family = "ones"
        assembler.dataset_version = "3.0.0"

        # Known properties get normalised (see tests at bottom of file)
        assembler.platform = "LANDSAT_8"  # becomes 'landsat-8'
        assembler.processed = "2016-03-04 14:23:30Z"  # becomes a date
        assembler.maturity = "FINAL"  # becomes lowercase
        assembler.properties["eo:off_nadir"] = "34"  # becomes a number

        # A measurement written from a numpy array, on the source dataset's grid spec.
        ones_array = numpy.ones((60, 60), numpy.int16)
        source_grid = GridSpec.from_dataset_doc(l1_ls8_dataset)
        assembler.write_measurement_numpy("ones", ones_array, source_grid, nodata=-999)

        # A measurement copied from an input file
        # (it will write a COG with DEA naming conventions)
        assembler.write_measurement("blue", blue_tif)

        # Measurements could instead be recorded by reference rather than copied:
        # assembler.note_measurement("external_blue", blue_tif)
        # (See an example of referencing in eodatasets3/prepare/landsat_l1_prepare.py )

        # A thumbnail using the given bands as r/g/b.
        assembler.write_thumbnail("ones", "ones", "blue")
        # A singleband thumbnail using a bit flag
        assembler.write_thumbnail_singleband("blue", bit=1, kind="singleband")
        # A singleband thumbnail using a lookup table
        assembler.write_thumbnail_singleband(
            "blue", lookup_table={1: (0, 0, 255)}, kind="singleband_lut"
        )

        # Record any software versions important to this created data.
        assembler.note_software_version(
            "numerus-unus-processor",
            "https://github.com/GeoscienceAustralia/eo-datasets",
            "1.2.3",
        )

        # done() validates the dataset and writes it to the destination atomically.
        dataset_id, metadata_path = assembler.done()

    assert isinstance(dataset_id, UUID), "Expected a random UUID to be assigned"

    expected_folder = tmp_path / "ga_ls8c_ones_3/090/084/2016/01/21"
    assert expected_folder == metadata_path.parent

    expected_files = {
        "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.odc-metadata.yaml": "",
        "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_blue.tif": "",
        "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_ones.tif": "",
        "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_thumbnail.jpg": "",
        "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.proc-info.yaml": "",
        "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.sha1": "",
        "ga_ls8c_singleband_3-0-0_090084_2016-01-21_final_thumbnail.jpg": "",
        "ga_ls8c_singleband_lut_3-0-0_090084_2016-01-21_final_thumbnail.jpg": "",
    }
    assert_file_structure(expected_folder, expected_files)

    # TODO: check sha1 checksum list.

    expected_geometry = {
        "coordinates": [
            [
                [879_315.0, -3_714_585.0],
                [641_985.0, -3_714_585.0],
                [641_985.0, -3_953_115.0],
                [879_315.0, -3_953_115.0],
                [879_315.0, -3_714_585.0],
            ]
        ],
        "type": "Polygon",
    }

    # The two bands had identical grid specs, so they were combined into one grid.
    expected_grids = {
        "default": {
            "shape": [60, 60],
            "transform": [
                3955.5,
                0.0,
                641_985.0,
                0.0,
                -3975.500_000_000_000_5,
                -3_714_585.0,
                0.0,
                0.0,
                1.0,
            ],
        }
    }

    expected_measurements = {
        "blue": {"path": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_blue.tif"},
        "ones": {"path": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_ones.tif"},
    }

    expected_properties = {
        "datetime": datetime(2016, 1, 21, 23, 50, 23, 54435),
        "dea:dataset_maturity": "final",
        "odc:dataset_version": "3.0.0",
        "odc:file_format": "GeoTIFF",
        "odc:processing_datetime": "2016-03-04T14:23:30",
        "odc:producer": "ga.gov.au",
        "odc:product_family": "ones",
        # Everything below was inherited from the source dataset
        # (because we set auto_inherit_properties=True, and they're in the whitelist)
        "eo:platform": "landsat-8",  # matching Stac's examples for capitalisation.
        "eo:instrument": "OLI_TIRS",  # matching Stac's examples for capitalisation.
        "eo:cloud_cover": 93.22,
        "eo:off_nadir": 34.0,
        "eo:gsd": 15.0,
        "eo:sun_azimuth": 74.007_443_8,
        "eo:sun_elevation": 55.486_483,
        "landsat:collection_category": "T1",
        "landsat:collection_number": 1,
        "landsat:landsat_product_id": "LC08_L1TP_090084_20160121_20170405_01_T1",
        "landsat:landsat_scene_id": "LC80900842016021LGN02",
        "landsat:wrs_path": 90,
        "landsat:wrs_row": 84,
        "odc:region_code": "090084",
    }

    expected_accessories = {
        # A checksum file was written covering all of our files.
        "checksum:sha1": {
            "path": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.sha1"
        },
        # No extra processor metadata was added, so this only holds
        # some software versions.
        "metadata:processor": {
            "path": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.proc-info.yaml"
        },
        # The thumbnails made above.
        "thumbnail": {
            "path": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_thumbnail.jpg"
        },
        "thumbnail:singleband": {
            "path": "ga_ls8c_singleband_3-0-0_090084_2016-01-21_final_thumbnail.jpg"
        },
        "thumbnail:singleband_lut": {
            "path": "ga_ls8c_singleband_lut_3-0-0_090084_2016-01-21_final_thumbnail.jpg"
        },
    }

    expected_doc = {
        "$schema": "https://schemas.opendatacube.org/dataset",
        "id": dataset_id,
        "label": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final",
        "product": {
            # Added automatically because we chose 'dea' conventions.
            "href": "https://collections.dea.ga.gov.au/product/ga_ls8c_ones_3",
            "name": "ga_ls8c_ones_3",
        },
        "crs": "epsg:32655",
        "geometry": expected_geometry,
        "grids": expected_grids,
        "measurements": expected_measurements,
        "properties": expected_properties,
        "accessories": expected_accessories,
        "lineage": {"level1": ["a780754e-a884-58a7-9ac0-df518a67f59d"]},
    }
    assert_same_as_file(expected_doc, generated_file=metadata_path)
def test_metadata():
    """
    Package the raw NPP VIIRS test dataset through the CLI and check the
    output folder layout, the generated metadata and the checksum file list.
    """
    dataset_name = 'NPP_VIIRS_STD-HDF5_P00_18966.ASA_0_0_20150626T053709Z20150626T055046'
    product_filename = (
        'RNSCA-RVIRS_npp_d20150626_t0537097_e0549423_b18966_'
        'c20150626055046759000_nfts_drl.h5'
    )

    output_path = temp_dir()

    run_packaging_cli([
        hardlink_arg(output_path, source_dataset),
        'raw',
        str(source_dataset), str(output_path)
    ])

    assert_file_structure(
        output_path,
        {
            dataset_name: {
                'product': {
                    product_filename: '',
                },
                'ga-metadata.yaml': '',
                'package.sha1': ''
            }
        }
    )
    output_dataset = output_path.joinpath(dataset_name)

    # TODO: Check metadata fields are sensible.
    output_metadata_path = output_dataset.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the file deterministically rather than leaking the handle.
    # An explicit Loader is passed because newer PyYAML releases require one;
    # yaml.Loader matches the historical default. The document was written by
    # the packaging run above, so it is not untrusted input.
    with output_metadata_path.open('r') as f:
        md = yaml.load(f, Loader=yaml.Loader)

    # ID is different every time: check not none, and clear it.
    assert md['id'] is not None
    md['id'] = None

    expected = {
        'ga_label': dataset_name,
        'image': {
            'bands': {}
        },
        'size_bytes': 0,
        # Default creation date is taken from the input's ctime.
        'creation_dt': datetime.datetime.utcfromtimestamp(source_dataset.stat().st_ctime),
        'id': None,
        'platform': {
            'code': 'NPP'
        },
        'instrument': {
            'name': 'VIIRS'
        },
        'ga_level': 'P00',
        'format': {
            'name': 'HDF5'
        },
        'checksum_path': 'package.sha1',
        'product_type': 'satellite_telemetry_data',
        'acquisition': {
            'groundstation': {
                'eods_domain_code': '002',
                'label': 'Alice Springs',
                'code': 'ASA'
            },
            'platform_orbit': 18966,
            'los': datetime.datetime(2015, 6, 26, 5, 50, 46),
            'aos': datetime.datetime(2015, 6, 26, 5, 37, 9)
        },
        'lineage': {
            'machine': {},
            'source_datasets': {}
        },
    }
    add_default_software_versions(expected)
    assert_same(md, expected)

    # Check all files are listed in checksum file.
    output_checksum_path = output_dataset.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)
    assert checksummed_filenames == [
        'ga-metadata.yaml',
        'product/' + product_filename,
    ]
def test_metadata():
    """
    Package the raw LS7 ETM test dataset through the CLI and check the
    output folder layout, the generated metadata and the checksum file list.
    """
    dataset_name = (
        'LS7_ETM_STD-RCC_P00_L7EB2011239021036ASA111_0_0_'
        '20110827T021036Z20110827T021707'
    )

    output_path = temp_dir()

    run_packaging_cli([
        hardlink_arg(output_path, source_dataset),
        'raw',
        str(source_dataset), str(output_path)
    ])

    # EODS LS7 dataset id:
    # 'LS7_ETM_STD-RCC_P00_LANDSAT-7.65771.ALSP_0_0_20110827T021036Z20110827T021707'
    # ... slightly different to NCI?

    assert_file_structure(
        output_path,
        {
            dataset_name: {
                'product': {
                    'ephem.log': '',
                    'acs.log': '',
                    'L7EB2011239021036ASA111Q.data': '',
                    'passinfo': '',
                    'L7EB2011239021036ASA111I.data': '',
                    'ref.log': '',
                    'demod.log': ''
                },
                'ga-metadata.yaml': '',
                'package.sha1': ''
            }
        }
    )
    output_dataset = output_path.joinpath(dataset_name)

    # TODO: Check metadata fields are sensible.
    output_metadata_path = output_dataset.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the file deterministically rather than leaking the handle.
    # An explicit Loader is passed because newer PyYAML releases require one;
    # yaml.Loader matches the historical default. The document was written by
    # the packaging run above, so it is not untrusted input.
    with output_metadata_path.open('r') as f:
        md = yaml.load(f, Loader=yaml.Loader)

    # ID is different every time: check not none, and clear it.
    assert md['id'] is not None
    md['id'] = None

    expected = {
        'id': None,
        'size_bytes': 164368,
        'platform': {
            'code': 'LANDSAT_7'
        },
        'instrument': {
            'operation_mode': 'BUMPER',
            'name': 'ETM'
        },
        'ga_level': 'P00',
        'usgs': {
            'interval_id': 'L7EB2011239021036ASA111'
        },
        'product_type': 'satellite_telemetry_data',
        'format': {
            'name': 'RCC'
        },
        # Default creation date is the same as the input folder ctime.
        'creation_dt': datetime.datetime.utcfromtimestamp(source_dataset.stat().st_ctime),
        'ga_label': dataset_name,
        'acquisition': {
            'aos': datetime.datetime(2011, 8, 27, 2, 10, 36),
            'groundstation': {
                'code': 'ASA',
                'label': 'Alice Springs',
                'eods_domain_code': '002'
            },
            'los': datetime.datetime(2011, 8, 27, 2, 17, 7),
            'platform_orbit': 65771
        },
        'image': {
            'bands': {}
        },
        'lineage': {
            'source_datasets': {},
            'machine': {}
        },
        'checksum_path': 'package.sha1'
    }
    add_default_software_versions(expected)
    assert_same(md, expected)

    # Check all files are listed in checksum file.
    output_checksum_path = output_dataset.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)
    assert checksummed_filenames == [
        'ga-metadata.yaml',
        'product/L7EB2011239021036ASA111I.data',
        'product/L7EB2011239021036ASA111Q.data',
        'product/acs.log',
        'product/demod.log',
        'product/ephem.log',
        'product/passinfo',
        'product/ref.log',
    ]
def test_metadata():
    """
    Package the raw AQUA MODIS (PDS) test dataset through the CLI and check
    the output folder layout, the generated metadata and the checksum file list.
    """
    dataset_name = 'AQUA_MODIS_STD-PDS_P00_65208.S1A1C1D1R1_0_0_20140807T031628Z20140807T031630'

    # The expected PDS product files, in the order they appear in the
    # checksum file: each id has a '000' and a '001' file.
    pds_ids = (
        '1540064', '1540141', '1540157', '1540261', '1540262',
        '1540290', '1540342', '1540402', '1540404', '1540405',
        '1540406', '1540407', '1540414', '1540415', '1540957',
    )
    pds_filenames = [
        'P{}AAAAAAAAAAAAAA14219032341{}.PDS'.format(pds_id, sequence)
        for pds_id in pds_ids
        for sequence in ('000', '001')
    ]

    output_path = temp_dir()

    run_packaging_cli([
        hardlink_arg(output_path, source_dataset),
        'raw',
        str(source_dataset), str(output_path)
    ])

    # NOTE(review): the comment below refers to an LS7 dataset id and looks
    # copied from the LS7 test -- confirm whether it is relevant here.
    # EODS LS7 dataset id:
    # 'LS7_ETM_STD-RCC_P00_LANDSAT-7.65771.ALSP_0_0_20110827T021036Z20110827T021707'
    # ... slightly different to NCI?

    assert_file_structure(
        output_path,
        {
            dataset_name: {
                'product': {filename: '' for filename in pds_filenames},
                'ga-metadata.yaml': '',
                'package.sha1': ''
            }
        }
    )
    output_dataset = output_path.joinpath(dataset_name)

    # TODO: Check metadata fields are sensible.
    output_metadata_path = output_dataset.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the file deterministically rather than leaking the handle.
    # An explicit Loader is passed because newer PyYAML releases require one;
    # yaml.Loader matches the historical default. The document was written by
    # the packaging run above, so it is not untrusted input.
    with output_metadata_path.open('r') as f:
        md = yaml.load(f, Loader=yaml.Loader)

    # ID is different every time: check not none, and clear it.
    assert md['id'] is not None
    md['id'] = None

    expected = {
        'lineage': {
            'machine': {},
            'source_datasets': {}
        },
        'product_type': 'satellite_telemetry_data',
        'format': {
            'name': 'PDS'
        },
        'image': {
            'bands': {},
            'day_percentage_estimate': 100.0
        },
        # Default creation date is the same as the input folder ctime.
        'creation_dt': datetime.datetime.utcfromtimestamp(source_dataset.stat().st_ctime),
        'rms_string': 'S1A1C1D1R1',
        'instrument': {
            'name': 'MODIS'
        },
        'ga_label': dataset_name,
        'platform': {
            'code': 'AQUA'
        },
        'size_bytes': 2144280,
        'checksum_path': 'package.sha1',
        'id': None,
        'acquisition': {
            'los': datetime.datetime(2014, 8, 7, 3, 16, 30, 228023),
            'platform_orbit': 65208,
            'aos': datetime.datetime(2014, 8, 7, 3, 16, 28, 750910)
        }
    }
    add_default_software_versions(expected)
    assert_same(md, expected)

    # Check all files are listed in checksum file.
    output_checksum_path = output_dataset.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)
    assert checksummed_filenames == (
        ['ga-metadata.yaml'] +
        ['product/' + filename for filename in pds_filenames]
    )
def test_whole_wagl_package(l1_ls8_dataset: DatasetDoc, l1_ls8_folder: Path, tmp_path: Path): out = tmp_path from eodatasets3.scripts import packagewagl with pytest.warns(None) as warning_record: res = CliRunner().invoke( packagewagl.run, map(str, (WAGL_INPUT_PATH, "--level1", L1_METADATA_PATH, "--output", out)), catch_exceptions=False, ) # The last line of output ends with the dataset path. words, reported_metadata = res.output.splitlines()[-1].rsplit(" ", 1) # No warnings should have been logged during package. # We could tighten this to specific warnings if it proves too noisy, but it's # useful for catching things like unclosed files. if warning_record: messages = "\n".join(f"- {w.message} ({w})\n" for w in warning_record) raise AssertionError( f"Warnings were produced during wagl package:\n {messages}") expected_folder = out / "ga_ls8c_ard_3/092/084/2016/06/28" assert_file_structure( expected_folder, { "ga_ls8c_ard_3-2-0_092084_2016-06-28_final.odc-metadata.yaml": "", "ga_ls8c_ard_3-2-0_092084_2016-06-28_final.proc-info.yaml": "", "ga_ls8c_ard_3-2-0_092084_2016-06-28_final.sha1": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band01.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band02.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band03.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band04.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band05.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band06.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band07.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band08.tif": "", "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_thumbnail.jpg": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band01.tif": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band02.tif": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band03.tif": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band04.tif": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band05.tif": "", 
"ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band06.tif": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band07.tif": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band08.tif": "", "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_thumbnail.jpg": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_azimuthal-exiting.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_azimuthal-incident.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_combined-terrain-shadow.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_exiting-angle.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_fmask.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_incident-angle.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_nbar-contiguity.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_nbart-contiguity.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_relative-azimuth.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_relative-slope.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_satellite-azimuth.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_satellite-view.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_solar-azimuth.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_solar-zenith.tif": "", "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_time-delta.tif": "", }, ) [output_metadata] = expected_folder.rglob("*.odc-metadata.yaml") assert reported_metadata == str( output_metadata), "Cli didn't report the expected output path" # Checksum should include all files other than itself. [checksum_file] = expected_folder.rglob("*.sha1") all_output_files = set( p.relative_to(checksum_file.parent) for p in expected_folder.rglob("*") if p != checksum_file) files_in_checksum = { Path(line.split("\t")[1]) for line in checksum_file.read_text().splitlines() } assert all_output_files == files_in_checksum # Verify the computed contiguity looks the same. 
(metadata fields will depend on it) [image] = expected_folder.rglob("*_oa_*nbar-contiguity.tif") assert_image(image, nodata=255, unique_pixel_counts={0: 1978, 1: 4184}) [image] = expected_folder.rglob("*_oa_*nbart-contiguity.tif") assert_image(image, nodata=255, unique_pixel_counts={0: 1979, 1: 4183}) assert_same_as_file( { "$schema": "https://schemas.opendatacube.org/dataset", # A stable ID is taken from the WAGL doc. "id": "787eb74c-e7df-43d6-b562-b796137330ae", "label": "ga_ls8c_ard_3-2-0_092084_2016-06-28_final", "product": { "href": "https://collections.dea.ga.gov.au/product/ga_ls8c_ard_3", "name": "ga_ls8c_ard_3", }, "crs": "epsg:32655", "geometry": { "coordinates": [[ [386_170.809_107_605_5, -3_787_581.737_315_514_6], [393_422.698_122_467_44, -3_754_539.332_156_166_4], [402_370.463_567_812_2, -3_717_207.883_853_628_3], [405_296.703_429_750_9, -3_713_106.822_612_258_6], [405_302.307_692_307_7, -3_713_085.0], [560_999.714_134_832_8, -3_745_790.820_117_99], [591_203.344_050_317_7, -3_755_934.776_849_929_2], [593_107.5, -3_756_373.614_649_681_4], [593_066.089_284_004_1, -3_756_560.384_007_281_6], [593_115.0, -3_756_576.810_780_758], [593_115.0, -3_769_934.639_090_926_4], [555_895.771_981_598_6, -3_924_204.823_795_153], [554_316.830_569_659_8, -3_931_326.117_549_759], [553_913.572_308_820_1, -3_932_420.854_216_015], [550_505.686_408_068, -3_946_546.219_392_854], [548_673.645_879_151_9, -3_946_645.831_477_726_3], [548_393.076_923_077, -3_947_407.5], [543_888.417_289_877_3, -3_946_906.014_911_907], [535_826.373_854_402_9, -3_947_344.365_997_631_6], [362_232.941_315_876_84, -3_905_575.014_223_633], [362_109.819_892_458_1, -3_904_490.351_889_350_5], [360_592.5, -3_904_126.385_350_318_6], [361_565.347_585_850_9, -3_899_693.716_286_561_5], [360_585.0, -3_891_057.151_898_734_3], [366_618.297_729_428_5, -3_863_717.869_440_751], [386_170.809_107_605_5, -3_787_581.737_315_514_6], ]], "type": "Polygon", }, "grids": { "default": { "shape": [79, 78], "transform": [ 
2981.153_846_153_846, 0.0, 360_585.0, 0.0, -2966.202_531_645_569_7, -3_713_085.0, 0.0, 0.0, 1.0, ], }, "panchromatic": { "shape": [157, 156], "transform": [ 1490.480_769_230_769_3, 0.0, 360_592.5, 0.0, -1492.452_229_299_363, -3_713_092.5, 0.0, 0.0, 1.0, ], }, }, "properties": { "datetime": datetime(2016, 6, 28, 0, 2, 28, 624_635), "dea:dataset_maturity": "final", "dtr:end_datetime": datetime(2016, 6, 28, 0, 2, 43, 114_771), "dtr:start_datetime": datetime(2016, 6, 28, 0, 2, 14, 25815), "eo:cloud_cover": 63.069_613_577_531_236, "eo:gsd": 1490.480_769_230_769_3, "eo:instrument": "OLI_TIRS", "eo:platform": "landsat-8", "eo:sun_azimuth": 33.655_125_34, "eo:sun_elevation": 23.988_361_72, "fmask:clear": 32.735_343_657_403_305, "fmask:cloud": 63.069_613_577_531_236, "fmask:cloud_shadow": 4.139_470_857_647_722, "fmask:snow": 0.005_053_323_801_138_007, "fmask:water": 0.050_518_583_616_596_675, "gqa:abs_iterative_mean_x": 0.21, "gqa:abs_iterative_mean_xy": 0.27, "gqa:abs_iterative_mean_y": 0.18, "gqa:abs_x": 0.3, "gqa:abs_xy": 0.39, "gqa:abs_y": 0.25, "gqa:cep90": 0.46, "gqa:iterative_mean_x": -0.17, "gqa:iterative_mean_xy": 0.21, "gqa:iterative_mean_y": 0.12, "gqa:iterative_stddev_x": 0.19, "gqa:iterative_stddev_xy": 0.25, "gqa:iterative_stddev_y": 0.17, "gqa:mean_x": -0.1, "gqa:mean_xy": 0.14, "gqa:mean_y": 0.1, "gqa:stddev_x": 0.35, "gqa:stddev_xy": 0.45, "gqa:stddev_y": 0.29, "landsat:collection_category": "T1", "landsat:collection_number": 1, "landsat:landsat_product_id": "LC08_L1TP_092084_20160628_20170323_01_T1", "landsat:landsat_scene_id": "LC80920842016180LGN01", "landsat:wrs_path": 92, "landsat:wrs_row": 84, "odc:dataset_version": "3.2.0", "odc:file_format": "GeoTIFF", "odc:processing_datetime": datetime(2019, 7, 11, 23, 29, 29, 21245), "odc:producer": "ga.gov.au", "odc:product_family": "ard", "odc:region_code": "092084", }, "measurements": { "nbar_blue": { "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band02.tif" }, "nbar_coastal_aerosol": { "path": 
"ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band01.tif" }, "nbar_green": { "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band03.tif" }, "nbar_nir": { "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band05.tif" }, "nbar_panchromatic": { "grid": "panchromatic", "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band08.tif", }, "nbar_red": { "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band04.tif" }, "nbar_swir_1": { "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band06.tif" }, "nbar_swir_2": { "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_band07.tif" }, "nbart_blue": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band02.tif" }, "nbart_coastal_aerosol": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band01.tif" }, "nbart_green": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band03.tif" }, "nbart_nir": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band05.tif" }, "nbart_panchromatic": { "grid": "panchromatic", "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band08.tif", }, "nbart_red": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band04.tif" }, "nbart_swir_1": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band06.tif" }, "nbart_swir_2": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_band07.tif" }, "oa_azimuthal_exiting": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_azimuthal-exiting.tif" }, "oa_azimuthal_incident": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_azimuthal-incident.tif" }, "oa_combined_terrain_shadow": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_combined-terrain-shadow.tif" }, "oa_exiting_angle": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_exiting-angle.tif" }, "oa_fmask": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_fmask.tif" }, "oa_incident_angle": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_incident-angle.tif" }, "oa_nbar_contiguity": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_nbar-contiguity.tif" }, 
"oa_nbart_contiguity": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_nbart-contiguity.tif" }, "oa_relative_azimuth": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_relative-azimuth.tif" }, "oa_relative_slope": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_relative-slope.tif" }, "oa_satellite_azimuth": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_satellite-azimuth.tif" }, "oa_satellite_view": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_satellite-view.tif" }, "oa_solar_azimuth": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_solar-azimuth.tif" }, "oa_solar_zenith": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_solar-zenith.tif" }, "oa_time_delta": { "path": "ga_ls8c_oa_3-2-0_092084_2016-06-28_final_time-delta.tif" }, }, "accessories": { "checksum:sha1": { "path": "ga_ls8c_ard_3-2-0_092084_2016-06-28_final.sha1" }, "metadata:processor": { "path": "ga_ls8c_ard_3-2-0_092084_2016-06-28_final.proc-info.yaml" }, "thumbnail:nbar": { "path": "ga_ls8c_nbar_3-2-0_092084_2016-06-28_final_thumbnail.jpg" }, "thumbnail:nbart": { "path": "ga_ls8c_nbart_3-2-0_092084_2016-06-28_final_thumbnail.jpg" }, }, "lineage": { "level1": ["fb1c622e-90aa-50e8-9d5e-ad69db82d0f6"] }, }, output_metadata, ) [proc_info] = expected_folder.rglob("*.proc-info.yaml") assert_same_as_file( { "fmask": { "parameters": { "cloud_buffer_distance_metres": 0.0, "cloud_shadow_buffer_distance_metres": 0.0, "frantz_parallax_sentinel_2": False, }, "percent_class_distribution": { "clear": 32.735_343_657_403_305, "cloud": 63.069_613_577_531_236, "cloud_shadow": 4.139_470_857_647_722, "snow": 0.005_053_323_801_138_007, "water": 0.050_518_583_616_596_675, }, }, "software_versions": [ { "name": "modtran", "url": "http://www.ontar.com/software/productdetails.aspx?item=modtran", "version": "6.0.1", }, { "name": "wagl", "url": "https://github.com/GeoscienceAustralia/wagl.git", "version": "5.3.1+118.g9edd420", }, { "name": "eugl", "url": 
"https://github.com/OpenDataCubePipelines/eugl.git", "version": "0.0.2+69.gb1d1231", }, { "name": "gverify", "url": None, "version": "v0.25c" }, { "name": "fmask", "url": "https://bitbucket.org/chchrsc/python-fmask", "version": "0.5.3", }, { "name": "tesp", "url": "https://github.com/OpenDataCubePipelines/tesp.git", "version": "0.6.1", }, { "name": "eodatasets3", "url": "https://github.com/GeoscienceAustralia/eo-datasets", "version": eodatasets3.__version__, }, ], }, proc_info, ignore_fields=("gqa", "wagl"), ) # All produced tifs should be valid COGs for image in expected_folder.rglob("*.tif"): assert cogeo.cog_validate(image), f"Failed COG validation: {image}" # Check one of the images explicitly. [image] = expected_folder.rglob("*_nbar_*_band08.tif") with rasterio.open(image) as d: d: DatasetReader assert d.count == 1, "Expected one band" assert d.nodata == -999.0 # Verify the pixel values haven't changed. assert crc32(d.read(1).tobytes()) == 3_381_159_350 # (Rasterio's checksum is zero on some datasets for some reason? So we use crc above...) assert d.checksum(1) == 58403 # The last overview is an odd size because of the tiny test data image size. assert d.overviews(1) == [8, 16, 31] assert d.driver == "GTiff" assert d.dtypes == ("int16", ) assert d.compression == Compression.deflate assert d.height == 157 assert d.width == 156 # The reduced resolution makes it hard to test the chosen block size... assert d.block_shapes == [(26, 156)] # OA data should have no overviews. [*oa_images] = expected_folder.rglob("*_oa_*.tif") assert oa_images for image in oa_images: # fmask is the only OA that should have overviews according to spec (and Josh). if "fmask" in image.name: assert_image(image, overviews=[8, 16, 26]) else: assert_image(image, overviews=[]) # Check we didn't get height/width mixed up again :) # (The small size of our test data makes this slightly silly, though...) 
[thumb_path] = expected_folder.rglob("*_nbar_*.jpg") assert_image(thumb_path, bands=3, shape=(7, 8))
def test_package():
    """
    Run the packaging CLI in 'level1' mode and check the resulting package.

    Verifies the output folder layout, the generated 'ga-metadata.yaml'
    (compared against EXPECTED_METADATA) and that every packaged file other
    than the checksum file itself is listed in 'package.sha1'.
    """
    work_path = temp_dir()
    output_path = work_path.joinpath('out')
    output_path.mkdir(parents=True)
    ancil_base = work_path.joinpath('ancil')
    # We have to override the ancillary directory lookup as they won't exist on test systems.
    ancil_files = (
        FakeAncilFile(ancil_base, 'cpf', 'LE07CPF_20110101_20110331_01.02'),
        FakeAncilFile(ancil_base, 'ephemeris', 'L72013231ASADEF.S00'),
    )
    work_order = prepare_work_order(ancil_files, wo_template)

    # Run!
    args = [
        hardlink_arg(output_path, source_dataset),
        'level1',
        '--newly-processed',
        '--parent', str(parent_dataset),
        '--add-file', str(work_order),
    ]
    for additional_file in additional_files.iterdir():
        args.extend(['--add-file', str(additional_file)])
    args.extend([str(source_dataset), str(output_path)])
    run_packaging_cli(args)

    # Name of the folder the packager is expected to create for this dataset.
    dataset_label = 'LS7_ETM_SYS_P31_GALPGS01-002_092_082_20110214'
    output_dataset = output_path.joinpath(dataset_label)

    assert_file_structure(output_path, {
        dataset_label: {
            'browse.jpg': '',
            'browse.fr.jpg': '',
            'product': {
                'LE07_L1GS_092082_20110214_20170221_01_T2_ANG.txt': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B1.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B2.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B3.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B4.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B5.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B6_VCID_1.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B6_VCID_2.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B7.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B8.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_BQA.TIF': '',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B1.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B2.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B3.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B4.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B5.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B6_VCID_1.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B6_VCID_2.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B7.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_B8.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_BQA.IMD': 'optional',
                'LE07_L1GS_092082_20110214_20170221_01_T2_MTL.txt': '',
                'LE7_20110214_092_082_L1GS.xml': '',
            },
            'additional': {
                'work_order.xml': '',
                '20130818_20000119_B5_gqa_results.yaml': '',
                'lpgs_out.xml': '',
            },
            'ga-metadata.yaml': '',
            'package.sha1': ''
        }
    })

    # TODO: Check metadata fields are sensible.
    output_metadata_path = output_dataset.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the handle deterministically rather than leaking it to the GC.
    # The file was written moments ago by the packager under test, so the full
    # Loader (the historical default of yaml.load) is acceptable here; it is
    # passed explicitly because PyYAML >= 6 refuses to guess a Loader.
    with output_metadata_path.open('r') as metadata_file:
        md = yaml.load(metadata_file, Loader=yaml.Loader)

    prepare_datasets_for_comparison(
        EXPECTED_METADATA,
        md,
        ancil_files,
        output_dataset.joinpath('product')
    )

    assert_same(md, EXPECTED_METADATA)

    # TODO: Assert all files are checksummed.
    output_checksum_path = output_dataset.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)

    # The checksum file cannot contain an entry for itself.
    expected_filenames = sorted(
        f for f in as_file_list(output_dataset) if f != 'package.sha1'
    )
    # Compare as sets first (a membership mismatch gives the clearer failure),
    # then as lists to additionally pin the ordering.
    assert set(checksummed_filenames) == set(expected_filenames)
    assert checksummed_filenames == expected_filenames
def test_metadata():
    """
    Run the packaging CLI in 'raw' mode and check the resulting package.

    Verifies the output folder layout, the contents of the generated
    'ga-metadata.yaml' and the list of files recorded in 'package.sha1'.
    """
    output_path = temp_dir()

    run_packaging_cli([
        hardlink_arg(output_path, source_dataset),
        'raw', str(source_dataset), str(output_path)
    ])

    # Folder name the packager is expected to create; also the 'ga_label' value.
    dataset_label = (
        'LS8_OLITIRS_STD-MD_P00_LC81140740812015123LGN00_114_074-081_'
        '20150503T031224Z20150503T031438'
    )

    assert_file_structure(output_path, {
        dataset_label: {
            'product': {
                '270.000.2015123031324364.LGS': '',
                '271.000.2015123031330204.LGS': '',
                '271.001.2015123031352904.LGS': '',
                '271.002.2015123031415490.LGS': '',
                '271.003.2015123031438105.LGS': '',
                'LC81140740812015123LGN00_IDF.xml': '',
                'LC81140740812015123LGN00_MD5.txt': '',
            },
            'ga-metadata.yaml': '',
            'package.sha1': ''
        }
    })
    dataset_path = output_path.joinpath(dataset_label)

    # TODO: Check metadata fields are sensible.
    output_metadata_path = dataset_path.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the handle deterministically rather than leaking it to the GC.
    # The file was written moments ago by the packager under test, so the full
    # Loader (the historical default of yaml.load) is acceptable here; it is
    # passed explicitly because PyYAML >= 6 refuses to guess a Loader.
    with output_metadata_path.open('r') as metadata_file:
        md = yaml.load(metadata_file, Loader=yaml.Loader)

    # ID is different every time: check not none, and clear it.
    assert md['id'] is not None
    md['id'] = None

    expected = {
        'id': None,
        'ga_label': dataset_label,
        # Default creation date is the same as the input folder ctime.
        'creation_dt': datetime.datetime.utcfromtimestamp(source_dataset.stat().st_ctime),
        'size_bytes': 4485,
        'product_type': 'satellite_telemetry_data',
        'usgs': {
            'interval_id': 'LC81140740812015123LGN00'
        },
        'format': {
            'name': 'MD'
        },
        'ga_level': 'P00',
        'checksum_path': 'package.sha1',
        'platform': {
            'code': 'LANDSAT_8'
        },
        'instrument': {
            'name': 'OLI_TIRS'
        },
        'acquisition': {
            'los': datetime.datetime(2015, 5, 3, 3, 14, 38, 105000),
            'aos': datetime.datetime(2015, 5, 3, 3, 12, 24, 364000),
            'groundstation': {
                'code': 'LGN',
                'label': 'Landsat Ground Network',
                'eods_domain_code': '032'
            }
        },
        'image': {
            'satellite_ref_point_start': {
                'x': 114,
                'y': 74
            },
            'satellite_ref_point_end': {
                'x': 114,
                'y': 81
            },
            'bands': {},
        },
        'lineage': {
            'source_datasets': {},
            'machine': {}
        }
    }
    add_default_software_versions(expected)
    assert_same(md, expected)

    # Check all files are listed in checksum file.
    output_checksum_path = dataset_path.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)
    assert checksummed_filenames == [
        'ga-metadata.yaml',
        'product/270.000.2015123031324364.LGS',
        'product/271.000.2015123031330204.LGS',
        'product/271.001.2015123031352904.LGS',
        'product/271.002.2015123031415490.LGS',
        'product/271.003.2015123031438105.LGS',
        'product/LC81140740812015123LGN00_IDF.xml',
        'product/LC81140740812015123LGN00_MD5.txt',
    ]
def test_metadata():
    """
    Run the packaging CLI in 'raw' mode and check the resulting package.

    Verifies the output folder layout, the contents of the generated
    'ga-metadata.yaml' and the list of files recorded in 'package.sha1'.
    """
    output_path = temp_dir()

    run_packaging_cli([
        hardlink_arg(output_path, source_dataset),
        'raw', str(source_dataset), str(output_path)
    ])

    # Folder name the packager is expected to create; also the 'ga_label' value.
    dataset_label = (
        'LS5_TM_STD-RCC_P00_L5TB2011240002022ASA123_0_0_'
        '20110828T002022Z20110828T002858'
    )

    assert_file_structure(output_path, {
        dataset_label: {
            'product': {
                'L5TB2011240002022ASA123I00.data': '',
                'acs.log': '',
                'demod.log': '',
                'ephem.log': '',
                'passinfo': '',
                'ref.log': '',
            },
            'ga-metadata.yaml': '',
            'package.sha1': ''
        }
    })
    dataset_path = output_path.joinpath(dataset_label)

    # TODO: Check metadata fields are sensible.
    output_metadata_path = dataset_path.joinpath('ga-metadata.yaml')
    assert output_metadata_path.exists()
    # Close the handle deterministically rather than leaking it to the GC.
    # The file was written moments ago by the packager under test, so the full
    # Loader (the historical default of yaml.load) is acceptable here; it is
    # passed explicitly because PyYAML >= 6 refuses to guess a Loader.
    with output_metadata_path.open('r') as metadata_file:
        md = yaml.load(metadata_file, Loader=yaml.Loader)

    # ID is different every time: check not none, and clear it.
    assert md['id'] is not None
    md['id'] = None

    expected = {
        'id': None,
        'ga_level': 'P00',
        # Default creation date is the same as the input folder ctime.
        'creation_dt': datetime.datetime.utcfromtimestamp(source_dataset.stat().st_ctime),
        'platform': {
            'code': 'LANDSAT_5'
        },
        'format': {
            'version': 0,
            'name': 'RCC'
        },
        'size_bytes': 226667,
        'product_type': 'satellite_telemetry_data',
        'usgs': {
            'interval_id': 'L5TB2011240002022ASA123'
        },
        'instrument': {
            'name': 'TM',
            'operation_mode': 'BUMPER'
        },
        'acquisition': {
            'aos': datetime.datetime(2011, 8, 28, 0, 20, 22),
            'los': datetime.datetime(2011, 8, 28, 0, 28, 58),
            'platform_orbit': 146212,
            'groundstation': {
                'eods_domain_code': '002',
                'label': 'Alice Springs',
                'code': 'ASA'
            },
        },
        'ga_label': dataset_label,
        'checksum_path': 'package.sha1',
        'lineage': {
            'machine': {},
            'source_datasets': {}
        },
        'image': {
            'bands': {}
        }
    }
    add_default_software_versions(expected)
    assert_same(md, expected)

    # Check all files are listed in checksum file.
    output_checksum_path = dataset_path.joinpath('package.sha1')
    assert output_checksum_path.exists()
    checksummed_filenames = load_checksum_filenames(output_checksum_path)
    assert checksummed_filenames == [
        'ga-metadata.yaml',
        'product/L5TB2011240002022ASA123I00.data',
        'product/acs.log',
        'product/demod.log',
        'product/ephem.log',
        'product/passinfo',
        'product/ref.log',
    ]
def test_sentinel_wagl_package(tmp_path: Path):
    """
    Run the 'packagewagl' CLI on WAGL_SENTINEL_OUTPUT and check the package.

    Verifies the produced file layout, that the checksum file covers every
    other output file, the contiguity images, the ODC metadata document, the
    proc-info document, and that all produced tifs pass COG validation.
    """
    out = tmp_path

    from eodatasets3.scripts import packagewagl

    # No warnings should have been logged during package.
    # We could tighten this to specific warnings if it proves too noisy, but it's
    # useful for catching things like unclosed files.
    with expect_no_warnings():
        res = CliRunner().invoke(
            packagewagl.run,
            map(
                str,
                (
                    WAGL_SENTINEL_OUTPUT,
                    "--level1",
                    S2_L1_METADATA_PATH,
                    "--output",
                    out,
                    # Our weird scaled test dataset resolution
                    "--oa-resolution",
                    998.1818181818181,
                ),
            ),
            catch_exceptions=False,
        )

    # catch_exceptions=False only re-raises real exceptions; a non-zero exit
    # is still captured by the runner, so check it explicitly and show the
    # CLI output on failure.
    assert res.exit_code == 0, res.output

    # The last line of output ends with the dataset path.
    # (The values are not used further; the unpacking itself fails if the
    # last line does not contain a space-separated trailing word.)
    _words, _reported_metadata = res.output.splitlines()[-1].rsplit(" ", 1)

    expected_folder = out / "ga_s2am_ard_3/53/JQJ/2020/10/31"
    assert_file_structure(
        expected_folder,
        {
            "20201031T022859": {
                "ga_s2am_ard_3-2-1_53JQJ_2020-10-31_final.odc-metadata.yaml": "",
                "ga_s2am_ard_3-2-1_53JQJ_2020-10-31_final.proc-info.yaml": "",
                "ga_s2am_ard_3-2-1_53JQJ_2020-10-31_final.sha1": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band01.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band02.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band03.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band04.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band05.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band06.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band07.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band08a.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band08.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band11.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band12.tif": "",
                "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_thumbnail.jpg": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band01.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band02.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band03.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band04.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band05.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band06.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band07.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band08a.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band08.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band11.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band12.tif": "",
                "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_thumbnail.jpg": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_azimuthal-exiting.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_azimuthal-incident.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_combined-terrain-shadow.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_exiting-angle.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_fmask.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_incident-angle.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_nbar-contiguity.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_nbart-contiguity.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_relative-azimuth.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_relative-slope.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_satellite-azimuth.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_satellite-view.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_solar-azimuth.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_solar-zenith.tif": "",
                "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_time-delta.tif": "",
            }
        },
    )
    [output_metadata] = expected_folder.rglob("*.odc-metadata.yaml")

    # Checksum should include all files other than itself.
    [checksum_file] = expected_folder.rglob("*.sha1")
    all_output_files = {
        p.relative_to(checksum_file.parent)
        for p in expected_folder.rglob("*")
        if p != checksum_file and not p.is_dir()
    }
    # Each checksum line is tab-separated; the second field is the file path.
    files_in_checksum = {
        Path(line.split("\t")[1]) for line in checksum_file.read_text().splitlines()
    }
    assert all_output_files == files_in_checksum

    # Verify the computed contiguity looks the same. (metadata fields will depend on it)
    [image] = expected_folder.rglob("*_oa_*nbar-contiguity.tif")
    assert_image(image, nodata=255, unique_pixel_counts={0: 5367, 1: 6733})

    [image] = expected_folder.rglob("*_oa_*nbart-contiguity.tif")
    assert_image(image, nodata=255, unique_pixel_counts={0: 5367, 1: 6733})

    assert_same_as_file(
        {
            "$schema": "https://schemas.opendatacube.org/dataset",
            "id": "14cfa990-7e2f-4f0c-bd5e-b4cb28c27e8d",
            "label": "ga_s2am_ard_3-2-1_53JQJ_2020-10-31_final",
            "product": {
                "name": "ga_s2am_ard_3",
                "href": "https://collections.dea.ga.gov.au/product/ga_s2am_ard_3",
            },
            "crs": "epsg:32753",
            "geometry": {
                "type": "Polygon",
                "coordinates": [[
                    [731901.8181818182, 6790240.0],
                    [728854.7368421053, 6790240.0],
                    [752174.154338321, 6890002.646902946],
                    [759379.8080509851, 6900040.0],
                    [762411.0326110948, 6900040.0],
                    [763218.8851094716, 6900040.0],
                    [809760.0, 6900040.0],
                    [809760.0, 6790240.0],
                    [732900.0, 6790240.0],
                    [731901.8181818182, 6790240.0],
                ]],
            },
            "grids": {
                "default": {
                    "shape": [110, 110],
                    "transform": [
                        998.1818181818181,
                        0.0,
                        699960.0,
                        0.0,
                        -998.1818181818181,
                        6900040.0,
                        0.0,
                        0.0,
                        1.0,
                    ],
                },
                "a": {
                    "shape": [55, 55],
                    "transform": [
                        1996.3636363636363,
                        0.0,
                        699960.0,
                        0.0,
                        -1996.3636363636363,
                        6900040.0,
                        0.0,
                        0.0,
                        1.0,
                    ],
                },
                "b": {
                    "shape": [19, 19],
                    "transform": [
                        5778.9473684210525,
                        0.0,
                        699960.0,
                        0.0,
                        -5778.9473684210525,
                        6900040.0,
                        0.0,
                        0.0,
                        1.0,
                    ],
                },
                "c": {
                    "shape": [19, 19],
                    "transform": [
                        5778.947368421053,
                        0.0,
                        699960.0,
                        0.0,
                        -5778.947368421053,
                        6900040.0,
                        0.0,
                        0.0,
                        1.0,
                    ],
                },
            },
            "properties": {
                "datetime": "2020-10-31T00:55:10.954414",
                "dea:dataset_maturity": "final",
                "eo:cloud_cover": 11.063428320692061,
                "eo:gsd": 998.1818181818181,
                "eo:instrument": "MSI",
                "eo:platform": "sentinel-2a",
                "eo:sun_azimuth": 62.9424764928076,
                "eo:sun_elevation": 26.8398246645449,
                "fmask:clear": 73.65382838133374,
                "fmask:cloud": 11.063428320692061,
                "fmask:cloud_shadow": 0.6983135097842945,
                "fmask:snow": 14.583962676987106,
                "fmask:water": 0.0004671112027989303,
                "gqa:abs_iterative_mean_x": 0.42,
                "gqa:abs_iterative_mean_xy": 0.53,
                "gqa:abs_iterative_mean_y": 0.32,
                "gqa:abs_x": 0.69,
                "gqa:abs_xy": 1.07,
                "gqa:abs_y": 0.82,
                "gqa:cep90": 0.97,
                "gqa:iterative_mean_x": 0.4,
                "gqa:iterative_mean_xy": 0.4,
                "gqa:iterative_mean_y": 0.04,
                "gqa:iterative_stddev_x": 0.29,
                "gqa:iterative_stddev_xy": 0.53,
                "gqa:iterative_stddev_y": 0.44,
                "gqa:mean_x": 0.38,
                "gqa:mean_xy": 0.39,
                "gqa:mean_y": -0.07,
                "gqa:stddev_x": 1.18,
                "gqa:stddev_xy": 2.24,
                "gqa:stddev_y": 1.9,
                "odc:dataset_version": "3.2.1",
                "odc:file_format": "GeoTIFF",
                "odc:processing_datetime": "2021-02-10T03:25:22.635668",
                "odc:producer": "ga.gov.au",
                "odc:product_family": "ard",
                "odc:region_code": "53JQJ",
                "sat:orbit_state": "descending",
                "sat:relative_orbit": 102,
                "sentinel:datastrip_id": "S2A_OPER_MSI_L1C_DS_EPAE_20201031T022859_S20201031T004711_N02.09",
                "sentinel:sentinel_tile_id": "S2A_OPER_MSI_L1C_TL_EPAE_20201031T022859_A027984_T53JQJ_N02.09",
                "sentinel:datatake_start_datetime": "2020-10-31T02:28:59",
            },
            "measurements": {
                "nbar_blue": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band02.tif"
                },
                "nbar_coastal_aerosol": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band01.tif",
                    "grid": "b",
                },
                "nbar_green": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band03.tif"
                },
                "nbar_nir_1": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band08.tif"
                },
                "nbar_nir_2": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band08a.tif",
                    "grid": "a",
                },
                "nbar_red": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band04.tif"
                },
                "nbar_red_edge_1": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band05.tif",
                    "grid": "a",
                },
                "nbar_red_edge_2": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band06.tif",
                    "grid": "a",
                },
                "nbar_red_edge_3": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band07.tif",
                    "grid": "a",
                },
                "nbar_swir_2": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band11.tif",
                    "grid": "a",
                },
                "nbar_swir_3": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_band12.tif",
                    "grid": "a",
                },
                "nbart_blue": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band02.tif"
                },
                "nbart_coastal_aerosol": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band01.tif",
                    "grid": "b",
                },
                "nbart_green": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band03.tif"
                },
                "nbart_nir_1": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band08.tif"
                },
                "nbart_nir_2": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band08a.tif",
                    "grid": "a",
                },
                "nbart_red": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band04.tif"
                },
                "nbart_red_edge_1": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band05.tif",
                    "grid": "a",
                },
                "nbart_red_edge_2": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band06.tif",
                    "grid": "a",
                },
                "nbart_red_edge_3": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band07.tif",
                    "grid": "a",
                },
                "nbart_swir_2": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band11.tif",
                    "grid": "a",
                },
                "nbart_swir_3": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_band12.tif",
                    "grid": "a",
                },
                "oa_azimuthal_exiting": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_azimuthal-exiting.tif"
                },
                "oa_azimuthal_incident": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_azimuthal-incident.tif"
                },
                "oa_combined_terrain_shadow": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_combined-terrain-shadow.tif"
                },
                "oa_exiting_angle": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_exiting-angle.tif"
                },
                "oa_fmask": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_fmask.tif",
                    "grid": "c",
                },
                "oa_incident_angle": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_incident-angle.tif"
                },
                "oa_nbar_contiguity": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_nbar-contiguity.tif"
                },
                "oa_nbart_contiguity": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_nbart-contiguity.tif"
                },
                "oa_relative_azimuth": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_relative-azimuth.tif"
                },
                "oa_relative_slope": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_relative-slope.tif"
                },
                "oa_satellite_azimuth": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_satellite-azimuth.tif"
                },
                "oa_satellite_view": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_satellite-view.tif"
                },
                "oa_solar_azimuth": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_solar-azimuth.tif"
                },
                "oa_solar_zenith": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_solar-zenith.tif"
                },
                "oa_time_delta": {
                    "path": "ga_s2am_oa_3-2-1_53JQJ_2020-10-31_final_time-delta.tif"
                },
            },
            "accessories": {
                "checksum:sha1": {
                    "path": "ga_s2am_ard_3-2-1_53JQJ_2020-10-31_final.sha1"
                },
                "metadata:processor": {
                    "path": "ga_s2am_ard_3-2-1_53JQJ_2020-10-31_final.proc-info.yaml"
                },
                "thumbnail:nbar": {
                    "path": "ga_s2am_nbar_3-2-1_53JQJ_2020-10-31_final_thumbnail.jpg"
                },
                "thumbnail:nbart": {
                    "path": "ga_s2am_nbart_3-2-1_53JQJ_2020-10-31_final_thumbnail.jpg"
                },
            },
            "lineage": {
                "level1": ["e27200c1-0a9c-5e24-bfe1-bbbb3f3bdedc"]
            },
        },
        output_metadata,
    )

    [proc_info] = expected_folder.rglob("*.proc-info.yaml")
    assert_same_as_file(
        {
            "fmask": {
                "parameters": {
                    "cloud_buffer_distance_metres": 0.0,
                    "cloud_shadow_buffer_distance_metres": 0.0,
                    "frantz_parallax_sentinel_2": False,
                },
                "percent_class_distribution": {
                    "clear": 73.65382838133374,
                    "cloud": 11.063428320692061,
                    "cloud_shadow": 0.6983135097842945,
                    "snow": 14.583962676987106,
                    "water": 0.0004671112027989303,
                },
            },
            "software_versions": [
                {
                    "name": "modtran",
                    "url": "http://www.ontar.com/software/productdetails.aspx?item=modtran",
                    "version": "6.0.1",
                },
                {
                    "name": "wagl",
                    "url": "https://github.com/GeoscienceAustralia/wagl.git",
                    "version": "5.4.1",
                },
                {
                    "name": "eugl",
                    "url": "https://github.com/OpenDataCubePipelines/eugl.git",
                    "version": "0.2.1",
                },
                {
                    "name": "gverify",
                    "url": None,
                    "version": "v0.25c"
                },
                {
                    "name": "fmask",
                    "url": "https://bitbucket.org/chchrsc/python-fmask",
                    "version": "0.5.4",
                },
                {
                    "name": "tesp",
                    "url": "https://github.com/OpenDataCubePipelines/tesp.git",
                    "version": "0.6.2",
                },
                {
                    "name": "eodatasets3",
                    "url": "https://github.com/GeoscienceAustralia/eo-datasets",
                    "version": eodatasets3.__version__,
                },
            ],
        },
        proc_info,
        ignore_fields=("gqa", "wagl"),
    )

    # All produced tifs should be valid COGs
    for image in expected_folder.rglob("*.tif"):
        validation = cogeo.cog_validate(image)
        # NOTE(review): newer rio-cogeo releases return a tuple
        # (is_valid, errors, warnings) instead of a bare bool - confirm against
        # the pinned version. A non-empty tuple is always truthy, so asserting
        # on it directly could never fail; pick out the validity flag instead.
        is_valid = validation[0] if isinstance(validation, tuple) else validation
        assert is_valid, f"Failed COG validation: {image}"