def test_remove_missing(test_dataset: DatasetForTests, integration_test_data: Path, other_dataset: DatasetForTests):
    """An on-disk dataset that's not indexed should be trashed when trash_missing=True"""
    register_base_directory(integration_test_data)
    trashed_path = test_dataset.base_path.joinpath(*_TRASH_PREFIX, *test_dataset.path_offset)

    # Add a second dataset that's indexed. Should not be touched!
    other_dataset.add_to_index()
    assert other_dataset.path.exists()

    assert test_dataset.path.exists(), "On-disk location should exist before test begins."
    assert not trashed_path.exists(), "Trashed file shouldn't exist yet."

    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[
            test_dataset.uri,
            other_dataset.path.as_uri(),
        ],
        expected_mismatches=[
            mm.DatasetNotIndexed(test_dataset.dataset, test_dataset.uri)
        ],
        # Unmodified index
        expected_index_result=freeze_index(test_dataset.collection.index_),
        cache_path=integration_test_data,
        fix_settings=dict(trash_missing=True, update_locations=True)
    )

    assert not test_dataset.path.exists(), "On-disk location should have been moved to the trash."
    assert trashed_path.exists(), "File should now exist in the trash."
    assert other_dataset.path.exists(), "Dataset outside of collection folder shouldn't be touched"
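# The trash-path computations in these tests follow one convention: a trashed file
# keeps its offset relative to the registered base directory, nested under the trash
# prefix. A small sketch capturing that convention (the helper name here is ours;
# only `_TRASH_PREFIX` comes from the module under test):
def _expected_trash_location(dataset: DatasetForTests) -> Path:
    """Where a trashed copy of the given dataset should appear on disk."""
    return dataset.base_path.joinpath(*_TRASH_PREFIX, *dataset.path_offset)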
@pytest.fixture
def destination_path(tmpdir) -> Path:
    """A directory that datasets can be moved to or from.

    Provides a temp directory that is registered with the `digitalearthau.paths`
    module as a base directory.
    """
    destination = Path(tmpdir) / 'destination_collection'
    destination.mkdir(exist_ok=False)
    paths.register_base_directory(destination)
    return destination
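# `test_is_trashed` below receives `archived_dt` / `expect_to_be_trashed` from a
# parametrize decorator that isn't shown in this excerpt. A sketch of what such a
# parametrization could look like (the 90-day cutoff is an illustrative assumption,
# not the project's real grace period; assumes `pytest` and `datetime`/`timedelta`
# are imported at the top of the module):
_EXAMPLE_ARCHIVED_CASES = pytest.mark.parametrize(
    ('archived_dt', 'expect_to_be_trashed'),
    [
        # Archived long ago: old enough that the fixer should trash it.
        (datetime.utcnow() - timedelta(days=90), True),
        # Archived just now: too recent, should be left in place.
        (datetime.utcnow(), False),
    ],
)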
def test_is_trashed(test_dataset: DatasetForTests, integration_test_data: Path, archived_dt, expect_to_be_trashed):
    root = integration_test_data

    # Same scenario, but with trash_archived=True, so the file should be renamed
    # to the trash path (if it was archived long enough ago).
    register_base_directory(root)
    test_dataset.add_to_index()
    test_dataset.archive_in_index(archived_dt=archived_dt)

    archived_on_disk = DatasetLite(test_dataset.dataset.id, archived_time=archived_dt)
    trashed_path = test_dataset.base_path.joinpath(*_TRASH_PREFIX, *test_dataset.path_offset)

    # Before the test, the file is in place and nothing is trashed.
    assert test_dataset.path.exists(), "On-disk location should exist before test begins."
    assert not trashed_path.exists(), "Trashed file shouldn't exist yet."

    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[test_dataset.uri],
        expected_mismatches=[
            mm.ArchivedDatasetOnDisk(archived_on_disk, test_dataset.uri),
        ],
        expected_index_result={
            # Archived: shouldn't be active in index.
            # on_disk: (on_disk_uri,),
            # Prov parent should still exist as it wasn't archived.
            test_dataset.parent: (),
        },
        cache_path=root,
        fix_settings=dict(index_missing=True, update_locations=True, trash_archived=True)
    )

    # Show output structure for debugging
    # print("Output structure")
    # for p in paths.list_file_paths(root):
    #     print(f"\t{p}")

    if expect_to_be_trashed:
        assert trashed_path.exists(), "File should have been moved to the trash."
        assert not test_dataset.path.exists(), "On-disk location still exists (should have been moved to trash)."
    else:
        assert not trashed_path.exists(), "File shouldn't have been trashed."
        assert test_dataset.path.exists(), "On-disk location should still be in place."
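# For reference: `expected_index_result` (used above) appears to map each dataset
# that should remain active in the index to the tuple of URIs the index should hold
# for it, with an empty tuple meaning "indexed, but no active locations". This is
# inferred from usage in this module rather than from `_check_sync`'s definition.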
@pytest.fixture
def test_dataset(integration_test_data, dea_index) -> DatasetForTests:
    """A dataset on disk, with corresponding collection"""
    test_data = integration_test_data

    # Tests assume one dataset for the collection, so delete the second.
    shutil.rmtree(str(test_data.joinpath('LS8_OLITIRS_OTH_P51_GALPGS01-032_114_080_20150924')))

    ls8_collection = Collection(
        name='ls8_scene_test',
        query={},
        file_patterns=[str(test_data.joinpath('LS8*/ga-metadata.yaml'))],
        unique=[],
        index_=dea_index
    )
    collections._add(ls8_collection)

    # Add a decoy collection (its file patterns shouldn't match our dataset).
    ls5_nc_collection = Collection(
        name='ls5_nc_test',
        query={},
        file_patterns=[str(test_data.joinpath('LS5*.nc'))],
        unique=[],
        index_=dea_index
    )
    collections._add(ls5_nc_collection)

    # Register this as a base directory so that datasets can be trashed within it.
    register_base_directory(str(test_data))

    cache_path = test_data.joinpath('cache')
    cache_path.mkdir()

    return DatasetForTests(
        collection=ls8_collection,
        id_=uuid.UUID('86150afc-b7d5-4938-a75e-3445007256d3'),
        base_path=test_data,
        path_offset=('LS8_OLITIRS_OTH_P51_GALPGS01-032_114_080_20160926', 'ga-metadata.yaml'),
        parent_id=uuid.UUID('dee471ed-5aa5-46f5-96b5-1e1ea91ffee4')
    )
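# Orientation for readers: the tests above rely on a couple of derived attributes of
# `DatasetForTests` (relationships inferred from usage in this module, not from the
# class's definition):
#
#   test_dataset.path == test_dataset.base_path.joinpath(*test_dataset.path_offset)
#   test_dataset.uri  == test_dataset.path.as_uri()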