def test_remove_missing(test_dataset: DatasetForTests,
                        integration_test_data: Path,
                        other_dataset: DatasetForTests):
    """An on-disk dataset that's not indexed should be trashed when trash_missing=True"""
    register_base_directory(integration_test_data)
    trashed_path = test_dataset.base_path.joinpath(*_TRASH_PREFIX, *test_dataset.path_offset)

    # Add a second dataset that's indexed. Should not be touched!
    other_dataset.add_to_index()

    assert other_dataset.path.exists()

    assert test_dataset.path.exists(), "On-disk location should exist before test begins."
    assert not trashed_path.exists(), "Trashed file shouldn't exist."
    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[
            test_dataset.uri,
            other_dataset.path.as_uri(),
        ],
        expected_mismatches=[
            mm.DatasetNotIndexed(test_dataset.dataset, test_dataset.uri)
        ],
        # Unmodified index
        expected_index_result=freeze_index(test_dataset.collection.index_),
        cache_path=integration_test_data,
        fix_settings=dict(trash_missing=True, update_locations=True)
    )
    assert not test_dataset.path.exists(), "On-disk location should have been moved to the trash."
    assert trashed_path.exists(), "Trashed file should exist after the fix is applied."
    assert other_dataset.path.exists(), "Dataset outside of collection folder shouldn't be touched"
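
The trashed_path asserted above is just the original dataset path re-rooted under a trash prefix inside the registered base directory. A minimal sketch of that mapping, assuming _TRASH_PREFIX is a tuple of path components such as ('.trash',) (the real constant comes from digitalearthau.paths):

from pathlib import Path

_TRASH_PREFIX = ('.trash',)  # assumed value; the real constant lives in digitalearthau.paths


def expected_trash_location(base_path: Path, path_offset: tuple) -> Path:
    """Mirror the joinpath expression used in the tests above."""
    # <base>/<trash prefix>/<offset of the dataset under the base>
    return base_path.joinpath(*_TRASH_PREFIX, *path_offset)


# e.g. /data/ls8 + ('LS8_SCENE_DIR', 'ga-metadata.yaml')
#   -> /data/ls8/.trash/LS8_SCENE_DIR/ga-metadata.yaml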
Example #2
@pytest.fixture
def destination_path(tmpdir) -> Path:
    """A directory that datasets can be moved to or from.

    Provides a temp directory that is registered with the `digitalearthau.paths` module as a base directory.
    """
    destination = Path(tmpdir) / 'destination_collection'
    destination.mkdir(exist_ok=False)
    paths.register_base_directory(destination)
    return destination
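
A test that needs a registered move/trash target simply requests this fixture by name; pytest supplies a directory that is already registered with digitalearthau.paths. A trivial usage sketch (the test name and assertions are illustrative only):

from pathlib import Path


def test_destination_is_registered_dir(destination_path: Path):
    # pytest injects the fixture; the directory exists on disk and has been
    # registered as a base directory, so move/trash logic may operate inside it.
    assert destination_path.is_dir()
    assert destination_path.name == 'destination_collection'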
Example #3
def test_is_trashed(test_dataset: DatasetForTests, integration_test_data: Path,
                    archived_dt, expect_to_be_trashed):
    root = integration_test_data

    # Same test, but with trash_archived=True, so the file should be moved to the trash.
    register_base_directory(root)
    test_dataset.add_to_index()
    test_dataset.archive_in_index(archived_dt=archived_dt)

    archived_on_disk = DatasetLite(test_dataset.dataset.id,
                                   archived_time=archived_dt)

    trashed_path = test_dataset.base_path.joinpath(*_TRASH_PREFIX,
                                                   *test_dataset.path_offset)

    # Before the test, file is in place and nothing trashed.
    assert test_dataset.path.exists(), "On-disk location should exist before test begins."
    assert not trashed_path.exists(), "Trashed file shouldn't exist."
    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[test_dataset.uri],
        expected_mismatches=[
            mm.ArchivedDatasetOnDisk(archived_on_disk, test_dataset.uri),
        ],
        expected_index_result={
            # Archived: shouldn't be active in index.
            # on_disk: (on_disk_uri,),
            # Prov parent should still exist as it wasn't archived.
            test_dataset.parent: (),
        },
        cache_path=root,
        fix_settings=dict(index_missing=True,
                          update_locations=True,
                          trash_archived=True))

    # Show output structure for debugging
    # print("Output structure")
    # for p in paths.list_file_paths(root):
    #    print(f"\t{p}")

    if expect_to_be_trashed:
        assert trashed_path.exists(), "File isn't in trash."
        assert not test_dataset.path.exists(), "On-disk location still exists (should have been moved to trash)."
    else:
        assert not trashed_path.exists(), "File shouldn't have been trashed."
        assert test_dataset.path.exists(), "On-disk location should still be in place."
Example #4
@pytest.fixture
def test_dataset(integration_test_data, dea_index) -> DatasetForTests:
    """A dataset on disk, with corresponding collection"""
    test_data = integration_test_data

    # Tests assume one dataset for the collection, so delete the second.
    shutil.rmtree(
        str(test_data.joinpath('LS8_OLITIRS_OTH_P51_GALPGS01-032_114_080_20150924')))
    ls8_collection = Collection(
        name='ls8_scene_test',
        query={},
        file_patterns=[str(test_data.joinpath('LS8*/ga-metadata.yaml'))],
        unique=[],
        index_=dea_index)
    collections._add(ls8_collection)

    # Add a decoy collection.
    ls5_nc_collection = Collection(
        name='ls5_nc_test',
        query={},
        file_patterns=[str(test_data.joinpath('LS5*.nc'))],
        unique=[],
        index_=dea_index)
    collections._add(ls5_nc_collection)

    # register this as a base directory so that datasets can be trashed within it.
    register_base_directory(str(test_data))

    cache_path = test_data.joinpath('cache')
    cache_path.mkdir()

    return DatasetForTests(
        collection=ls8_collection,
        id_=uuid.UUID('86150afc-b7d5-4938-a75e-3445007256d3'),
        base_path=test_data,
        path_offset=('LS8_OLITIRS_OTH_P51_GALPGS01-032_114_080_20160926',
                     'ga-metadata.yaml'),
        parent_id=uuid.UUID('dee471ed-5aa5-46f5-96b5-1e1ea91ffee4'))
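
The tests above read test_dataset.path and test_dataset.uri, which are assumed to be derived from base_path and path_offset. A minimal sketch of that assumed relationship (the real DatasetForTests helper also wraps index operations such as add_to_index() and archive_in_index()):

from pathlib import Path
from typing import Tuple


class DatasetForTestsSketch:
    """Illustrative only: how path and uri are assumed to be derived."""

    def __init__(self, base_path: Path, path_offset: Tuple[str, ...]):
        self.base_path = base_path
        self.path_offset = path_offset

    @property
    def path(self) -> Path:
        # e.g. <test_data>/<scene directory>/ga-metadata.yaml
        return self.base_path.joinpath(*self.path_offset)

    @property
    def uri(self) -> str:
        return self.path.as_uri()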