Example #1
0
def test_move_when_corrupt_exists_at_dest(global_integration_cli_args,
                                          test_dataset: DatasetForTests,
                                          other_dataset: DatasetForTests,
                                          destination_path):
    """
    Move a dataset to a location that already exists but is invalid.

    It should see that the destination is corrupt and skip the move.
    """
    test_dataset.add_to_index()
    other_dataset.add_to_index()

    # Where the dataset would land if the move went ahead.
    corrupt_dest: Path = destination_path.joinpath(*test_dataset.path_offset)

    # Plant a non-dataset file at that destination so it reads as corrupt.
    corrupt_dest.parent.mkdir(parents=True)
    corrupt_dest.write_text("invalid")

    index_before = freeze_index(test_dataset.collection.index_)

    # Attempt the move via the CLI.
    res = _call_move(['--destination', destination_path, test_dataset.path],
                     global_integration_cli_args)

    # The command itself succeeds, but this dataset should be skipped.
    assert res.exit_code == 0, res.output
    print(res.output)

    index_after = freeze_index(test_dataset.collection.index_)

    # No index changes, and the original on-disk file is untouched.
    assert index_before == index_after

    assert test_dataset.path.exists()
def test_remove_missing(test_dataset: DatasetForTests,
                        integration_test_data: Path,
                        other_dataset: DatasetForTests):
    """An on-disk dataset that's not indexed should be trashed when trash_missing=True"""
    register_base_directory(integration_test_data)
    # The location the sync is expected to move the unindexed dataset to.
    trashed_path = test_dataset.base_path.joinpath(*_TRASH_PREFIX, *test_dataset.path_offset)

    # Add a second dataset that's indexed. Should not be touched!
    other_dataset.add_to_index()

    assert other_dataset.path.exists()

    # Pre-conditions: dataset on disk, nothing in the trash yet.
    assert test_dataset.path.exists(), "On-disk location should exist before test begins."
    assert not trashed_path.exists(), "Trashed file shouldn't exist before test begins."
    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[
            test_dataset.uri,
            other_dataset.path.as_uri(),
        ],
        expected_mismatches=[
            mm.DatasetNotIndexed(test_dataset.dataset, test_dataset.uri)
        ],
        # Unmodified index
        expected_index_result=freeze_index(test_dataset.collection.index_),
        cache_path=integration_test_data,
        fix_settings=dict(trash_missing=True, update_locations=True)
    )
    # The unindexed dataset should have been moved into the trash...
    assert not test_dataset.path.exists(), "On-disk location should have been trashed."
    assert trashed_path.exists(), "Trashed file should exist after the sync."
    # ...while the indexed sibling is left alone.
    assert other_dataset.path.exists(), "Dataset outside of collection folder shouldn't be touched"
def _check_mismatch_fix(index: Index,
                        mismatches: Iterable[Mismatch],
                        expected_index_result: Mapping[DatasetLite, Iterable[str]],
                        fix_settings: dict):
    """
    Check that the index is correctly updated when fixing mismatches.

    :param index: index to run the fixes against
    :param mismatches: the mismatches to fix
    :param expected_index_result: expected index contents after the real fix run
    :param fix_settings: keyword settings forwarded to fix_mismatches (e.g. trash_missing=True)
    """
    # fix_mismatches() is called twice below; a generator argument would be
    # exhausted after the first call, so snapshot the mismatches up front.
    mismatches = list(mismatches)

    # First check that no change is made to the index if we have all fixes set to False.
    starting_index = freeze_index(index)
    # Default settings are all false.
    fixes.fix_mismatches(mismatches, index)
    assert starting_index == freeze_index(index), "Changes made to index despite all fix settings being " \
                                                  "false (index_missing=False etc)"

    # Now perform fixes, check that they match expected.
    fixes.fix_mismatches(mismatches, index, **fix_settings)
    assert expected_index_result == freeze_index(index)
def test_detect_corrupt_existing(test_dataset: DatasetForTests,
                                 integration_test_data: Path):
    """If a dataset exists but cannot be read, report as corrupt"""
    # (Stale `# type:` comment removed: it contradicted the annotated
    # two-parameter signature and would be rejected by type checkers.)
    path = uri_to_local_path(test_dataset.uri)

    test_dataset.add_to_index()
    assert path.exists()

    # Replace the on-disk dataset file with unreadable garbage.
    os.unlink(str(path))
    path.write_text('corruption!')
    assert path.exists()

    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[test_dataset.uri],
        expected_mismatches=[
            # We don't know if it's the same dataset
            mm.UnreadableDataset(None, test_dataset.uri)
        ],
        # Unmodified index
        expected_index_result=freeze_index(test_dataset.collection.index_),
        cache_path=integration_test_data,
        fix_settings=dict(trash_missing=True, trash_archived=True, update_locations=True)
    )
    # If a dataset is in the index pointing to the corrupt location, it shouldn't be trashed with trash_archived=True
    assert path.exists(), "Corrupt dataset with sibling in index should not be trashed"