Esempio n. 1
0
def test_staging_file(tmp_dir, dvc):
    """Check that a staged file reaches the local ODB only after a transfer.

    The file is staged against the local object database; at that point the
    object must exist (and pass ``check``) in the staging ODB only. After
    transferring it with ``hardlink=True`` both databases must pass ``check``
    and the object's path must exist on the local filesystem.
    """
    from dvc.data import check
    from dvc.data.stage import stage
    from dvc.data.transfer import transfer

    tmp_dir.gen("foo", "foo")
    local_fs = LocalFileSystem()

    cache = dvc.odb.local
    foo_path = (tmp_dir / "foo").fs_path
    staging, _meta, staged_obj = stage(cache, foo_path, local_fs, "md5")

    # Right after staging, only the staging ODB knows about the object.
    assert not cache.exists(staged_obj.hash_info)
    assert staging.exists(staged_obj.hash_info)

    # Verifying against the local ODB must fail until the object is moved.
    with pytest.raises(FileNotFoundError):
        check(cache, staged_obj)
    check(staging, staged_obj)

    transfer(staging, cache, {staged_obj.hash_info}, hardlink=True)

    # After the transfer the object verifies in both databases
    # (local first, then staging).
    for odb in (cache, staging):
        check(odb, staged_obj)

    assert local_fs.exists(cache.hash_to_path(staged_obj.hash_info.value))
Esempio n. 2
0
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Exercise hashing/staging of a tracked file that is dirty, then removed.

    Steps, in order:
      1. Track ``file`` with DVC, then overwrite it in the workspace.
      2. Staging through the repo filesystem must yield the hash expected for
         the modified content, and the staged object must pass ``check``.
      3. After the workspace file is deleted, the previously staged object
         must fail ``check`` with ``ObjectFormatError``.
      4. Hashing and staging the now-missing path must yield the hash
         expected for the originally tracked content.
    """
    from dvc.data import check
    from dvc.data.stage import get_file_hash
    from dvc.objects.errors import ObjectFormatError

    # NOTE(review): `stage` (like HashInfo, RepoFileSystem, clean_staging and
    # pytest) is not imported inside this function; presumably it is provided
    # at module level -- confirm.

    tmp_dir.dvc_gen("file", "file")
    tracked_hash = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")

    workspace_file = tmp_dir / "file"
    target = workspace_file.fs_path

    workspace_file.write_text("something")
    dirty_hash = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")

    clean_staging()

    # The file is modified in the workspace, so the repo filesystem must not
    # report an md5 for it and staging must produce the workspace hash rather
    # than the DVC cached one.
    repo_fs = RepoFileSystem(repo=dvc)
    assert repo_fs.info(target).get("md5") is None
    staging_odb, _meta, dirty_obj = stage(
        dvc.odb.local, target, repo_fs, "md5"
    )
    assert dirty_obj.hash_info == dirty_hash
    check(staging_odb, dirty_obj)

    # Remove the file from the workspace: any staged object referring to the
    # modified workspace content is now invalid.
    workspace_file.unlink()
    with pytest.raises(ObjectFormatError):
        check(staging_odb, dirty_obj)

    # With the workspace copy gone, the DVC cached hash is what gets reported.
    assert repo_fs.info(target)["md5"] == tracked_hash.value
    _meta, resolved_hash = get_file_hash(
        target, repo_fs, "md5", state=dvc.state
    )
    assert resolved_hash == tracked_hash

    # The path can still be staged even though it is missing in the workspace,
    # since the repo filesystem falls back to the DVC cached hash (and refers
    # to the local cache object).
    _odb, _meta, cached_obj = stage(dvc.odb.local, target, repo_fs, "md5")
    assert cached_obj.hash_info == tracked_hash