예제 #1
0
def test_walk_onerror(tmp_dir, dvc):
    """Walking a bad path is silent by default and raises via ``onerror``.

    Two bad paths are exercised: one that does not exist ("dir") and one
    that exists but is a file rather than a directory ("foo").
    """

    def _reraise(exc):
        raise exc

    tmp_dir.dvc_gen("foo", "foo")
    fs = DvcFileSystem(dvc)

    # "dir" does not exist; "foo" is not a directory.
    for bad_path in ("dir", "foo"):
        # Without a callback the walk must finish without raising.
        list(fs.walk(bad_path))

        # A callback that re-raises must let the OSError propagate.
        with pytest.raises(OSError):
            list(fs.walk(bad_path, onerror=_reraise))
예제 #2
0
파일: __init__.py 프로젝트: isidentical/dvc
    def dvcfs(self):
        """Build a ``DvcFileSystem`` that uses this object as its repo."""
        # Imported at call time rather than at module import time.
        from dvc.fs.dvc import DvcFileSystem

        filesystem = DvcFileSystem(repo=self)
        return filesystem
예제 #3
0
def test_get_hash_file(tmp_dir, dvc):
    """``fs.info`` on a tracked file reports the expected ``md5`` value."""
    tmp_dir.dvc_gen({"foo": "foo"})
    fs = DvcFileSystem(repo=dvc)

    foo_path = (tmp_dir / "foo").fs_path
    details = fs.info(foo_path)

    assert details["md5"] == "acbd18db4cc2f85cedef654fccc4a4d8"
예제 #4
0
def test_isdvc(tmp_dir, dvc):
    """``isdvc`` is true only for the path that was passed to ``dvc.add``."""
    workspace_files = {"foo": "foo", "bar": "bar"}
    tmp_dir.gen(workspace_files)
    dvc.add("foo")

    fs = DvcFileSystem(repo=dvc)

    # "foo" was added, "bar" was only written to the workspace.
    added_is_dvc = fs.isdvc("foo")
    assert added_is_dvc
    plain_is_dvc = fs.isdvc("bar")
    assert not plain_is_dvc
예제 #5
0
파일: test_dvc.py 프로젝트: jhhuh/dvc
def test_get_key(tmp_dir, dvc, path, key):
    """The wrapped filesystem's ``_get_key`` maps ``path`` to ``key``."""
    fs = DvcFileSystem(repo=dvc)
    computed_key = fs.fs._get_key(path)
    assert computed_key == key
예제 #6
0
파일: test_dvc.py 프로젝트: jhhuh/dvc
def test_walk_not_a_dir(tmp_dir, dvc):
    """Walking a path that is a file must not raise."""
    tmp_dir.dvc_gen("foo", "foo")
    fs = DvcFileSystem(repo=dvc)

    # Drain the walk; the test passes as long as nothing is raised.
    list(fs.walk("foo"))
예제 #7
0
파일: test_dvc.py 프로젝트: jhhuh/dvc
def test_walk_missing(tmp_dir, dvc):
    """Walking a path that was never created must not raise."""
    fs = DvcFileSystem(repo=dvc)

    # Drain the walk; the test passes as long as nothing is raised.
    list(fs.walk("dir"))
예제 #8
0
def test_get_hash_file(tmp_dir, dvc):
    """``fs.info`` on a ``PathInfo`` for a tracked file reports the expected ``md5``."""
    tmp_dir.dvc_gen({"foo": "foo"})
    fs = DvcFileSystem(dvc)

    foo_info = PathInfo(tmp_dir) / "foo"
    details = fs.info(foo_info)

    assert details["md5"] == "acbd18db4cc2f85cedef654fccc4a4d8"
예제 #9
0
파일: test_dvc.py 프로젝트: jear/dvc
def test_isdir_isfile(tmp_dir, dvc):
    """Check ``isdir``/``isfile``/``isdvc`` before and after ``dvc.add``.

    The second round of checks runs after the added paths have been removed
    from the workspace.
    """

    def check(path, is_dir, is_file, is_dvc):
        # Queries run in the fixed order isdir -> isfile -> isdvc, and each
        # result is compared by truthiness only.
        assert bool(fs.isdir(path)) == is_dir
        assert bool(fs.isfile(path)) == is_file
        assert bool(fs.isdvc(path)) == is_dvc

    datadir_content = {"foo": "foo", "bar": "bar"}
    subdir_content = {
        "baz": "baz",
        "data": {"abc": "abc", "xyz": "xyz"},
    }
    tmp_dir.gen(
        {
            "datafile": "data",
            "datadir": datadir_content,
            "subdir": subdir_content,
        }
    )

    fs = DvcFileSystem(repo=dvc)

    # Nothing has been added yet, so neither path is reported as DVC data.
    check("datadir", is_dir=True, is_file=False, is_dvc=False)
    check("datafile", is_dir=False, is_file=True, is_dvc=False)

    targets = [
        "datadir",
        "datafile",
        os.path.join("subdir", "baz"),
        os.path.join("subdir", "data"),
    ]
    dvc.add(targets)

    # Remove the workspace copies of everything that was just added.
    shutil.rmtree(tmp_dir / "datadir")
    shutil.rmtree(tmp_dir / "subdir" / "data")
    (tmp_dir / "datafile").unlink()
    (tmp_dir / "subdir" / "baz").unlink()

    check("datadir", is_dir=True, is_file=False, is_dvc=True)
    check("datafile", is_dir=False, is_file=True, is_dvc=True)

    # "subdir" itself was not added; only two of its children were.
    check("subdir", is_dir=True, is_file=False, is_dvc=False)
    assert fs.isfile("subdir/baz")
    assert fs.isdir("subdir/data")
예제 #10
0
파일: test_dvc.py 프로젝트: jear/dvc
def test_dvcfs_no_subrepos(tmp_dir, dvc, scm):
    """A default ``DvcFileSystem`` lists only the outer repo's entries.

    A subrepo is created under ``dir/repo``; the walk and the predicate
    checks below assert that none of its paths are reported.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse DvcFileSystem"}},
        commit="dir/repo.txt",
    )
    tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo")

    subrepo = tmp_dir / "dir" / "repo"
    make_subrepo(subrepo, scm)
    with subrepo.chdir():
        subrepo.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
        subrepo.scm_gen({"ipsum": "ipsum"}, commit="BAR")

    # using fs that does not have dvcignore
    dvc._reset()
    fs = DvcFileSystem(repo=dvc)
    expected = [
        "/.dvcignore",
        "/.gitignore",
        "/lorem",
        "/lorem.dvc",
        "/dir",
        "/dir/repo.txt",
    ]

    actual = [
        posixpath.join(root, entry)
        for root, dirs, files in fs.walk("/", dvcfiles=True)
        for entry in dirs + files
    ]

    # Same members and same count, i.e. no duplicates in the walk output.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)

    # (predicate, path, exact boolean result), checked in this order.
    probes = [
        ("isfile", "lorem", True),
        ("isfile", "dir/repo/foo", False),
        ("isdir", "dir/repo", False),
        ("isdir", "dir", True),
        ("isdvc", "lorem", True),
        ("isdvc", "dir/repo/dir1", False),
        ("exists", "dir/repo.txt", True),
        ("exists", "repo/ipsum", False),
    ]
    for predicate, path, outcome in probes:
        assert getattr(fs, predicate)(path) is outcome
예제 #11
0
파일: test_dvc.py 프로젝트: jear/dvc
def test_subrepos(tmp_dir, scm, dvc, mocker):
    """With ``subrepos=True`` each subrepo path resolves to its own repo.

    For each subrepo, ``_get_repo`` is patched with a wrapper that calls the
    real method and asserts the returned repo's ``root_dir`` matches that
    subrepo.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse DvcFileSystem"}},
        commit="dir/repo.txt",
    )

    subrepo1 = tmp_dir / "dir" / "repo"
    subrepo2 = tmp_dir / "dir" / "repo2"
    make_subrepo(subrepo1, scm)
    make_subrepo(subrepo2, scm)

    with subrepo1.chdir():
        subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    with subrepo2.chdir():
        subrepo2.dvc_gen(
            {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
        )

    dvc._reset()
    fs = DvcFileSystem(repo=dvc, subrepos=True)

    def repo_checking_side_effect(expected_repo):
        # Grab the real method now, before the patch replaces it.
        real_get_repo = fs.fs._get_repo

        def checked(*args, **kwargs):
            found = real_get_repo(*args, **kwargs)
            assert found.root_dir == expected_repo.root_dir
            return found

        return checked

    # (subrepo, path prefix, tracked file, tracked dir, file inside that dir)
    layouts = (
        (subrepo1, "dir/repo", "foo", "dir1", "bar"),
        (subrepo2, "dir/repo2", "lorem", "dir2", "ipsum"),
    )
    for subrepo, prefix, file_name, dir_name, nested_name in layouts:
        file_path = f"{prefix}/{file_name}"
        dir_path = f"{prefix}/{dir_name}"
        nested_path = f"{dir_path}/{nested_name}"
        # The nested file's name does not exist directly under the prefix.
        absent_path = f"{prefix}/{nested_name}"

        guard = repo_checking_side_effect(subrepo.dvc)
        with mock.patch.object(fs.fs, "_get_repo", side_effect=guard):
            assert fs.exists(file_path) is True
            assert fs.exists(absent_path) is False

            assert fs.isfile(file_path) is True
            assert fs.isfile(nested_path) is True
            assert fs.isfile(dir_path) is False

            assert fs.isdir(dir_path) is True
            assert fs.isdir(nested_path) is False
            assert fs.isdvc(file_path) is True
예제 #12
0
파일: test_dvc.py 프로젝트: jear/dvc
def test_exists_isdir_isfile_dirty(tmp_dir, dvc):
    """Check ``exists``/``isfile``/``isdir`` against a modified workspace.

    First the tracked paths are deleted from the workspace, then they are
    recreated with their kinds swapped (file where the directory was, and
    vice versa).
    """
    tmp_dir.dvc_gen(
        {"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}}
    )

    fs = DvcFileSystem(repo=dvc)

    def verify(expectations):
        # Each row is (predicate name, path, expected truthiness).
        for predicate, path, expected in expectations:
            assert bool(getattr(fs, predicate)(path)) == expected

    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    verify(
        [
            ("exists", "datafile", True),
            ("exists", "datadir", True),
            ("exists", "datadir/foo", True),
            ("isfile", "datafile", True),
            ("isfile", "datadir", False),
            ("isfile", "datadir/foo", True),
            ("isdir", "datafile", False),
            ("isdir", "datadir", True),
            ("isdir", "datadir/foo", False),
        ]
    )

    # NOTE: creating file instead of dir and dir instead of file
    tmp_dir.gen({"datadir": "data", "datafile": {"foo": "foo", "bar": "bar"}})

    verify(
        [
            ("exists", "datafile", True),
            ("exists", "datadir", True),
            ("exists", "datadir/foo", False),
            ("exists", "datafile/foo", True),
            ("isfile", "datafile", False),
            ("isfile", "datadir", True),
            ("isfile", "datadir/foo", False),
            ("isfile", "datafile/foo", True),
            ("isdir", "datafile", True),
            ("isdir", "datadir", False),
            ("isdir", "datadir/foo", False),
            ("isdir", "datafile/foo", False),
        ]
    )
예제 #13
0
파일: diff.py 프로젝트: jear/dvc
def diff(self, a_rev="HEAD", b_rev=None, targets=None):
    """
    Compare path checksums between two revisions.

    By default, it compares the workspace with the last commit's fs.

    This implementation differs from `git diff` since DVC doesn't have
    the concept of `index`, but it keeps the same interface, thus,
    `dvc diff` would be the same as `dvc diff HEAD`.

    Args:
        a_rev: revision treated as the "old" side.
        b_rev: revision treated as the "new" side; any falsy value means
            "workspace".
        targets: optional targets to restrict the comparison to; ``None``
            means no restriction.

    Returns:
        A dict with the keys "added", "deleted", "modified", "renamed" and
        "not in cache", or an empty dict when the repo has no commits or
        every one of those lists is empty.

    Raises:
        PathMissingError: if a target is missing in both revisions.
    """
    if self.scm.no_commits:
        return {}

    from dvc.fs.dvc import DvcFileSystem

    dvcfs = DvcFileSystem(repo=self)

    b_rev = b_rev or "workspace"
    checksums_by_rev = {}
    missing_by_rev = {}
    for rev in self.brancher(revs=[a_rev, b_rev]):
        # brancher always returns workspace, but we only need to compute
        # workspace paths/checksums if b_rev was None
        if rev == "workspace" and rev != b_rev:
            continue

        if targets is None:
            paths = None
        else:
            # convert targets to paths, and capture any missing targets
            paths, missing_by_rev[rev] = _targets_to_paths(dvcfs, targets)

        checksums_by_rev[rev] = _paths_checksums(self, paths)

    if targets is not None:
        # check for overlapping missing targets between a_rev and b_rev
        missing_in_both = set(missing_by_rev[a_rev]) & set(
            missing_by_rev[b_rev]
        )
        for target in missing_in_both:
            raise PathMissingError(target, self)

    old = checksums_by_rev[a_rev]
    new = checksums_by_rev[b_rev]

    # Compare paths between the old and new fs.
    # set() efficiently converts dict keys to a set
    old_paths = set(old)
    new_paths = set(new)

    added = sorted(new_paths - old_paths)
    deleted_or_missing = old_paths - new_paths
    # missing status is only applicable when diffing local workspace
    # against a commit
    missing = (
        sorted(_filter_missing(dvcfs, deleted_or_missing))
        if b_rev == "workspace"
        else []
    )
    deleted = sorted(deleted_or_missing - set(missing))
    modified = sorted(old_paths & new_paths)

    # Cases when file was changed and renamed are resulted
    # in having deleted and added record
    # To cover such cases we need to change hashing function
    # to produce rolling/chunking hash

    renamed = _calculate_renamed(new, old, added, deleted)

    # A renamed path is reported once, not also as an add plus a delete.
    for entry in renamed:
        moved = entry["path"]
        added.remove(moved["new"])
        deleted.remove(moved["old"])

    added_entries = [{"path": path, "hash": new[path]} for path in added]
    deleted_entries = [{"path": path, "hash": old[path]} for path in deleted]
    # Paths present on both sides count as modified only if the hash differs.
    modified_entries = [
        {"path": path, "hash": {"old": old[path], "new": new[path]}}
        for path in modified
        if old[path] != new[path]
    ]
    missing_entries = [{"path": path, "hash": old[path]} for path in missing]

    ret = {
        "added": added_entries,
        "deleted": deleted_entries,
        "modified": modified_entries,
        "renamed": renamed,
        "not in cache": missing_entries,
    }

    if not any(ret.values()):
        return {}
    return ret
예제 #14
0
파일: __init__.py 프로젝트: pmrowla/dvc
    def dvcfs(self):
        """Build a ``DvcFileSystem`` for this object.

        Passes this object's ``subrepos`` setting and forwards every entry
        of ``_fs_conf`` as an extra keyword argument.
        """
        # Imported at call time rather than at module import time.
        from dvc.fs.dvc import DvcFileSystem

        filesystem = DvcFileSystem(
            repo=self, subrepos=self.subrepos, **self._fs_conf
        )
        return filesystem