Example #1
0
def test_walk(tmp_dir, dvc, dvcfiles, extra_expected):
    """Check that ``RepoTree.walk`` reports every entry under ``dir`` once.

    ``dvcfiles`` is forwarded to ``walk`` unchanged, and ``extra_expected``
    is appended to the baseline list of expected paths. Both are supplied by
    the caller (presumably pytest parametrization -- not visible here).
    """
    # The two sub-directories are created first and added to DVC recursively.
    added_layout = {
        "subdir1": {"foo1": "foo1", "bar1": "bar1"},
        "subdir2": {"foo2": "foo2"},
    }
    tmp_dir.gen({"dir": added_layout})
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    # "foo" and "bar" are generated only after the ``dvc.add`` call above.
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    base = PathInfo("dir")
    baseline = [
        base / "subdir1",
        base / "subdir2",
        base / "subdir1" / "foo1",
        base / "subdir1" / "bar1",
        base / "subdir2" / "foo2",
        base / "foo",
        base / "bar",
    ]

    # Flatten the (root, dirs, files) triples into full paths.
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk("dir", dvcfiles=dvcfiles)
        for name in dirs + files
    ]

    expected = [str(item) for item in baseline + extra_expected]
    # Same set of paths and same count, i.e. nothing is reported twice.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
Example #2
0
def test_walk_mixed_dir(tmp_dir, scm, dvc):
    """Walk a directory whose files are split between DVC and git.

    ``dir/foo`` is added through DVC, while ``dir/bar``, ``dir/.gitignore``
    and ``dir/foo.dvc`` are added and committed through git. The walk is
    expected to report ``foo``, ``bar`` and ``.gitignore`` (``foo.dvc`` is
    not part of the expectation).
    """
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))

    git_paths = [
        os.path.join("dir", name) for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_paths)
    tmp_dir.scm.commit("add dir")

    tree = RepoTree(dvc)

    base = PathInfo("dir")
    expected = [str(base / name) for name in ("foo", "bar", ".gitignore")]

    # Flatten the (root, dirs, files) triples into full paths.
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk("dir")
        for name in dirs + files
    ]

    # Same set of paths and same count, i.e. nothing is reported twice.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
Example #3
0
def test_walk_dirty(tmp_dir, dvc):
    """Walk a DVC-generated directory after the workspace has been modified.

    After ``dvc_gen`` creates ``dir``, a new file ``bar`` and a new
    sub-directory ``subdir3`` are generated and ``dir/foo`` is deleted.
    The expectation contains the new entries and omits ``foo``.
    """
    tmp_dir.dvc_gen(
        {
            "dir": {
                "foo": "foo",
                "subdir1": {"foo1": "foo1", "bar1": "bar1"},
                "subdir2": {"foo2": "foo2"},
            }
        }
    )
    # Make the workspace diverge: add new entries, then drop "foo".
    tmp_dir.gen({"dir": {"bar": "bar", "subdir3": {"foo3": "foo3"}}})
    (tmp_dir / "dir" / "foo").unlink()

    tree = RepoTree(dvc)

    base = PathInfo("dir")
    expected_paths = [
        base / "subdir1",
        base / "subdir2",
        base / "subdir3",
        base / "subdir1" / "foo1",
        base / "subdir1" / "bar1",
        base / "subdir2" / "foo2",
        base / "subdir3" / "foo3",
        base / "bar",
    ]

    # Flatten the (root, dirs, files) triples into full paths.
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk("dir")
        for name in dirs + files
    ]

    expected = [str(item) for item in expected_paths]
    # Same set of paths and same count, i.e. nothing is reported twice.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
Example #4
0
def test_walk_onerror(tmp_dir, dvc):
    """Walking a bad path passes silently unless ``onerror`` re-raises.

    Two bad inputs are covered: ``"dir"``, which does not exist, and
    ``"foo"``, which is a file rather than a directory. Without ``onerror``
    the walk must complete without raising; with a re-raising ``onerror``
    an ``OSError`` is expected.
    """

    def reraise(exc):
        raise exc

    def exhaust(path, **walk_kwargs):
        # Drive the walk generator to completion, discarding what it yields.
        for _ in tree.walk(path, **walk_kwargs):
            pass

    tmp_dir.dvc_gen("foo", "foo")
    tree = RepoTree(dvc)

    # path does not exist
    exhaust("dir")
    with pytest.raises(OSError):
        exhaust("dir", onerror=reraise)

    # path is not a directory
    exhaust("foo")
    with pytest.raises(OSError):
        exhaust("foo", onerror=reraise)
Example #5
0
def test_subrepo_walk(tmp_dir, scm, dvc, dvcfiles, extra_expected):
    """Walk ``dir`` with ``subrepos=True`` and expect both subrepos' content.

    ``dir`` holds a git-committed ``repo.txt`` next to two subrepos, ``repo``
    and ``repo2``, each populated through ``dvc_gen``. ``dvcfiles`` is
    forwarded to ``walk`` and ``extra_expected`` is appended to the baseline
    expectation (both supplied by the caller -- presumably pytest
    parametrization, not visible here).
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )

    subrepo1 = tmp_dir / "dir" / "repo"
    subrepo2 = tmp_dir / "dir" / "repo2"
    for subrepo in (subrepo1, subrepo2):
        make_subrepo(subrepo, scm)

    subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo2.dvc_gen(
        {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
    )

    # using tree that does not have dvcignore
    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=True, fetch=True)

    base = PathInfo("dir")
    baseline = [
        base / "repo",
        base / "repo.txt",
        base / "repo2",
        base / "repo" / ".gitignore",
        base / "repo" / "foo",
        base / "repo" / "dir1",
        base / "repo" / "dir1" / "bar",
        base / "repo2" / ".gitignore",
        base / "repo2" / "lorem",
        base / "repo2" / "dir2",
        base / "repo2" / "dir2" / "ipsum",
    ]

    # The walk starts from a path under tree.root_dir, so the expected paths
    # are joined onto tree.root_dir below as well.
    start = os.path.join(tree.root_dir, "dir")
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk(start, dvcfiles=dvcfiles)
        for name in dirs + files
    ]

    expected = [
        os.path.join(tree.root_dir, item) for item in baseline + extra_expected
    ]
    # Same set of paths and same count, i.e. nothing is reported twice.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
Example #6
0
def test_walk_nested_subrepos(tmp_dir, dvc, scm, traverse_subrepos):
    """Walk a repo with nested subrepos, with and without subrepo traversal.

    Three subrepos are created (``subrepo1``, ``subrepo2`` and
    ``subrepo1/subrepo3``). Every repo, including the top-level one, gets a
    git-generated and a DVC-generated structure whose names carry the repo
    directory's basename as a suffix. The walk result is compared, per root
    directory, against the set of entries expected there.
    """

    def git_layout(tag):
        # Structure generated through scm_gen for the repo named ``tag``.
        return {
            f"foo-{tag}": f"foo-{tag}",
            f"dir-{tag}": {f"bar-{tag}": f"bar-{tag}"},
        }

    def dvc_layout(tag):
        # Structure generated through dvc_gen for the repo named ``tag``.
        return {
            f"lorem-{tag}": f"lorem-{tag}",
            f"dvc-{tag}": {f"ipsum-{tag}": f"ipsum-{tag}"},
        }

    subrepos = [
        tmp_dir / rel for rel in ("subrepo1", "subrepo2", "subrepo1/subrepo3")
    ]
    for subrepo in subrepos:
        make_subrepo(subrepo, scm)

    always_present = {".dvcignore", ".gitignore"}  # these files are always there
    expected = {}
    for repo_dir in [*subrepos, tmp_dir]:
        name = os.path.basename(repo_dir)
        git_files = git_layout(name)
        dvc_files = dvc_layout(name)
        with repo_dir.chdir():
            repo_dir.scm_gen(git_files, commit=f"git add in {repo_dir}")
            repo_dir.dvc_gen(dvc_files, commit=f"dvc add in {repo_dir}")

        # Subrepo content is only expected when subrepos are traversed; the
        # top-level repo is always expected.
        if not (traverse_subrepos or repo_dir == tmp_dir):
            continue

        expected[str(repo_dir)] = {*git_files, *dvc_files, *always_present}
        # files inside a dvc directory
        expected[str(repo_dir / f"dvc-{name}")] = {f"ipsum-{name}"}
        # files inside a git directory
        expected[str(repo_dir / f"dir-{name}")] = {f"bar-{name}"}

    if traverse_subrepos:
        # the subrepo directories themselves show up in their parents
        expected[str(tmp_dir)] |= {"subrepo1", "subrepo2"}
        expected[str(tmp_dir / "subrepo1")].add("subrepo3")

    tree = RepoTree(dvc, subrepos=traverse_subrepos)
    actual = {
        root: set(dirs + files)
        for root, dirs, files in tree.walk(str(tmp_dir))
    }
    assert expected == actual
Example #7
0
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with ``fetch=True`` must bring the directory's cache back.

    The directory is generated and pushed, then both the local cache
    directory and the workspace copy are removed. After a full walk the
    output's cache path and the cache file of every ``dir_cache`` entry are
    expected to exist again.
    """
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]
    dvc.push()
    # Wipe the local cache and the workspace copy of the directory.
    remove(dvc.cache.local.cache_dir)
    remove(tmp_dir / "dir")

    tree = RepoTree(dvc, fetch=True)
    # Only the side effect of walking matters; the yielded triples are unused.
    for _root, _dirs, _files in tree.walk("dir"):
        pass

    assert os.path.exists(out.cache_path)
    for _key, hash_info in out.dir_cache.items():
        assert hash_info.name == out.tree.PARAM_CHECKSUM
        cached = dvc.cache.local.tree.hash_to_path_info(hash_info.value)
        assert os.path.exists(cached)
Example #8
0
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with ``fetch=True`` must bring the directory's cache back.

    The directory is generated and pushed, then the local cache directory is
    removed. After a full walk (performed inside ``dvc.state``) the output's
    cache path and the cache file of every ``dir_cache`` entry are expected
    to exist again.
    """
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]
    dvc.push()
    # Wipe the local cache.
    remove(dvc.cache.local.cache_dir)

    tree = RepoTree(dvc, fetch=True)
    with dvc.state:
        # Only the side effect of walking matters; the triples are unused.
        for _root, _dirs, _files in tree.walk("dir"):
            pass

    assert os.path.exists(out.cache_path)
    for entry in out.dir_cache:
        checksum = entry[out.tree.PARAM_CHECKSUM]
        cached = dvc.cache.local.tree.hash_to_path_info(checksum)
        assert os.path.exists(cached)
Example #9
0
def test_walk_dirty_cached_dir(tmp_dir, scm, dvc):
    """Walk a DVC-generated directory after one of its files was deleted.

    ``data`` is generated with ``foo`` and ``bar``; ``foo`` is then removed
    from the workspace. The walk is expected to report ``bar`` only.
    """
    tmp_dir.dvc_gen(
        {"data": {"foo": "foo", "bar": "bar"}},
        commit="add data",
    )
    (tmp_dir / "data" / "foo").unlink()

    tree = RepoTree(dvc)
    data = PathInfo(tmp_dir) / "data"

    # Flatten the (root, dirs, files) triples into full paths.
    actual = [
        os.path.join(root, name)
        for root, dirs, files in tree.walk(data)
        for name in dirs + files
    ]

    remaining = (data / "bar").fspath
    assert actual == [remaining]
Example #10
0
def test_repo_tree_no_subrepos(tmp_dir, dvc, scm):
    """With ``subrepos=False`` the content of a subrepo must stay invisible.

    The top-level repo gets a git-committed ``dir/repo.txt`` and a
    DVC-generated ``lorem``; a subrepo is created at ``dir/repo`` and
    populated through both ``dvc_gen`` and ``scm_gen``. The walk and the
    ``isfile`` / ``isdir`` / ``isdvc`` / ``exists`` queries are expected to
    see only the top-level repo's entries.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )
    tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo")

    subrepo = tmp_dir / "dir" / "repo"
    make_subrepo(subrepo, scm)
    subrepo.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo.scm_gen({"ipsum": "ipsum"}, commit="BAR")

    # using tree that does not have dvcignore
    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=False, fetch=True)

    top_level_names = (".dvcignore", ".gitignore", "lorem", "lorem.dvc", "dir")
    expected_paths = [tmp_dir / name for name in top_level_names]
    expected_paths.append(tmp_dir / "dir" / "repo.txt")

    # Normalise each joined path before comparing it with the expectation.
    actual = [
        os.path.normpath(os.path.join(root, name))
        for root, dirs, files in tree.walk(tmp_dir, dvcfiles=True)
        for name in dirs + files
    ]

    expected = [str(item) for item in expected_paths]
    # Same set of paths and same count, i.e. nothing is reported twice.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)

    # File / directory queries: subrepo paths are reported as neither.
    assert tree.isfile(tmp_dir / "lorem") is True
    assert tree.isfile(tmp_dir / "dir" / "repo" / "foo") is False
    assert tree.isdir(tmp_dir / "dir" / "repo") is False
    assert tree.isdir(tmp_dir / "dir") is True

    # DVC-tracking queries.
    assert tree.isdvc(tmp_dir / "lorem") is True
    assert tree.isdvc(tmp_dir / "dir" / "repo" / "dir1") is False

    # Existence queries.
    assert tree.exists(tmp_dir / "dir" / "repo.txt") is True
    # NOTE(review): this path has no "dir" component, unlike the other
    # subrepo paths above -- confirm whether
    # tmp_dir / "dir" / "repo" / "ipsum" was intended.
    assert tree.exists(tmp_dir / "repo" / "ipsum") is False