Esempio n. 1
0
def test_read_with_subrepos(tmp_dir, scm, local_cloud, local_repo):
    """Check that ``api.read`` returns file contents from a repo and its subrepo.

    A Git-tracked ``foo.txt`` is committed in the top-level repo, and a
    subrepo under ``dir/subrepo`` receives one Git-tracked file, one
    DVC-tracked directory and one DVC-tracked file, which are then pushed.
    Each file is read back through ``api.read`` using a path relative to the
    top-level repo.

    When ``local_repo`` is truthy the ``repo`` argument is ``None``;
    otherwise a ``file:///`` URL built from ``tmp_dir`` is passed.
    """
    tmp_dir.scm_gen("foo.txt", "foo.txt", commit="add foo.txt")
    subrepo = tmp_dir / "dir" / "subrepo"
    make_subrepo(subrepo, scm, config=local_cloud.config)
    with subrepo.chdir():
        subrepo.scm_gen({"lorem": "lorem"}, commit="add lorem")
        subrepo.dvc_gen({"dir": {"file.txt": "file.txt"}}, commit="add dir")
        # This commit adds "dvc-file", not a directory, so the message says so
        # (it used to repeat "add dir" from the line above).
        subrepo.dvc_gen("dvc-file", "dvc-file", commit="add dvc-file")
        subrepo.dvc.push()

    repo_path = None if local_repo else f"file:///{tmp_dir}"
    subrepo_path = os.path.join("dir", "subrepo")

    assert api.read("foo.txt", repo=repo_path) == "foo.txt"

    # (path relative to the top-level repo, expected content)
    subrepo_cases = [
        (os.path.join(subrepo_path, "lorem"), "lorem"),
        (os.path.join(subrepo_path, "dvc-file"), "dvc-file"),
        (os.path.join(subrepo_path, "dir", "file.txt"), "file.txt"),
    ]
    for rel_path, content in subrepo_cases:
        assert api.read(rel_path, repo=repo_path) == content, rel_path
Esempio n. 2
0
def test_update_from_subrepos(tmp_dir, dvc, erepo_dir, is_dvc):
    """Check that an import from a subrepo can be updated after a new commit.

    ``foo`` is created in the subrepo (DVC-tracked when ``is_dvc`` is truthy,
    Git-tracked otherwise) and imported as ``out``. After ``foo`` changes
    upstream, the status must report an available update and ``dvc.update``
    must bring the new content and a refreshed ``rev_lock``.
    """
    subrepo = erepo_dir / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    write = subrepo.dvc_gen if is_dvc else subrepo.scm_gen
    with subrepo.chdir():
        write("foo", "foo", commit="subrepo initial")

    dep_path = os.path.join("subrepo", "foo")
    url = os.fspath(erepo_dir)
    dvc.imp(url, dep_path, out="out")
    assert dvc.status() == {}

    with subrepo.chdir():
        write("foo", "foobar", commit="subrepo second commit")

    out_status = dvc.status()["out.dvc"]
    expected_deps = {f"{dep_path} ({url})": "update available"}
    assert out_status[0]["changed deps"] == expected_deps

    # Exactly one stage is expected back; unpacking fails otherwise.
    (stage,) = dvc.update(["out.dvc"])

    assert (tmp_dir / "out").read_text() == "foobar"
    assert stage.deps[0].def_path == dep_path
    assert stage.deps[0].def_repo == {
        "url": url,
        "rev_lock": erepo_dir.scm.get_rev(),
    }
Esempio n. 3
0
def test_hook_is_called(tmp_dir, erepo_dir, mocker):
    """Check that walking an external repo calls ``repo_factory`` per subrepo.

    Six subrepos (some nested in plain directories, one nested in another
    subrepo) are created inside ``erepo_dir``. Walking the external repo with
    ``ignore_subrepos=False`` must call the spied ``repo_factory`` once for
    each of them, with the subrepo's path under the external repo's root.
    """
    subrepo_paths = [
        "subrepo1",
        "subrepo2",
        os.path.join("dir", "subrepo3"),
        os.path.join("dir", "subrepo4"),
        "subrepo5",
        os.path.join("subrepo5", "subrepo6"),
    ]
    subrepos = []
    for rel_path in subrepo_paths:
        location = erepo_dir / rel_path
        make_subrepo(location, erepo_dir.scm)
        subrepos.append(location)

    # Every subrepo and the root repo get one Git-tracked and one DVC-tracked
    # file.
    for target in [*subrepos, erepo_dir]:
        with target.chdir():
            target.scm_gen("foo", "foo", commit=f"git add {target}/foo")
            target.dvc_gen("bar", "bar", commit=f"dvc add {target}/bar")

    with external_repo(str(erepo_dir)) as ext_repo:
        spy = mocker.spy(ext_repo.repo_fs.fs, "repo_factory")

        # Consume the whole walk so every directory is visited.
        for _ in ext_repo.repo_fs.walk("", ignore_subrepos=False):
            pass
        assert spy.call_count == len(subrepos)

        expected_calls = [
            call(
                os.path.join(ext_repo.root_dir, rel_path),
                fs=ext_repo.fs,
                repo_factory=ext_repo.repo_fs.fs.repo_factory,
            )
            for rel_path in subrepo_paths
        ]
        spy.assert_has_calls(expected_calls, any_order=True)
Esempio n. 4
0
def test_granular_get_from_subrepos(tmp_dir, erepo_dir):
    """Check that ``Repo.get`` can fetch one file from a DVC-tracked directory
    that lives inside a subrepo."""
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    tracked_tree = {"dir": {"bar": "bar"}}
    with nested.chdir():
        nested.dvc_gen(tracked_tree, commit="files in subrepo")

    # Address a single file inside the tracked directory, relative to the
    # top-level external repo.
    target = os.path.join("subrepo", "dir", "bar")
    Repo.get(os.fspath(erepo_dir), target, out="out")

    downloaded = tmp_dir / "out"
    assert downloaded.read_text() == "bar"
Esempio n. 5
0
def test_get_from_subrepos(tmp_dir, erepo_dir, is_dvc, files):
    """Check that ``Repo.get`` fetches the first entry of ``files`` from a subrepo.

    ``files`` is generated inside the subrepo with ``dvc_gen`` when ``is_dvc``
    is truthy and with ``scm_gen`` otherwise.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    if is_dvc:
        generate = nested.dvc_gen
    else:
        generate = nested.scm_gen
    with nested.chdir():
        generate(files, commit="add files in subrepo")

    # Only the first entry of the mapping is fetched and compared.
    key = next(iter(files))
    expected = files[key]
    Repo.get(os.fspath(erepo_dir), f"subrepo/{key}", out="out")

    assert (tmp_dir / "out").read_text() == expected
Esempio n. 6
0
def test_granular_import_from_subrepos(tmp_dir, dvc, erepo_dir):
    """Check that ``dvc.imp`` can import one file from a DVC-tracked directory
    inside a subrepo, and that the stage records where it came from."""
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)
    with nested.chdir():
        nested.dvc_gen({"dir": {"bar": "bar"}}, commit="files in subrepo")

    source_url = os.fspath(erepo_dir)
    path = os.path.join("subrepo", "dir", "bar")
    stage = dvc.imp(source_url, path, out="out")

    assert (tmp_dir / "out").read_text() == "bar"

    # The dependency must keep the path as given and pin the external repo's
    # current revision.
    assert stage.deps[0].def_path == path
    expected_repo = {
        "url": os.fspath(erepo_dir),
        "rev_lock": erepo_dir.scm.get_rev(),
    }
    assert stage.deps[0].def_repo == expected_repo
Esempio n. 7
0
def test_get_url_subrepos(tmp_dir, scm, local_cloud):
    """Check that ``api.get_url`` resolves DVC-tracked files inside a subrepo.

    The subrepo is configured with ``local_cloud.config`` and its data is
    pushed; the returned URLs must point under the remote's path (relative to
    the current directory), in a two-character prefix directory.
    """
    nested = tmp_dir / "subrepo"
    make_subrepo(nested, scm, config=local_cloud.config)
    with nested.chdir():
        nested.dvc_gen(
            {"dir": {"foo": "foo"}, "bar": "bar"}, commit="add files"
        )
        nested.dvc.push()

    remote_root = os.path.relpath(local_cloud.config["url"])

    # (path handed to api.get_url, (prefix dir, object name) under the remote)
    cases = [
        (
            os.path.join("subrepo", "dir", "foo"),
            ("ac", "bd18db4cc2f85cedef654fccc4a4d8"),
        ),
        ("subrepo/bar", ("37", "b51d194a7513e45b56f6524f2d51f2")),
    ]
    for target, (prefix, name) in cases:
        expected_url = os.path.join(remote_root, prefix, name)
        assert api.get_url(target) == expected_url
Esempio n. 8
0
def test_subrepos_are_ignored_for_git_tracked_dirs(tmp_dir, erepo_dir):
    """Check that downloading a Git-tracked directory skips a nested subrepo.

    ``dir`` holds Git-tracked files plus a subrepo with a DVC-tracked file;
    the downloaded copy must contain only the Git-tracked files.
    """
    nested = erepo_dir / "dir" / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    with erepo_dir.chdir():
        git_tracked = {
            "foo": "foo",
            "bar": "bar",
            "subdir": {"lorem": "lorem"},
        }
        erepo_dir.scm_gen({"dir": git_tracked}, commit="add scm dir")

    with nested.chdir():
        nested.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as ext_repo:
        download = ext_repo.repo_fs.download
        source = os.path.join(ext_repo.root_dir, "dir")
        destination = os.fspath(tmp_dir / "out")
        download(source, destination)

        # Nothing from the subrepo may show up in the downloaded tree.
        assert (tmp_dir / "out").read_text() == git_tracked
Esempio n. 9
0
def test_metadata_on_subrepos(make_tmp_dir, tmp_dir, dvc, scm, repo_fs):
    """Check which repo ``repo_fs.metadata`` reports for various paths.

    Paths at or under the subrepo must report the subrepo as their repo root,
    while an external output added to the top-level repo must report
    ``tmp_dir``.
    """
    nested = tmp_dir / "subrepo"
    make_subrepo(nested, scm)
    nested.scm_gen("foo", "foo", commit="add foo on subrepo")
    nested.dvc_gen("foobar", "foobar", commit="add foobar on subrepo")

    # The subrepo root, a Git-tracked file and a DVC-tracked file.
    subrepo_root = str(nested)
    for rel in ("subrepo", "subrepo/foo", "subrepo/foobar"):
        reported = repo_fs.metadata(tmp_dir / rel).repo.root_dir
        assert reported == subrepo_root, f"repo root didn't match for {rel}"

    # supports external outputs on top-level DVC repo
    outside = make_tmp_dir("external-output")
    outside.gen("bar", "bar")
    external_file = outside / "bar"
    dvc.add(str(external_file), external=True)
    external_meta = repo_fs.metadata(outside / "bar")
    assert external_meta.repo.root_dir == str(tmp_dir)
Esempio n. 10
0
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    """Check that a nested subrepo is left out when ``dir`` is downloaded or staged.

    ``erepo_dir/dir`` receives one DVC-tracked file (``foo``), one Git-tracked
    file (``bar``) and a nested subrepo holding its own DVC-tracked ``file``.
    The test first downloads ``dir`` with ``follow_subrepos=False`` and then
    stages and transfers ``dir`` into the repo's local object database,
    asserting each time that only the expected entries are present.
    """
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.repo_fs.download(
            PathInfo(repo.root_dir) / "dir",
            PathInfo(tmp_dir / "out"),
            follow_subrepos=False,
        )
        # No "subrepo" entry is expected in the downloaded copy. The
        # ".gitignore" is presumably the one written when "foo" was added to
        # DVC -- confirm against dvc_gen.
        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        # Start from an empty cache directory (and cleaned staging area) so
        # the glob below only sees what stage() + transfer() write.
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        # Stage "dir" using md5 hashes and the repo's dvcignore rules, then
        # move the staged object into the local object database.
        staging, _, obj = stage(
            repo.odb.local,
            PathInfo(repo.root_dir) / "dir",
            repo.repo_fs,
            "md5",
            dvcignore=repo.dvcignore,
        )
        transfer(
            staging,
            repo.odb.local,
            {obj.hash_info},
            shallow=False,
            move=True,
        )
        # Exactly four cache entries are expected. Presumably these are the
        # directory listing (".dir") plus the objects for foo, bar and
        # .gitignore, with nothing for the subrepo's "file" -- TODO confirm
        # which hash maps to which file.
        assert set(cache_dir.glob("??/*")) == {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
Esempio n. 11
0
def test_import_subrepos(tmp_dir, erepo_dir, dvc, scm, is_dvc, files):
    """Check that ``dvc.imp`` imports the first entry of ``files`` from a subrepo.

    ``files`` is generated inside the subrepo with ``dvc_gen`` when ``is_dvc``
    is truthy and with ``scm_gen`` otherwise. The resulting stage must record
    the import path and the external repo's current revision.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    generate = nested.dvc_gen if is_dvc else nested.scm_gen
    with nested.chdir():
        generate(files, commit="add files in subrepo")

    # Import path of the first entry, expressed relative to the external repo.
    key = next(iter(files))
    absolute = nested / key
    path = str(absolute.relative_to(erepo_dir))

    stage = dvc.imp(os.fspath(erepo_dir), path, out="out")

    assert (tmp_dir / "out").read_text() == files[key]
    assert stage.deps[0].def_path == path
    expected_repo = {
        "url": os.fspath(erepo_dir),
        "rev_lock": erepo_dir.scm.get_rev(),
    }
    assert stage.deps[0].def_repo == expected_repo
Esempio n. 12
0
def test_subrepo(tmp_dir, scm, workspace):
    """Run an experiment inside a DVC subrepo and check the resulting commit.

    A subrepo is created under ``dir/repo`` with a ``copy-file`` stage that
    is declared with ``params.yaml`` -> ``metrics.yaml`` via ``copy.py``. An
    experiment overriding ``foo=2`` is run, and the test then checks that the
    experiment revision contains the expected files and that its name and
    revision resolve consistently.
    """
    # make_subrepo is a helper that lives in another test module.
    from tests.unit.fs.test_repo import make_subrepo

    subrepo = tmp_dir / "dir" / "repo"
    make_subrepo(subrepo, scm)

    subrepo.gen("copy.py", COPY_SCRIPT)
    subrepo.gen("params.yaml", "foo: 1")

    with subrepo.chdir():
        # NOTE(review): no_exec=True presumably only records the stage
        # without running the command -- confirm against dvc.run.
        subrepo.dvc.run(
            cmd="python copy.py params.yaml metrics.yaml",
            metrics_no_cache=["metrics.yaml"],
            params=["foo"],
            name="copy-file",
            no_exec=True,
        )
        # The pipeline file, script and params are committed through the
        # outer `scm` fixture, using paths under the subrepo.
        scm.add(
            [
                subrepo / "dvc.yaml",
                subrepo / "copy.py",
                subrepo / "params.yaml",
            ]
        )
        scm.commit("init")

        # `tmp_dir` is the negation of the `workspace` fixture; presumably
        # this switches between running in the workspace and in a temporary
        # directory -- verify against experiments.run.
        results = subrepo.dvc.experiments.run(
            PIPELINE_FILE, params=["foo=2"], tmp_dir=not workspace
        )
        assert results

    # Take the first result and the first experiment ref found for it.
    exp = first(results)
    ref_info = first(exp_refs_by_rev(scm, exp))

    # Inspect files through the filesystem obtained for the experiment
    # revision, addressed by their paths under the subrepo.
    fs = scm.get_fs(exp)
    for fname in ["metrics.yaml", "dvc.lock"]:
        assert fs.exists(subrepo / fname)
    with fs.open(subrepo / "metrics.yaml") as fobj:
        # The metrics must reflect the overridden parameter value.
        assert fobj.read().strip() == "foo: 2"

    assert subrepo.dvc.experiments.get_exact_name(exp) == ref_info.name
    assert scm.resolve_rev(ref_info.name) == exp
Esempio n. 13
0
def test_import_complete_repo(tmp_dir, dvc, erepo_dir):
    """Check importing a whole subrepo and a whole top-level repo.

    Importing ``subrepo`` must yield only the subrepo's content, and importing
    the repo root (``os.curdir``) must yield only the top-level content,
    without the subrepo.
    """
    with erepo_dir.chdir():
        erepo_dir.dvc_gen({"foo": "foo"}, commit="add foo")

    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)
    with nested.chdir():
        nested.dvc_gen({"dir": {"bar": "bar"}}, commit="files in subrepo")

    source_url = os.fspath(erepo_dir)

    # Whole subrepo.
    dvc.imp(source_url, "subrepo", out="out_sub")
    subrepo_content = {
        ".gitignore": "/dir\n",
        "dir": {"bar": "bar"},
    }
    assert (tmp_dir / "out_sub").read_text() == subrepo_content

    # Whole top-level repo.
    dvc.imp(os.fspath(erepo_dir), os.curdir, out="out")
    root_content = {
        ".gitignore": "/foo\n",
        "foo": "foo",
    }
    assert (tmp_dir / "out").read_text() == root_content
Esempio n. 14
0
def test_subrepo_is_constructed_properly(
    tmp_dir, scm, mocker, make_tmp_dir, root_is_dvc
):
    """Check the configuration of a subrepo built while walking an external repo.

    The external repo is opened with a custom cache directory and the
    ``symlink`` cache type. The subrepo returned by the spied ``repo_factory``
    must share both settings, and its auto-generated upstream remote must
    point at the original subrepo's cache. When ``root_is_dvc`` is truthy the
    root is a DVC repo too and its upstream remote is checked as well.
    """
    if root_is_dvc:
        make_subrepo(tmp_dir, scm)

    nested = tmp_dir / "subrepo"
    make_subrepo(nested, scm)
    original_subrepo_cache = nested.dvc.odb.local.cache_dir

    tmp_dir.scm_gen("bar", "bar", commit="add bar")
    nested.dvc_gen("foo", "foo", commit="add foo")

    temp_cache = make_tmp_dir("temp-cache")
    with external_repo(
        str(tmp_dir), cache_dir=str(temp_cache), cache_types=["symlink"]
    ) as ext_repo:
        spy = mocker.spy(ext_repo.repo_fs, "repo_factory")

        # Consume the whole walk so the subrepo gets constructed.
        for _ in ext_repo.repo_fs.walk(
            ext_repo.root_dir, ignore_subrepos=False
        ):
            pass
        assert spy.call_count == 1
        built_subrepo = spy.spy_return

        # Cache directory is shared by the external repo and its subrepo.
        assert ext_repo.url == str(tmp_dir)
        assert ext_repo.config["cache"]["dir"] == str(temp_cache)
        assert ext_repo.odb.local.cache_dir == str(temp_cache)
        assert built_subrepo.odb.local.cache_dir == str(temp_cache)

        # So is the cache link type.
        assert ext_repo.config["cache"]["type"] == ["symlink"]
        assert ext_repo.odb.local.cache_types == ["symlink"]
        assert built_subrepo.odb.local.cache_types == ["symlink"]

        subrepo_remotes = built_subrepo.config["remote"]
        subrepo_upstream = subrepo_remotes["auto-generated-upstream"]["url"]
        assert subrepo_upstream == original_subrepo_cache

        if root_is_dvc:
            main_cache = tmp_dir.dvc.odb.local.cache_dir
            root_remotes = ext_repo.config["remote"]
            root_upstream = root_remotes["auto-generated-upstream"]["url"]
            assert root_upstream == str(main_cache)
Esempio n. 15
0
def test_pull_imported_stage_from_subrepos(tmp_dir, dvc, erepo_dir, is_dvc,
                                           files):
    """Check that an import from a subrepo can be restored with ``dvc.pull``.

    The first entry of ``files`` is imported as ``out``; the local cache and
    the output are then wiped, and pulling ``out.dvc`` must bring the content
    back and report it as added.
    """
    nested = erepo_dir / "subrepo"
    make_subrepo(nested, erepo_dir.scm)

    if is_dvc:
        generate = nested.dvc_gen
    else:
        generate = nested.scm_gen
    with nested.chdir():
        generate(files, commit="files in subrepo")

    key = first(files)
    source_path = os.path.join("subrepo", key)
    dvc.imp(os.fspath(erepo_dir), source_path, out="out")

    # Wipe the cache and the imported output, leaving an empty cache dir.
    remove(dvc.odb.local.cache_dir)
    remove("out")
    makedirs(dvc.odb.local.cache_dir)

    stats = dvc.pull(["out.dvc"])

    # Directory outputs are reported with a trailing path separator.
    if isinstance(files[key], dict):
        expected = [f"out{os.sep}"]
    else:
        expected = ["out"]
    assert stats["added"] == expected
    assert (tmp_dir / "out").read_text() == files[key]
Esempio n. 16
0
def test_info_on_subrepos(make_tmp_dir, tmp_dir, dvc, scm, repo_fs):
    """Check which repo ``repo_fs.info`` reports for various paths.

    Paths at or under the subrepo must report the subrepo as their repo root,
    while an external output added to the top-level repo must report
    ``tmp_dir``.
    """
    nested = tmp_dir / "subrepo"
    make_subrepo(nested, scm)
    with nested.chdir():
        nested.scm_gen("foo", "foo", commit="add foo on subrepo")
        nested.dvc_gen("foobar", "foobar", commit="add foobar on subrepo")

    # The subrepo root, a Git-tracked file and a DVC-tracked file.
    checked_paths = (
        "subrepo",
        os.path.join("subrepo", "foo"),
        os.path.join("subrepo", "foobar"),
    )
    for rel in checked_paths:
        reported = repo_fs.info(tmp_dir / rel)["repo"].root_dir
        assert reported == str(
            nested), f"repo root didn't match for {rel}"

    # supports external outputs on top-level DVC repo
    outside = make_tmp_dir("external-output")
    outside.gen("bar", "bar")
    dvc.add(str(outside / "bar"), external=True)
    external_info = repo_fs.info((outside / "bar").fs_path)
    assert external_info["repo"].root_dir == str(tmp_dir)