Example #1
0
def test_commit_no_exec_missing_out(tmp_dir, dvc):
    """Forced commit of a ``no_exec`` stage fails when ``out`` is absent.

    Nothing in this test creates the declared output ``out``, so
    ``dvc.commit`` is expected to raise ``OutputDoesNotExistError``.
    """
    no_exec_stage = dvc.run(
        name="my",
        cmd="mycmd",
        outs=["out"],
        no_exec=True,
    )
    clean_staging()

    # A truthy status report is expected before the commit attempt.
    reported = dvc.status(no_exec_stage.path)
    assert reported

    with pytest.raises(OutputDoesNotExistError):
        dvc.commit(no_exec_stage.path, force=True)
Example #2
0
    def test(self):
        """Re-running a stage after overwriting its output runs the command.

        Once the output file has been overwritten, ``stage.run()`` is
        expected to call ``cmd_run`` exactly once and never call the
        output's ``checkout``.
        """
        from tests.utils import clean_staging

        command = f"python {self.CODE} {self.FOO} {self.BAR}"
        stage = self.dvc.run(
            deps=[self.FOO],
            outs=[self.BAR],
            cmd=command,
            single_stage=True,
        )
        clean_staging()

        # Set mode 0o644 on the output, then replace its contents.
        os.chmod(self.BAR, 0o644)
        with open(self.BAR, "w", encoding="utf-8") as handle:
            handle.write("corrupting the output cache")

        from dvc.stage.run import cmd_run

        # Both patches wrap the real callables so calls are recorded while
        # the original behaviour still runs.
        output = stage.outs[0]
        checkout_patch = mock.patch.object(
            output, "checkout", wraps=output.checkout
        )
        run_patch = mock.patch("dvc.stage.run.cmd_run", wraps=cmd_run)

        with self.dvc.lock, checkout_patch as checkout_spy, \
                run_patch as run_spy:
            stage.run()

            run_spy.assert_called_once()
            checkout_spy.assert_not_called()
Example #3
0
def test_push_wildcard_from_bare_git_repo(tmp_dir, make_tmp_dir, erepo_dir,
                                          local_cloud):
    """Import from a bare git repo after a glob-limited ``dvc push``.

    Only targets matching ``dire*`` are pushed from ``erepo_dir``. Importing
    ``dirextra`` through the bare repo must succeed, while importing
    ``dir123`` is expected to raise ``PathMissingError``.
    """
    import git

    bare_repo_path = os.fspath(tmp_dir)
    git.Repo.init(bare_repo_path, bare=True)

    tracked_dirs = {
        "dir123": {"foo": "foo content"},
        "dirextra": {"extrafoo": "extra foo content"},
    }
    erepo_dir.add_remote(config=local_cloud.config)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen(tracked_dirs, commit="initial")

    push_pattern = os.path.join(os.fspath(erepo_dir), "dire*")
    erepo_dir.dvc.push([push_pattern], glob=True)

    # Publish the git history of erepo_dir into the bare repository.
    erepo_dir.scm.gitpython.repo.create_remote("origin", bare_repo_path)
    erepo_dir.scm.gitpython.repo.remote("origin").push("master")

    importer = make_tmp_dir("dvc-repo", scm=True, dvc=True)
    with importer.chdir():
        importer.dvc.imp(bare_repo_path, "dirextra")
        clean_staging()

        with pytest.raises(PathMissingError):
            importer.dvc.imp(bare_repo_path, "dir123")
Example #4
0
def test_missing_cache(tmp_dir, dvc, local_remote, caplog):
    """Push and fetch log missing cache files; cloud status does not.

    After the local cache directory is removed, ``dvc.push`` and
    ``dvc.fetch`` must each log the header plus one line per tracked file,
    whereas ``dvc.status(cloud=True)`` reports both files as ``"missing"``
    without logging those lines.
    """
    from tests.utils import clean_staging

    tmp_dir.dvc_gen({"foo": "foo", "bar": "bar"})

    # purge cache
    remove(dvc.odb.local.cache_dir)
    clean_staging()

    header = ("Some of the cache files do not exist "
              "neither locally nor on remote. Missing cache files:\n")
    bar_line = "name: bar, md5: 37b51d194a7513e45b56f6524f2d51f2\n"
    foo_line = "name: foo, md5: acbd18db4cc2f85cedef654fccc4a4d8\n"
    expected_messages = (header, bar_line, foo_line)

    for sync in (dvc.push, dvc.fetch):
        caplog.clear()
        sync()
        for message in expected_messages:
            assert message in caplog.text

    caplog.clear()
    cloud_status = dvc.status(cloud=True)
    assert cloud_status == {
        "bar": "missing",
        "foo": "missing",
    }
    for message in expected_messages:
        assert message not in caplog.text
Example #5
0
def test_subrepo(dvc_top_level, erepo):
    """``Repo.ls`` lists the expected entries for a repo and its subrepo.

    The same git-tracked and DVC-tracked files are generated in ``erepo``
    and in ``erepo / "subrepo"``. DVC files are generated only for repos
    that expose a ``dvc`` attribute, which is why the top-level expectation
    depends on ``dvc_top_level``.
    """
    from tests.func.test_get import make_subrepo
    from tests.utils import clean_staging

    dvc_tracked = {"foo.txt": "foo.txt", "dvc_dir": {"lorem": "lorem"}}
    git_tracked = {"bar.txt": "bar.txt", "scm_dir": {"ipsum": "ipsum"}}
    subrepo = erepo / "subrepo"
    make_subrepo(subrepo, erepo.scm)

    for repo in (erepo, subrepo):
        with repo.chdir():
            repo.scm_gen(git_tracked, commit=f"scm track for top {repo}")
            if hasattr(repo, "dvc"):
                repo.dvc_gen(dvc_tracked, commit=f"dvc track for {repo}")
        clean_staging()

    def _list_files(repo, path=None):
        # Collect the "path" field of every entry returned by Repo.ls.
        entries = Repo.ls(os.fspath(repo), path)
        return {entry["path"] for entry in entries}

    git_tracked_outputs = {"bar.txt", "scm_dir"}
    dvc_outputs = {"dvc_dir", "foo.txt", "foo.txt.dvc", "dvc_dir.dvc"}
    ignore_files = {".dvcignore", ".gitignore"}
    common_outputs = git_tracked_outputs | ignore_files | dvc_outputs

    if dvc_top_level:
        top_level_outputs = common_outputs
    else:
        top_level_outputs = git_tracked_outputs

    assert _list_files(erepo) == top_level_outputs
    assert _list_files(erepo, "scm_dir") == {"ipsum"}
    if dvc_top_level:
        assert _list_files(erepo, "dvc_dir") == {"lorem"}

    assert _list_files(subrepo, ".") == common_outputs
    assert _list_files(subrepo, "scm_dir") == {"ipsum"}
    assert _list_files(subrepo, "dvc_dir") == {"lorem"}
Example #6
0
def test_imported_entries_unchanged(tmp_dir, dvc, erepo_dir):
    """A freshly imported stage reports no changed entries."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file", "file content", "initial commit")
    clean_staging()

    source_repo = os.fspath(erepo_dir)
    imported = dvc.imp(source_repo, "file")

    nothing_changed = ([], [], None)
    assert imported.changed_entries() == nothing_changed
Example #7
0
def test_indexed_on_push(tmp_dir, dvc, index):
    """After ``dvc.push`` the index holds directory hashes only.

    The directory output's hash and the hash of its ``baz`` entry must be
    indexed; the hash of the standalone file ``foo`` must not be.
    """
    file_stages = tmp_dir.dvc_gen({"foo": "foo content"})
    file_out = file_stages[0].outs[0]
    dir_stages = tmp_dir.dvc_gen({"bar": {"baz": "baz content"}})
    dir_out = dir_stages[0].outs[0]
    entry_hash = dir_out.obj.trie.get(("baz", ))[1]
    clean_staging()

    dvc.push()

    expected_hashes = {dir_out.hash_info.value, entry_hash.value}
    assert expected_hashes == set(index.hashes())
    assert [dir_out.hash_info.value] == list(index.dir_hashes())
    assert file_out.hash_info.value not in index.hashes()
Example #8
0
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """Staging a tracked directory includes a file added afterwards.

    ``baz`` is written into ``dir`` after ``dvc_gen``; the staged object is
    expected to count three files and carry the pinned ``.dir`` hash.
    """
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")
    clean_staging()

    repo_fs = RepoFileSystem(repo=dvc)
    dir_path = (tmp_dir / "dir").fs_path
    _, meta, obj = stage(dvc.odb.local, dir_path, repo_fs, "md5")

    expected = HashInfo("md5", "ba75a2162ca9c29acecb7957105a0bc2.dir")
    assert obj.hash_info == expected
    assert meta.nfiles == 3
Example #9
0
def clean(outs, dvc=None):
    """Remove the given paths and, when ``dvc`` is given, reset its cache.

    Args:
        outs: list of paths to remove; each one is printed before removal.
        dvc: optional repo object. When truthy, its local cache directory is
            removed as well and then recreated empty, after which
            ``clean_repos()`` and ``clean_staging()`` are called.
    """
    from tests.utils import clean_staging

    targets = (outs + [dvc.odb.local.cache_dir]) if dvc else outs
    for target in targets:
        print(target)
        remove(target)

    if not dvc:
        return

    os.makedirs(dvc.odb.local.cache_dir, exist_ok=True)
    clean_repos()
    clean_staging()
Example #10
0
def test_commit_pipeline_stage(tmp_dir, dvc, run_copy):
    """A named pipeline stage can be committed via several target forms.

    The stage is created with ``no_commit=True``; a forced commit must
    return the stage and leave ``dvc.status`` falsy for it.
    """
    tmp_dir.gen("foo", "foo")
    stage = run_copy("foo", "bar", no_commit=True, name="copy-foo-bar")
    clean_staging()

    address = stage.addressing
    assert dvc.status(address)
    assert dvc.commit(address, force=True) == [stage]
    assert not dvc.status(address)

    # just to confirm different variants work
    target_variants = (
        f":{address}",
        f"{PIPELINE_FILE}:{address}",
        PIPELINE_FILE,
    )
    for target in target_variants:
        assert dvc.commit(target) == [stage]
Example #11
0
def test_commit_no_exec(tmp_dir, dvc):
    """Forced commit of a ``no_exec`` stage leaves an empty status.

    Both ``dep`` and ``out`` are generated in the workspace before the
    stage is created with ``no_exec=True``.
    """
    tmp_dir.gen({"dep": "dep", "out": "out"})
    no_exec_stage = dvc.run(
        name="my",
        cmd="mycmd",
        deps=["dep"],
        outs=["out"],
        no_exec=True,
    )
    clean_staging()

    stage_file = no_exec_stage.path
    assert dvc.status(stage_file)

    dvc.commit(stage_file, force=True)
    assert dvc.status(stage_file) == {}
Example #12
0
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    """Staging a directory with DVC- and git-tracked files gives a fixed hash.

    ``dir/foo`` is added to DVC, while ``dir/bar``, ``dir/.gitignore`` and
    ``dir/foo.dvc`` are committed to git.
    """
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))

    git_tracked = [
        os.path.join("dir", name)
        for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_tracked)
    tmp_dir.scm.commit("add dir")
    clean_staging()

    repo_fs = RepoFileSystem(repo=dvc)
    dir_path = (tmp_dir / "dir").fs_path
    _, _, obj = stage(dvc.odb.local, dir_path, repo_fs, "md5")

    expected = HashInfo("md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir")
    assert obj.hash_info == expected
Example #13
0
def test_commit_changed_md5(tmp_dir, dvc):
    """Commit rejects a stage file with an altered ``md5`` unless forced.

    The stage file's ``md5`` is overwritten with a bogus value. A plain
    commit must raise ``StageCommitError``; a forced commit must succeed and
    leave no ``md5`` key in the stage file.
    """
    tmp_dir.gen({"file": "file content"})
    (stage, ) = dvc.add("file", no_commit=True)

    # Tamper with the checksum recorded in the stage file.
    stage_file = tmp_dir / stage.path
    tampered = stage_file.parse()
    tampered["md5"] = "1111111111"
    stage_file.dump(tampered)

    clean_staging()

    with pytest.raises(StageCommitError):
        dvc.commit(stage.path)

    dvc.commit(stage.path, force=True)
    assert "md5" not in (tmp_dir / stage.path).parse()
Example #14
0
def test_commit_granular_output_dir(tmp_dir, dvc):
    """Committing a directory added with ``no_commit`` clears the status."""
    data_tree = {
        "foo": "foo",
        "bar": "bar",
        "subdir": {"subfoo": "subfoo", "subbar": "subbar"},
    }
    tmp_dir.gen({"data": data_tree})

    dvc.add("data", no_commit=True)
    clean_staging()
    dvc.commit("data")

    remaining = dvc.status()
    assert remaining == {}
Example #15
0
def test_circular_import(tmp_dir, dvc, scm, erepo_dir):
    """Importing back into the source repo raises ``CircularImportError``.

    ``dir`` is imported from ``erepo_dir`` into this repo as
    ``dir_imported`` and committed; ``erepo_dir`` then tries to import
    ``dir_imported`` from this repo.
    """
    from dvc.exceptions import CircularImportError

    with erepo_dir.chdir():
        erepo_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}}, commit="init")

    upstream = os.fspath(erepo_dir)
    dvc.imp(upstream, "dir", "dir_imported")
    scm.add("dir_imported.dvc")
    scm.commit("import")
    clean_staging()

    with erepo_dir.chdir(), pytest.raises(CircularImportError):
        erepo_dir.dvc.imp(
            os.fspath(tmp_dir),
            "dir_imported",
            "circular_import",
        )
Example #16
0
def test_commit_granular_output(tmp_dir, dvc):
    """Committing one output of a stage caches a single object.

    The stage declares ``foo`` and ``bar`` with ``no_commit=True``. The
    cache must be empty before ``dvc.commit("foo")`` and hold exactly one
    object afterwards.
    """
    commands = ["echo foo>foo", "echo bar>bar"]
    dvc.run(
        name="mystage",
        cmd=commands,
        outs=["foo", "bar"],
        no_commit=True,
    )
    clean_staging()

    cache = tmp_dir / ".dvc" / "cache"
    cached_before = list(cache.glob("*/*"))
    assert not cached_before

    dvc.commit("foo")

    expected_object = cache / "d3" / "b07384d113edec49eaa6238ad5ff00"
    assert list(cache.glob("*/*")) == [expected_object]
Example #17
0
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    """Fetching and staging ``dir`` leaves out the nested subrepo.

    ``dir/subrepo`` is created as a subrepo with its own tracked ``file``.
    The copy of ``dir`` fetched through ``repo.dvcfs`` must contain only
    ``foo``, ``bar`` and ``.gitignore``, and staging plus transferring
    ``dir`` into an emptied cache must yield exactly the pinned objects.
    """
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.dvcfs.get("dir", os.fspath(tmp_dir / "out"))
        fetched = (tmp_dir / "out").read_text()
        assert fetched == {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}

        # clear cache to test saving to cache
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        staging, _, obj = stage(
            repo.odb.local, "dir", repo.dvcfs, "md5", ignore=repo.dvcignore
        )
        transfer(
            staging, repo.odb.local, {obj.hash_info},
            shallow=False, hardlink=True,
        )

        expected_objects = {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7",
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
        assert set(cache_dir.glob("??/*")) == expected_objects
Example #18
0
def test_commit_with_deps(tmp_dir, dvc, run_copy, run_kw):
    """``commit(..., with_deps=True)`` also commits the dependency's stage.

    Both the ``foo`` stage and the copy stage are created with
    ``no_commit=True``; their outputs must report a changed cache before the
    commit and an unchanged cache afterwards.
    """
    tmp_dir.gen("foo", "foo")
    (foo_stage, ) = dvc.add("foo", no_commit=True)
    assert foo_stage is not None
    assert len(foo_stage.outs) == 1

    copy_stage = run_copy("foo", "file", no_commit=True, **run_kw)
    assert copy_stage is not None
    assert len(copy_stage.outs) == 1

    uncommitted = (foo_stage.outs[0], copy_stage.outs[0])
    for out in uncommitted:
        assert out.changed_cache()

    clean_staging()

    dvc.commit(copy_stage.path, with_deps=True)
    for out in (foo_stage.outs[0], copy_stage.outs[0]):
        assert not out.changed_cache()
Example #19
0
def test_stage_dir_optimization(
    tmp_dir, dvc, mocker, dry_run, expected_staging_contents
):
    """Staging an unchanged directory a second time reuses the stored tree.

    The first ``stage.stage`` call after the workspace changes must go
    through ``_stage_tree`` and add exactly one new object to the odb. The
    second call must call neither ``_stage_tree`` nor ``_build_tree`` and
    must load the tree via ``Tree.load`` instead.

    ``dry_run`` and ``expected_staging_contents`` are supplied from outside
    this function (presumably by parametrization -- TODO confirm).
    """
    from dvc_data import stage
    from dvc_data.objects.tree import Tree

    tmp_dir.dvc_gen(
        {
            "data": {
                "foo": "bar",
                "subdir": {"subfoo": "subbar"},
            }
        }
    )
    odb = dvc.odb.local

    # Snapshot the odb contents so that newly added objects can be isolated.
    objs = set(odb.all())
    clean_staging()

    # Add another file under the already tracked "data" directory.
    tmp_dir.gen({"data": {"baz": "quz"}})

    stage_spy = mocker.spy(stage, "_stage_tree")
    _, _, tree = stage.stage(odb, "data", odb.fs, odb.fs.PARAM_CHECKSUM)

    # First pass: the tree is staged and only its raw hash is new in the odb.
    assert stage_spy.called
    assert set(odb.all()) - objs == {tree.hash_info.as_raw().value}
    # Reset the spy so the second pass is checked on its own.
    stage_spy.reset_mock()
    clean_staging()

    load_spy = mocker.spy(Tree, "load")
    build_tree_spy = mocker.spy(stage, "_build_tree")

    staging, _, tree = stage.stage(
        odb, "data", odb.fs, odb.fs.PARAM_CHECKSUM, dry_run=dry_run
    )
    # Second pass: nothing is rebuilt or re-staged.
    assert not stage_spy.called
    assert not build_tree_spy.called

    # Tree.load must have been called with the raw hash of the returned tree
    # as its second positional argument.
    load_args, _ = load_spy.call_args
    assert load_args[1].value == tree.hash_info.as_raw().value

    assert set(staging.all()) == expected_staging_contents
Example #20
0
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Hashing a tracked file through ``RepoFileSystem`` follows the workspace.

    Three phases are checked in order:

    1. The file is modified: staging yields the hash of the new content and
       the staged object passes ``check``.
    2. The file is deleted: ``check`` on that staged object raises
       ``ObjectFormatError``.
    3. With the file still missing, ``fs.info``, ``get_file_hash`` and
       ``stage`` all yield the originally tracked hash.
    """
    from dvc.data import check
    from dvc.data.stage import get_file_hash
    from dvc.objects.errors import ObjectFormatError

    tmp_dir.dvc_gen("file", "file")
    # Hash expected for the tracked content "file".
    file_hash_info = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")

    (tmp_dir / "file").write_text("something")
    # Hash expected for the modified content "something".
    something_hash_info = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")

    clean_staging()

    # file is modified in workspace
    # get_file_hash(file) should return workspace hash, not DVC cached hash
    fs = RepoFileSystem(repo=dvc)
    assert fs.info((tmp_dir / "file").fs_path).get("md5") is None
    staging, _, obj = stage(dvc.odb.local, (tmp_dir / "file").fs_path, fs,
                            "md5")
    assert obj.hash_info == something_hash_info
    check(staging, obj)

    # file is removed in workspace
    # any staged object referring to modified workspace obj is now invalid
    (tmp_dir / "file").unlink()
    with pytest.raises(ObjectFormatError):
        check(staging, obj)

    # get_file_hash(file) should return DVC cached hash
    assert fs.info((tmp_dir / "file").fs_path)["md5"] == file_hash_info.value
    _, hash_info = get_file_hash((tmp_dir / "file").fs_path,
                                 fs,
                                 "md5",
                                 state=dvc.state)
    assert hash_info == file_hash_info

    # tmp_dir/file can be staged even though it is missing in workspace since
    # repofs will use the DVC cached hash (and refer to the local cache object)
    _, _, obj = stage(dvc.odb.local, (tmp_dir / "file").fs_path, fs, "md5")
    assert obj.hash_info == file_hash_info
Example #21
0
def test_commit_granular_dir(tmp_dir, dvc):
    """Granular commits inside a directory grow the cache step by step.

    After ``dvc.add("data", no_commit=True)`` the cache holds one object.
    Committing ``data/foo``, then ``data/subdir``, then ``data`` must each
    add the pinned objects on top of what was already cached.
    """
    data_tree = {
        "foo": "foo",
        "bar": "bar",
        "subdir": {"subfoo": "subfoo", "subbar": "subbar"},
    }
    tmp_dir.gen({"data": data_tree})
    dvc.add("data", no_commit=True)
    clean_staging()

    cache = tmp_dir / ".dvc" / "cache"

    def cached_objects():
        # Current set of object paths found in the cache.
        return set(cache.glob("*/*"))

    expected = {cache / "1a" / "ca2c799df82929bbdd976557975546"}
    assert cached_objects() == expected

    dvc.commit(os.path.join("data", "foo"))
    expected = expected | {
        cache / "1a" / "ca2c799df82929bbdd976557975546.dir",
        cache / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
    }
    assert cached_objects() == expected
    clean_staging()

    dvc.commit(os.path.join("data", "subdir"))
    expected = expected | {
        cache / "26" / "d6b64d96a660707412f523e8184b5f",
        cache / "4c" / "e8d2a2cf314a52fa7f315ca37ca445",
        cache / "68" / "dde2c3c4e7953c2290f176bbdc9a54",
    }
    assert cached_objects() == expected
    clean_staging()

    dvc.commit(os.path.join("data"))
    expected = expected | {
        cache / "37" / "b51d194a7513e45b56f6524f2d51f2",
    }
    assert cached_objects() == expected
Example #22
0
def test_commit_granular_output_file(tmp_dir, dvc):
    """Committing a file added with ``no_commit`` clears the status."""
    target = "foo"
    tmp_dir.gen(target, "foo")

    dvc.add(target, no_commit=True)
    clean_staging()
    dvc.commit(target)

    remaining = dvc.status()
    assert remaining == {}