def test_parent_repo_collect_stages(tmp_dir, scm, dvc):
    """The parent repo collects no stages; each nested subrepo collects its own."""
    tmp_dir.gen({"subdir": {}})
    tmp_dir.gen({"deep": {"dir": {}}})

    # One subrepo directly under the root, one two levels down.
    subrepo_roots = (tmp_dir / "subdir", tmp_dir / "deep" / "dir")

    subrepos = []
    for root in subrepo_roots:
        with root.chdir():
            nested = Repo.init(subdir=True)
            root.gen("subrepo_file", "subrepo file content")
            nested.add("subrepo_file")
        subrepos.append(nested)

    dvc._reset()

    parent_stages = dvc.stage.collect(None)
    nested_stages = [nested.stage.collect(None) for nested in subrepos]

    assert parent_stages == []
    for collected in nested_stages:
        assert collected != []
def test_status_before_and_after_dvc_init(tmp_dir, dvc, erepo_dir):
    """An import reports "update available" once the source repo gains DVC.

    The external repo first has its ``.dvc`` directory removed and tracks
    ``file`` through SCM only; afterwards DVC is initialised there and a new
    version of ``file`` is tracked by DVC.
    """
    source_url = fspath(erepo_dir)
    target = ["file.dvc"]

    with erepo_dir.chdir():
        erepo_dir.scm.repo.index.remove([".dvc"], r=True)
        shutil.rmtree(".dvc")
        erepo_dir.scm_gen("file", "first version", commit="first verison")
        rev_before = erepo_dir.scm.get_rev()

    dvc.imp(source_url, "file", "file")
    assert dvc.status(target) == {}

    with erepo_dir.chdir():
        Repo.init()
        erepo_dir.scm.repo.index.remove(["file"])
        os.remove("file")
        erepo_dir.dvc_gen("file", "second version")
        erepo_dir.scm.add([".dvc", "file.dvc"])
        erepo_dir.scm.commit("version with dvc")
        rev_after = erepo_dir.scm.get_rev()

    assert rev_before != rev_after

    # Caching in external repos doesn't see upstream updates within single
    # cli call, so we need to clean the caches to see the changes.
    clean_repos()

    (entry,) = dvc.status(target)["file.dvc"]
    expected_dep = "file ({})".format(fspath(erepo_dir))
    assert entry == {"changed deps": {expected_dep: "update available"}}
def __init__(
    self,
    baseline_rev: str,
    checkpoint_reset: Optional[bool] = False,
    **kwargs,
):
    """Create a local executor rooted in a fresh temporary directory.

    Args:
        baseline_rev: Revision passed on to the parent initializer; its
            first seven characters are included in the debug log.
        checkpoint_reset: Stored on the instance as ``checkpoint_reset``.
        **kwargs: Must contain ``dvc_dir`` and ``cache_dir``; both are
            popped here and the remainder is forwarded to the parent
            initializer.

    Raises:
        KeyError: If ``dvc_dir`` or ``cache_dir`` is missing from
            ``kwargs``.
    """
    from dvc.repo import Repo

    dvc_dir = kwargs.pop("dvc_dir")
    cache_dir = kwargs.pop("cache_dir")
    super().__init__(baseline_rev, **kwargs)
    self.tmp_dir = TemporaryDirectory()

    # init empty DVC repo (will be overwritten when input is uploaded)
    Repo.init(root_dir=self.tmp_dir.name, no_scm=True)
    logger.debug(
        "Init local executor in dir '%s' with baseline '%s'.",
        # Log the directory path, not the TemporaryDirectory object's repr.
        self.tmp_dir.name,
        baseline_rev[:7],
    )
    self.dvc_dir = os.path.join(self.tmp_dir.name, dvc_dir)
    self._config(cache_dir)
    self._tree = LocalTree(self.dvc, {"url": self.dvc.root_dir})
    # override default CACHE_MODE since files must be writable in order
    # to run repro
    self._tree.CACHE_MODE = 0o644
    self.checkpoint_reset = checkpoint_reset
    self.checkpoint = False
def test_gen_dvcignore(tmp_dir):
    """Initialising a repo without SCM writes the default ``.dvcignore``."""
    DvcRepo.init(no_scm=True)

    expected = (
        "# Add patterns of files dvc should ignore, which could improve\n"
        "# the performance. Learn more at\n"
        "# https://dvc.org/doc/user-guide/dvcignore\n"
    )
    generated = (tmp_dir / ".dvcignore").read_text()

    assert expected == generated
def test_scm_subrepo(tmp_dir, scm):
    """``_scm_in_use`` reports "Git" from inside a freshly created subrepo."""
    nested = tmp_dir / "subdir"
    nested.mkdir()

    with nested.chdir():
        Repo.init(subdir=True)
        detected = _scm_in_use()

    assert detected == "Git"
def test_html(tmp_dir, dvc_repo, html, signal_exists):
    """Presence of the signal file after a step matches ``signal_exists``."""
    if dvc_repo:
        from dvc.repo import Repo

        Repo.init(no_scm=True)

    dvclive.init("logs", html=html)
    dvclive.log("m1", 1)
    dvclive.next_step()

    signal_path = tmp_dir / ".dvc" / "tmp" / SIGNAL_FILE
    assert signal_path.is_file() == signal_exists
def test_html(tmp_dir, dvc_repo, html, signal_exists, monkeypatch):
    """Same signal-file check, with dvclive configured through env variables."""
    if dvc_repo:
        from dvc.repo import Repo

        Repo.init(no_scm=True)

    # The HTML flag is exported as "0"/"1".
    html_flag = str(int(html))
    monkeypatch.setenv(env.DVCLIVE_PATH, "logs")
    monkeypatch.setenv(env.DVCLIVE_HTML, html_flag)

    dvclive.log("m1", 1)
    dvclive.next_step()

    signal_path = tmp_dir / ".dvc" / "tmp" / SIGNAL_FILE
    assert signal_path.is_file() == signal_exists
def test_branch_config(tmp_dir, scm):
    """A repo opened at a revision reads the config committed on that revision."""
    tmp_dir.scm_gen("foo", "foo", commit="init")

    # sanity check
    with pytest.raises(NotDvcRepoError):
        Repo().close()

    # Initialise DVC and commit its config on a separate branch only.
    scm.checkout("branch", create_new=True)
    branch_repo = Repo.init()
    with branch_repo.config.edit() as cfg:
        cfg["remote"]["branch"] = {"url": "/some/path"}
    branch_repo.close()

    config_file = os.path.join(".dvc", "config")
    scm.add([config_file])
    scm.commit("init dvc")
    scm.checkout("master")

    with pytest.raises(NotDvcRepoError):
        Repo(rev="master").close()

    rev_repo = Repo(rev="branch")
    try:
        remote_url = rev_repo.config["remote"]["branch"]["url"]
        assert remote_url == "/some/path"
    finally:
        rev_repo.close()
def test_show_non_plot(tmp_dir, scm, use_dvc):
    """Show a plain JSON file as a plot, with or without an initialised repo."""
    records = [
        {"first_val": 100, "val": 2},
        {"first_val": 200, "val": 3},
    ]
    _write_json(tmp_dir, records, "metric.json")

    dvc = Repo.init() if use_dvc else Repo(uninitialized=True)

    rendered = dvc.plots.show(targets=["metric.json"])["metric.json"]
    spec = json.loads(rendered)

    # Each record gains an index field and the "workspace" revision label.
    expected_values = [
        {
            "val": record["val"],
            PlotData.INDEX_FIELD: position,
            "first_val": record["first_val"],
            "rev": "workspace",
        }
        for position, record in enumerate(records)
    ]
    assert spec["data"]["values"] == expected_values

    encoding = spec["encoding"]
    assert encoding["x"]["field"] == PlotData.INDEX_FIELD
    assert encoding["y"]["field"] == "val"

    if not use_dvc:
        assert not (tmp_dir / ".dvc").exists()
def test_dynamic_cache_initalization(tmp_dir, scm):
    """A repo whose ssh cache names a ``remote://`` remote can be re-opened."""
    repo = Repo.init()
    with repo.config.edit() as cfg:
        cfg["cache"]["ssh"] = "foo"
        cfg["remote"]["foo"] = {"url": "remote://bar/baz"}

    # Constructing the repo again is the whole check: it must not raise.
    Repo(str(tmp_dir))
def test_show_subrepo_with_preexisting_tags(tmp_dir, scm):
    """Metrics shown for all tags omit a tag made before the subrepo existed."""
    tmp_dir.gen("foo", "foo")
    scm.add("foo")
    scm.commit("init")
    scm.tag("no-metrics")

    tmp_dir.gen({"subdir": {}})
    subrepo_dir = tmp_dir / "subdir"
    with subrepo_dir.chdir():
        dvc = Repo.init(subdir=True)
        scm.commit("init dvc")

        dvc.run(
            cmd="echo foo: 1 > metrics.yaml",
            metrics=["metrics.yaml"],
            single_stage=True,
        )

    tracked = [
        str(subrepo_dir / name)
        for name in ("metrics.yaml", "metrics.yaml.dvc")
    ]
    scm.add(tracked)
    scm.commit("init metrics")
    scm.tag("v1")

    metrics_path = os.path.join("subdir", "metrics.yaml")
    expected = {
        "working tree": {metrics_path: {"foo": 1}},
        "v1": {metrics_path: {"foo": 1}},
    }
    assert dvc.metrics.show(all_tags=True) == expected
def test_modified_subrepo(tmp_dir, scm, dvc):
    """``diff`` inside a subrepo reports a changed tracked file as modified."""
    from dvc.repo import Repo

    tmp_dir.gen({"subdir": {"file": "first"}})
    nested_dir = tmp_dir / "subdir"

    with nested_dir.chdir():
        subrepo = Repo.init(subdir=True)
        subrepo.add("file")

    scm.add(os.path.join("subdir", "file.dvc"))
    scm.commit("init")

    (nested_dir / "file").write_text("second")

    expected = {
        "added": [],
        "deleted": [],
        "modified": [
            {
                "path": "file",
                "hash": {"old": digest("first"), "new": digest("second")},
            }
        ],
        "not in cache": [],
        "renamed": [],
    }

    with nested_dir.chdir():
        # Re-open the subrepo from inside its own directory.
        subrepo = Repo()
        assert subrepo.diff() == expected
def dvc(repo_dir, git):
    """Yield a DVC repo initialised in ``repo_dir`` with an "init dvc" commit.

    Args:
        repo_dir: Object exposing ``_root_dir``, the directory to initialise.
        git: Not used in the body; kept in the signature for callers.

    Yields:
        The repo returned by ``DvcRepo.init``. Its git handle is closed when
        the generator is finalised, including after an error.
    """
    # Initialise before entering ``try``: if init itself fails there is
    # nothing to close, and touching the unbound local in ``finally`` would
    # raise UnboundLocalError and hide the original exception.
    dvc = DvcRepo.init(repo_dir._root_dir)
    try:
        dvc.scm.commit("init dvc")
        yield dvc
    finally:
        dvc.scm.git.close()
def test_show_non_metric_branch(tmp_dir, scm, use_dvc):
    """Show an untracked YAML file as metrics for the workspace and a branch."""
    tmp_dir.scm_gen("metrics.yaml", "foo: 1.1", commit="init")
    with tmp_dir.branch("branch", new=True):
        tmp_dir.scm_gen("metrics.yaml", "foo: 2.2", commit="other")

    dvc = Repo.init() if use_dvc else Repo(uninitialized=True)

    expected = {
        "workspace": {"data": {"metrics.yaml": {"data": {"foo": 1.1}}}},
        "branch": {"data": {"metrics.yaml": {"data": {"foo": 2.2}}}},
    }
    shown = dvc.metrics.show(targets=["metrics.yaml"], revs=["branch"])
    assert shown == expected

    if not use_dvc:
        # Showing metrics must not create a .dvc directory as a side effect.
        assert not (tmp_dir / ".dvc").exists()
def test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc):
    """Walking the root lists the subrepo directory on both filesystems."""
    tmp_dir.dvc_gen({"foo": "foo"}, commit="add foo")

    nested_dir = tmp_dir / "subdir"
    nested_dir.mkdir()
    with nested_dir.chdir():
        Repo.init(subdir=True)

    scm.add(["subdir"])
    scm.commit("Add subrepo")

    dvc_fs = dvc.fs
    dvc._reset()
    scm_fs = scm.get_fs("HEAD")

    root = os.fspath(tmp_dir)
    # Index 1 of each walk entry holds the directory names.
    get_dirs = itemgetter(1)

    workspace_dirs = set(get_dirs(next(dvc_fs.walk(root))))
    assert workspace_dirs == {".dvc", "subdir", ".git"}

    head_dirs = set(get_dirs(next(scm_fs.walk(root))))
    assert head_dirs == {".dvc", "subdir"}
def test_no_commits(tmp_dir):
    """``metrics.diff`` returns an empty dict in a git repo with no commits."""
    from scmrepo.git import Git

    from dvc.repo import Repo

    fresh_git = Git.init(tmp_dir.fs_path)
    assert fresh_git.no_commits

    metrics_diff = Repo.init().metrics.diff()
    assert metrics_diff == {}
def test_no_commits(tmp_dir):
    """``metrics.diff`` returns an empty dict in a git repo with no commits."""
    from dvc.repo import Repo
    from dvc.scm.git import Git
    from tests.dir_helpers import git_init

    git_init(".")
    has_no_commits = Git().no_commits
    assert has_no_commits

    metrics_diff = Repo.init().metrics.diff()
    assert metrics_diff == {}
def test_show_non_metric(tmp_dir, scm, use_dvc):
    """Show an untracked YAML file as metrics, with or without ``Repo.init``."""
    tmp_dir.gen("metrics.yaml", "foo: 1.1")

    dvc = Repo.init() if use_dvc else Repo(uninitialized=True)

    shown = dvc.metrics.show(targets=["metrics.yaml"])
    expected = {"": {"metrics.yaml": {"foo": 1.1}}}
    assert shown == expected
def test_ignore_resurface_subrepo(tmp_dir, scm, dvc):
    """A subrepo directory is ignored unless ``ignore_subrepos=False``."""
    tmp_dir.dvc_gen({"foo": "foo"}, commit="add foo")

    nested_dir = tmp_dir / "subdir"
    nested_dir.mkdir()
    with nested_dir.chdir():
        Repo.init(subdir=True)

    dvc.tree._reset()

    root = os.fspath(tmp_dir)
    nested_path = os.fspath(nested_dir)
    dirs = ["subdir"]
    files = ["foo"]

    # Default filtering drops the subrepo directory and keeps the files.
    assert dvc.tree.dvcignore(root, dirs, files) == ([], files)
    assert dvc.tree.dvcignore(root, dirs, files, ignore_subrepos=False) == (
        dirs,
        files,
    )

    assert dvc.tree.dvcignore.is_ignored_dir(nested_path)
    assert not dvc.tree.dvcignore.is_ignored_dir(
        nested_path, ignore_subrepos=False
    )
def test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc):
    """Walks hide the subrepo by default and list it with ``ignore_subrepos=False``."""
    tmp_dir.dvc_gen({"foo": "foo"}, commit="add foo")

    nested_dir = tmp_dir / "subdir"
    nested_dir.mkdir()
    with nested_dir.chdir():
        Repo.init(subdir=True)

    scm.add(["subdir"])
    scm.commit("Add subrepo")

    dvc_fs = dvc.fs
    dvc_fs._reset()
    scm_fs = scm.get_fs("HEAD", use_dvcignore=True)

    root = os.fspath(tmp_dir)
    # Index 1 of each walk entry holds the directory names.
    get_dirs = itemgetter(1)

    # Default walk: the subrepo directory is filtered out.
    assert get_dirs(next(dvc_fs.walk(root))) == []
    assert get_dirs(next(scm_fs.walk(root))) == []

    # Opting out of subrepo filtering brings the directory back.
    assert get_dirs(next(dvc_fs.walk(root, ignore_subrepos=False))) == [
        "subdir"
    ]
    assert get_dirs(next(scm_fs.walk(root, ignore_subrepos=False))) == [
        "subdir"
    ]
def test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc):
    """Tree walks hide the subrepo by default and list it with ``ignore_subrepos=False``."""
    tmp_dir.dvc_gen({"foo": "foo"}, commit="add foo")

    nested_dir = tmp_dir / "subdir"
    nested_dir.mkdir()
    with nested_dir.chdir():
        Repo.init(subdir=True)

    scm.add(["subdir"])
    scm.commit("Add subrepo")

    dvc_tree = dvc.tree
    # Drop the "dvcignore" entry stored on the tree instance.
    dvc_tree.__dict__.pop("dvcignore")
    scm_tree = scm.get_tree("HEAD", use_dvcignore=True)

    root = os.fspath(tmp_dir)
    # Index 1 of each walk entry holds the directory names.
    get_dirs = itemgetter(1)

    # Default walk: the subrepo directory is filtered out.
    assert get_dirs(next(dvc_tree.walk(root))) == []
    assert get_dirs(next(scm_tree.walk(root))) == []

    # Opting out of subrepo filtering brings the directory back.
    assert get_dirs(next(dvc_tree.walk(root, ignore_subrepos=False))) == [
        "subdir"
    ]
    assert get_dirs(next(scm_tree.walk(root, ignore_subrepos=False))) == [
        "subdir"
    ]
def test_show_non_plot(tmp_dir, scm, use_dvc):
    """Plot data for a plain JSON file equals the records written to it."""
    metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}]
    metric_file = tmp_dir / "metric.json"
    metric_file.dump_json(metric, sort_keys=True)

    dvc = Repo.init() if use_dvc else Repo(uninitialized=True)

    plots = dvc.plots.show(targets=["metric.json"])
    loaded = plots["workspace"]["data"]["metric.json"]["data"]
    assert loaded == metric
def test_ignore_resurface_subrepo(tmp_dir, scm, dvc):
    """Entries inside a subrepo are ignored unless ``ignore_subrepos=False``."""
    tmp_dir.dvc_gen({"foo": "foo"}, commit="add foo")

    nested_dir = tmp_dir / "subdir"
    nested_dir.mkdir()
    with nested_dir.chdir():
        Repo.init(subdir=True)

    nested_dir.gen({"bar": {"bar": "bar"}})
    dvc._reset()

    files = ["foo"]
    dirs = ["bar"]
    root = os.fspath(nested_dir)
    bar_path = os.fspath(nested_dir / "bar")

    # With subrepo filtering disabled the listing passes through unchanged.
    assert dvc.dvcignore(root, dirs, files, ignore_subrepos=False) == (
        dirs,
        files,
    )
    # By default nothing under the subrepo is reported.
    assert dvc.dvcignore(root, dirs, files) == ([], [])

    assert dvc.dvcignore.is_ignored_dir(bar_path)
    assert not dvc.dvcignore.is_ignored_dir(bar_path, ignore_subrepos=False)
def run(self):
    """Initialise a DVC repo in the current directory.

    Returns:
        0 on success, 1 when ``InitError`` is raised (the error is logged).
    """
    from dvc.repo import Repo
    from dvc.exceptions import InitError

    options = self.args
    try:
        self.repo = Repo.init(".", no_scm=options.no_scm, force=options.force)
        self.config = self.repo.config
    except InitError:
        logger.exception("failed to initiate dvc")
        return 1
    else:
        return 0
def test_parent_repo_collect_stages(tmp_dir, scm, dvc):
    """The parent repo collects no stages; the subrepo collects its own."""
    tmp_dir.gen({"subdir": {}})
    nested_dir = tmp_dir / "subdir"

    with nested_dir.chdir():
        subrepo = Repo.init(subdir=True)
        nested_dir.gen("subrepo_file", "subrepo file content")
        subrepo.add("subrepo_file")

    parent_stages = dvc.collect(None)
    nested_stages = subrepo.collect(None)

    assert parent_stages == []
    assert nested_stages != []
def setUp(self):
    """Create a second git+DVC repo whose cache dir points at the main repo's cache."""
    super().setUp()

    self.additional_path = TestDir.mkdtemp()
    self.additional_git = Repo.init(self.additional_path)
    self.additional_dvc = DvcRepo.init(self.additional_path)

    # Write a local config that redirects the cache of the additional repo.
    shared_cache = os.path.join(self._root_dir, ".dvc", "cache")
    local_config = configobj.ConfigObj()
    local_config.filename = os.path.join(
        self.additional_path, ".dvc", "config.local"
    )
    local_config["cache"] = {"dir": shared_cache}
    local_config.write()

    # Re-open the repo after the config file has been written.
    self.additional_dvc = DvcRepo(self.additional_path)
def test_ignore_subrepo(tmp_dir, scm, dvc):
    """A parent ``.dvcignore`` pattern hides the file from the parent, not the subrepo."""
    tmp_dir.gen({".dvcignore": "foo", "subdir": {"foo": "foo"}})
    scm.add([".dvcignore"])
    scm.commit("init parent dvcignore")

    nested_dir = tmp_dir / "subdir"
    ignored_file = nested_dir / "foo"
    assert not dvc.tree.exists(ignored_file)

    with nested_dir.chdir():
        subrepo = Repo.init(subdir=True)
        scm.add(str(ignored_file))
        scm.commit("subrepo init")

    # The file must be visible to the subrepo on every commit it iterates.
    for _rev in subrepo.brancher(all_commits=True):
        assert subrepo.tree.exists(ignored_file)
def run(self):
    """Initialise a DVC repo in the current directory and print the welcome message.

    Returns:
        0 on success, 1 when ``InitError`` is raised (the error is logged).
    """
    from dvc.exceptions import InitError
    from dvc.repo import Repo

    options = self.args
    try:
        with Repo.init(
            ".",
            no_scm=options.no_scm,
            force=options.force,
            subdir=options.subdir,
        ) as repo:
            self.config = repo.config
            _welcome_message()
    except InitError:
        logger.exception("failed to initiate DVC")
        return 1
    else:
        return 0
def init(self, *, scm=False, dvc=False):
    """Initialise git and/or DVC in this directory and attach them to ``self``.

    Args:
        scm: When true, run ``_git_init`` here and set ``self.scm``.
        dvc: When true, run ``Repo.init`` here and set ``self.dvc``.

    Each attribute may be set only once; a repeat request fails the
    corresponding assertion.
    """
    from dvc.repo import Repo
    from dvc.scm.git import Git

    assert not scm or not hasattr(self, "scm")
    assert not dvc or not hasattr(self, "dvc")

    root = fspath(self)

    if scm:
        _git_init(root)
    if dvc:
        # NOTE(review): no_scm=True is passed even when scm is requested;
        # confirm this is intended, since self.scm may then be self.dvc.scm.
        self.dvc = Repo.init(root, no_scm=True)
    if scm:
        # Prefer the SCM object owned by the DVC repo when one is attached.
        if hasattr(self, "dvc"):
            self.scm = self.dvc.scm
        else:
            self.scm = Git(root)
    if dvc and hasattr(self, "scm"):
        self.scm.commit("init dvc")
def test_cleanfs_subrepo(tmp_dir, dvc, scm, monkeypatch):
    """Files and directories inside a subrepo are visible to parent and subrepo fs."""
    tmp_dir.gen({"subdir": {}})
    nested_dir = tmp_dir / "subdir"

    with nested_dir.chdir():
        subrepo = Repo.init(subdir=True)
        nested_dir.gen({"foo": "foo", "dir": {"bar": "bar"}})

    path = PathInfo(nested_dir)

    # Parent repo first, then the subrepo: same four checks on each.
    for repo in (dvc, subrepo):
        assert repo.fs.exists(path / "foo")
        assert repo.fs.isfile(path / "foo")
        assert repo.fs.exists(path / "dir")
        assert repo.fs.isdir(path / "dir")