def is_hardlink(path):
    """Tell whether ``path`` has more than one hard link.

    When ``System.is_unix()`` is true the count is ``st_nlink`` from
    ``os.stat``; otherwise it is ``nNumberOfLinks`` from
    ``System._getdirinfo``.
    """
    path = fspath(path)

    if not System.is_unix():
        return System._getdirinfo(path).nNumberOfLinks > 1

    link_count = os.stat(path).st_nlink
    return link_count > 1
def test_non_cached_output(tmp_dir, erepo_dir, monkeypatch):
    """``Repo.get`` fetches an output that was declared via ``outs_no_cache``."""
    src = "non_cached_file"
    dst = src + "_imported"
    erepo_url = fspath(erepo_dir)

    with monkeypatch.context() as patcher:
        patcher.chdir(erepo_url)
        erepo_dir.dvc.run(
            outs_no_cache=[src], cmd="echo hello > non_cached_file"
        )
        erepo_dir.scm.add([src, src + ".dvc"])
        erepo_dir.scm.commit("add non-cached output")

    Repo.get(fspath(erepo_dir), src, dst)

    fetched = tmp_dir / dst
    assert fetched.is_file()
    # `echo` output differs between win and *nix, hence the strip()
    assert fetched.read_text().strip() == "hello"
def test_import_rev(tmp_dir, scm, dvc, erepo_dir):
    """Importing with ``rev="branch"`` brings in the content committed there."""
    with erepo_dir.chdir(), erepo_dir.branch("branch", new=True):
        erepo_dir.dvc_gen("foo", "foo content", commit="create foo on branch")

    erepo_url = fspath(erepo_dir)
    dvc.imp(erepo_url, "foo", "foo_imported", rev="branch")

    imported = tmp_dir / "foo_imported"
    assert imported.read_text() == "foo content"
    assert scm.repo.git.check_ignore("foo_imported")
def test_get_repo_file(tmp_dir, erepo_dir):
    """``Repo.get`` downloads a DVC-generated file from another repo."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file", "contents", commit="create file")

    target = "file_imported"
    Repo.get(fspath(erepo_dir), "file", target)

    assert os.path.isfile(target)
    assert (tmp_dir / target).read_text() == "contents"
def test_import_git_file(erepo_dir, tmp_dir, dvc, scm, src_is_dvc):
    """A git-tracked file can be imported, with or without ``.dvc`` upstream."""
    if not src_is_dvc:
        upstream_scm = erepo_dir.dvc.scm
        upstream_scm.repo.index.remove([".dvc"], r=True)
        upstream_scm.commit("remove .dvc")

    src, dst = "some_file", "some_file_imported"

    erepo_dir.scm_gen({src: "hello"}, commit="add a regular file")

    tmp_dir.dvc.imp(fspath(erepo_dir), src, dst)

    assert (tmp_dir / dst).is_file()
    same_content = filecmp.cmp(
        fspath(erepo_dir / src), fspath(tmp_dir / dst), shallow=False
    )
    assert same_content
    assert tmp_dir.scm.repo.git.check_ignore(fspath(tmp_dir / dst))
def test_should_remove_outs_before_import(tmp_dir, dvc, mocker, erepo_dir):
    """``import-url`` calls ``Stage.remove_outs`` exactly once."""
    erepo_dir.gen({"foo": "foo"})

    spy = mocker.spy(Stage, "remove_outs")
    source = fspath(erepo_dir / "foo")
    exit_code = main(["import-url", source])

    assert exit_code == 0
    assert spy.mock.call_count == 1
def local_remote(request, tmp_dir, dvc, make_tmp_dir):
    """Configure a remote named ``upstream`` and make it the core remote.

    The remote URL points at a directory created with ``make_tmp_dir``.
    When the ``scm`` fixture is also requested, the repo config file is
    committed. Returns the remote directory path.
    """
    remote_dir = make_tmp_dir("local-remote")

    with dvc.config.edit() as cfg:
        cfg["remote"]["upstream"] = {"url": fspath(remote_dir)}
        cfg["core"]["remote"] = "upstream"

    uses_scm = "scm" in request.fixturenames
    if uses_scm:
        tmp_dir.scm_add([dvc.config.files["repo"]], commit="add remote")

    return remote_dir
def test_pull_non_workspace(tmp_dir, scm, dvc, erepo_dir):
    """``fetch(all_tags=True)`` restores cache for an import only a tag references."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "master content", commit="create foo")

        with erepo_dir.branch("branch", new=True):
            erepo_dir.dvc_gen("foo", "branch content", commit="modify foo")

    branch_stage = dvc.imp(
        fspath(erepo_dir), "foo", "foo_imported", rev="branch"
    )
    tmp_dir.scm_add([branch_stage.relpath], commit="imported branch")
    scm.tag("ref-to-branch")

    # Re-import from master on top of the branch import
    dvc.imp(fspath(erepo_dir), "foo", "foo_imported", rev="master")

    os.remove(branch_stage.outs[0].cache_path)
    dvc.fetch(all_tags=True)
    assert os.path.exists(branch_stage.outs[0].cache_path)
def test_update_import(tmp_dir, dvc, erepo_dir, cached):
    """``dvc.update`` refreshes an import and its ``rev_lock`` after upstream changes.

    ``cached`` selects whether the upstream file is generated with
    ``dvc_gen`` or ``scm_gen``.
    """
    if cached:
        gen = erepo_dir.dvc_gen
    else:
        gen = erepo_dir.scm_gen

    with erepo_dir.branch("branch", new=True), erepo_dir.chdir():
        gen("version", "branch", "add version file")
        old_rev = erepo_dir.scm.get_rev()

    stage = dvc.imp(fspath(erepo_dir), "version", "version", rev="branch")

    imported = tmp_dir / "version"
    assert imported.is_file()
    assert imported.read_text() == "branch"
    expected_before = {
        "url": fspath(erepo_dir),
        "rev": "branch",
        "rev_lock": old_rev,
    }
    assert stage.deps[0].def_repo == expected_before

    with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
        gen("version", "updated", "update version content")
        new_rev = erepo_dir.scm.get_rev()

    assert old_rev != new_rev

    # Caching in external repos doesn't see upstream updates within a single
    # cli call, so the caches are cleaned to make the changes visible.
    clean_repos()

    (stage_status,) = dvc.status([stage.path])["version.dvc"]
    (dep_change,) = list(stage_status["changed deps"].items())
    dep_name, dep_state = dep_change[0], dep_change[1]
    assert dep_name.startswith("version ")
    assert dep_state == "update available"

    dvc.update(stage.path)

    assert dvc.status([stage.path]) == {}

    assert imported.is_file()
    assert imported.read_text() == "updated"

    stage = Stage.load(dvc, stage.path)
    expected_after = {
        "url": fspath(erepo_dir),
        "rev": "branch",
        "rev_lock": new_rev,
    }
    assert stage.deps[0].def_repo == expected_after
def dvc(tmp_dir, request):
    """Initialise a DVC repo in ``tmp_dir``, yield it, and close it afterwards.

    When the ``scm`` fixture is requested the repo is initialised normally
    (calling ``_git_init()`` first if ``tmp_dir`` has no ``scm`` attribute)
    and the init is committed; otherwise it is initialised with
    ``no_scm=True``. The repo is also stored as ``tmp_dir.dvc``.
    """
    from dvc.repo import Repo

    root = fspath(tmp_dir)
    if "scm" not in request.fixturenames:
        repo = Repo.init(root, no_scm=True)
    else:
        if not hasattr(tmp_dir, "scm"):
            _git_init()

        repo = Repo.init(root)
        repo.scm.commit("init dvc")

    try:
        tmp_dir.dvc = repo
        yield repo
    finally:
        repo.close()
def test_get_url_requires_dvc(tmp_dir, scm):
    """``api.get_url`` raises ``UrlNotDvcRepoError`` for a git-only repo."""
    tmp_dir.scm_gen({"foo": "foo"}, commit="initial")

    plain_path = fspath(tmp_dir)
    with pytest.raises(UrlNotDvcRepoError, match="not a DVC repository"):
        api.get_url("foo", repo=plain_path)

    file_url = "file://{}".format(tmp_dir)
    with pytest.raises(UrlNotDvcRepoError):
        api.get_url("foo", repo=file_url)
def test_get_url_positive(tmp_dir, erepo_dir, caplog):
    """``dvc get --show-url`` exits with 0 and logs nothing at ERROR level."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo")

    argv = ["get", fspath(erepo_dir), "foo", "--show-url"]

    caplog.clear()
    with caplog.at_level(logging.ERROR, logger="dvc"):
        exit_code = main(argv)
        assert exit_code == 0
        assert caplog.text == ""
def test_cache_type_is_properly_overridden(tmp_dir, erepo_dir, monkeypatch):
    """``Repo.get`` does not produce a symlink even if the source repo uses them."""
    with monkeypatch.context() as patcher:
        patcher.chdir(fspath(erepo_dir))

        source_repo = erepo_dir.dvc
        source_repo.config.set(
            Config.SECTION_CACHE, Config.SECTION_CACHE_TYPE, "symlink"
        )
        # Rebuild the cache object after changing the config
        erepo_dir.dvc.cache = Cache(erepo_dir.dvc)
        erepo_dir.scm_add(
            [erepo_dir.dvc.config.config_file], "set cache type to symlinks"
        )
        erepo_dir.dvc_gen("file", "contents", "create file")

    assert System.is_symlink(erepo_dir / "file")

    Repo.get(fspath(erepo_dir), "file", "file_imported")

    assert not System.is_symlink("file_imported")
    assert (tmp_dir / "file_imported").read_text() == "contents"
def test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir):
    """An import of a git-tracked file updates after upstream moves it to DVC."""
    old_rev = None
    with erepo_dir.branch("branch", new=True), erepo_dir.chdir():
        erepo_dir.scm_gen("version", "branch", commit="add version file")
        old_rev = erepo_dir.scm.get_rev()

    stage = dvc.imp(fspath(erepo_dir), "version", "version", rev="branch")

    imported = tmp_dir / "version"
    assert imported.is_file()
    assert imported.read_text() == "branch"
    expected_before = {
        "url": fspath(erepo_dir),
        "rev": "branch",
        "rev_lock": old_rev,
    }
    assert stage.deps[0].def_repo == expected_before

    new_rev = None
    with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
        # Drop the file from the git index, then track it with DVC instead
        erepo_dir.scm.repo.index.remove(["version"])
        erepo_dir.dvc_gen("version", "updated")
        erepo_dir.scm.add(["version", "version.dvc"])
        erepo_dir.scm.commit("upgrade to DVC tracking")
        new_rev = erepo_dir.scm.get_rev()

    assert old_rev != new_rev

    [stage_status] = dvc.status([stage.path])["version.dvc"]
    [dep_change] = list(stage_status["changed deps"].items())
    dep_name, dep_state = dep_change[0], dep_change[1]
    assert dep_name.startswith("version ")
    assert dep_state == "update available"

    dvc.update(stage.path)

    assert dvc.status([stage.path]) == {}

    assert imported.is_file()
    assert imported.read_text() == "updated"

    stage = Stage.load(dvc, stage.path)
    expected_after = {
        "url": fspath(erepo_dir),
        "rev": "branch",
        "rev_lock": new_rev,
    }
    assert stage.deps[0].def_repo == expected_after
def test_plot_multiple_revs(tmp_dir, scm, dvc):
    """``dvc.plot`` with several revisions emits each revision's rows, in order."""
    default_template = tmp_dir / ".dvc" / "plot" / "default.json"
    shutil.copy(fspath(default_template), "template.json")

    metric_1 = [{"y": 2}, {"y": 3}]
    _write_json(tmp_dir, metric_1, "metric.json")
    _run_with_metric(tmp_dir, "metric.json", "init", "v1")

    metric_2 = [{"y": 3}, {"y": 5}]
    _write_json(tmp_dir, metric_2, "metric.json")
    _run_with_metric(tmp_dir, "metric.json", "second", "v2")

    metric_3 = [{"y": 5}, {"y": 6}]
    _write_json(tmp_dir, metric_3, "metric.json")
    _run_with_metric(tmp_dir, "metric.json", "third")

    plot_string = dvc.plot(
        "metric.json",
        template="template.json",
        revisions=["HEAD", "v2", "v1"],
    )

    plot_content = json.loads(plot_string)

    # Rows are expected grouped by revision, in the order the revisions
    # were requested, each row carrying its index within the metric file.
    per_revision = (("HEAD", (5, 6)), ("v2", (3, 5)), ("v1", (2, 3)))
    expected_values = [
        {"y": y, PlotData.INDEX_FIELD: index, "rev": rev}
        for rev, ys in per_revision
        for index, y in enumerate(ys)
    ]
    assert plot_content["data"]["values"] == expected_values

    encoding = plot_content["encoding"]
    assert encoding["x"]["field"] == PlotData.INDEX_FIELD
    assert encoding["y"]["field"] == "y"
def test_external_dir_resource_on_no_cache(tmp_dir, dvc, tmp_path_factory):
    """Running with an external dir dependency and no local cache must fail.

    See https://github.com/iterative/dvc/issues/2647: in some situations
    (external dir dependency) the cache is required to calculate the dir md5.
    """
    external_dir = tmp_path_factory.mktemp("external_dir")
    payload = external_dir / "file"
    payload.write_text("content")

    dvc.cache.local = None

    with pytest.raises(RemoteCacheRequiredError):
        dvc.run(deps=[fspath(external_dir)])
def pull_to(self, path, to_info):
    """Copy ``path`` from under ``self.root_dir`` to ``to_info``.

    Args:
        path: path of the file inside this repo; must not be absolute.
        to_info: destination, converted with ``fspath`` before copying.

    Raises:
        PathMissingError: if ``path`` is absolute, or if the copy raises
            ``FileNotFoundError`` (which is kept as the explicit cause).
    """
    # Git handled files can't have absolute path
    if os.path.isabs(path):
        raise PathMissingError(path, self.url)

    try:
        fs_copy(os.path.join(self.root_dir, path), fspath(to_info))
    except FileNotFoundError as exc:
        # Chain explicitly so the traceback shows the missing file as the
        # cause rather than as an error that happened during handling.
        raise PathMissingError(path, self.url) from exc
def test_import(tmp_dir, scm, dvc, erepo_dir):
    """A DVC-tracked file imported from another repo lands locally and is git-ignored."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo content", commit="create foo")

    target = "foo_imported"
    dvc.imp(fspath(erepo_dir), "foo", target)

    assert os.path.isfile(target)
    assert (tmp_dir / target).read_text() == "foo content"
    assert scm.repo.git.check_ignore(target)
def test_import_cached_file(erepo_dir, tmp_dir, dvc, scm, monkeypatch):
    """Import succeeds even when ``DataCloud.pull`` is patched to raise.

    The same content is first generated (and then deleted) locally via
    ``dvc_gen``, before the import is attempted.
    """
    src, dst = "some_file", "some_file_imported"

    with erepo_dir.chdir():
        erepo_dir.dvc_gen({src: "hello"}, commit="add a regular file")

    tmp_dir.dvc_gen({dst: "hello"})
    local_copy = tmp_dir / dst
    local_copy.unlink()

    pull_failure = NoRemoteError("dvc import")
    with patch.object(cloud.DataCloud, "pull", side_effect=pull_failure):
        tmp_dir.dvc.imp(fspath(erepo_dir), src, dst)

    assert (tmp_dir / dst).is_file()
    same_content = filecmp.cmp(
        fspath(erepo_dir / src), fspath(tmp_dir / dst), shallow=False
    )
    assert same_content
def reflink(source, link_name):
    """Create a reflink from ``source`` to ``link_name``.

    Dispatches on ``platform.system()`` to the matching ``System._reflink_*``
    helper. An unknown platform, an ``IOError`` from the helper, or a
    non-zero return value all result in ``DvcException``.
    """
    source = fspath(source)
    link_name = fspath(link_name)

    helper_names = {
        "Windows": "_reflink_windows",
        "Darwin": "_reflink_darwin",
        "Linux": "_reflink_linux",
    }
    helper_name = helper_names.get(platform.system())

    status = -1
    if helper_name is not None:
        try:
            status = getattr(System, helper_name)(source, link_name)
        except IOError:
            status = -1

    if status != 0:
        raise DvcException("reflink is not supported")
def test_ls_repo_with_target_dir(tmp_dir, dvc, scm):
    """``Repo.ls`` with ``target="model"`` lists the entries of that directory."""
    tmp_dir.scm_gen(FS_STRUCTURE, commit="init")
    tmp_dir.dvc_gen(DVC_STRUCTURE, commit="dvc")

    listing = Repo.ls(fspath(tmp_dir), target="model")

    names = ("script.py", "train.py", "people.csv", "people.csv.dvc")
    expected = tuple((name,) for name in names)
    match_files(listing, expected)
def test_import_stage_accompanies_target(tmp_dir, dvc, erepo_dir):
    """The ``.dvc`` stage file is created next to the imported output."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file1", "file1 content", commit="commit file")

    tmp_dir.gen({"dir": {}})

    out_path = os.path.join("dir", "imported_file")
    dvc.imp_url("file1", out=out_path, erepo={"url": fspath(erepo_dir)})

    out_dir = tmp_dir / "dir"
    assert (out_dir / "imported_file").exists()
    assert (out_dir / "imported_file.dvc").exists()
def test_cache_type_is_properly_overridden(tmp_dir, scm, dvc, erepo_dir, monkeypatch):
    """``dvc.imp`` does not produce a symlink even if the source repo uses them."""
    with monkeypatch.context() as patcher:
        patcher.chdir(fspath(erepo_dir))

        source_repo = erepo_dir.dvc
        source_repo.config.set(
            Config.SECTION_CACHE, Config.SECTION_CACHE_TYPE, "symlink"
        )
        # Rebuild the cache object after changing the config
        erepo_dir.dvc.cache = Cache(erepo_dir.dvc)
        erepo_dir.scm_add(
            [erepo_dir.dvc.config.config_file],
            "set source repo cache type to symlink",
        )
        erepo_dir.dvc_gen("foo", "foo content", "create foo")

    assert System.is_symlink(erepo_dir / "foo")

    dvc.imp(fspath(erepo_dir), "foo", "foo_imported")

    assert not System.is_symlink("foo_imported")
    assert (tmp_dir / "foo_imported").read_text() == "foo content"
    assert scm.repo.git.check_ignore("foo_imported")
def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm, src_is_dvc):
    """A git-tracked directory can be imported, with or without ``.dvc`` upstream."""
    if not src_is_dvc:
        upstream_scm = erepo_dir.dvc.scm
        upstream_scm.repo.index.remove([".dvc"], r=True)
        upstream_scm.commit("remove .dvc")

    src, dst = "some_directory", "some_directory_imported"

    erepo_dir.scm_gen({src: {"file.txt": "hello"}}, commit="add a dir")

    stage = dvc.imp(fspath(erepo_dir), src, dst)

    assert (tmp_dir / dst).is_dir()
    trees_equal(fspath(erepo_dir / src), fspath(tmp_dir / dst))
    assert tmp_dir.scm.repo.git.check_ignore(fspath(tmp_dir / dst))

    # No "rev" key is expected here, only the URL and the locked revision
    expected_def_repo = {
        "url": fspath(erepo_dir),
        "rev_lock": erepo_dir.scm.get_rev(),
    }
    assert stage.deps[0].def_repo == expected_def_repo
def test_get_to_dir(tmp_dir, erepo_dir, dname):
    """``Repo.get`` into an existing directory places the file inside it."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file", "contents", commit="create file")

    makedirs(dname, exist_ok=True)

    Repo.get(fspath(erepo_dir), "file", dname)

    out_dir = tmp_dir / dname
    assert out_dir.is_dir()
    assert (tmp_dir / dname / "file").read_text() == "contents"
def test_get_git_file(tmp_dir, erepo_dir):
    """``Repo.get`` can fetch a file that is tracked only by git."""
    src, dst = "some_file", "some_file_imported"

    erepo_dir.scm_gen({src: "hello"}, commit="add a regular file")

    Repo.get(fspath(erepo_dir), src, dst)

    fetched = tmp_dir / dst
    assert fetched.is_file()
    assert fetched.read_text() == "hello"
def test_ls_repo_with_path_subdir_outs_only_recursive(tmp_dir, dvc, scm):
    """Recursive, outs-only ``Repo.ls`` of ``data/subcontent``."""
    tmp_dir.scm_gen(FS_STRUCTURE, commit="init")
    tmp_dir.dvc_gen(DVC_STRUCTURE, commit="dvc")

    subdir = os.path.join("data", "subcontent")
    listing = Repo.ls(fspath(tmp_dir), subdir, outs_only=True, recursive=True)

    expected = (
        (("data.xml",), True),
        (("statistics", "data.csv"), True),
    )
    match_files(listing, expected)
def test_gc_cloud_positive(tmp_dir, dvc, tmp_path_factory):
    """``dvc gc -vf`` exits with 0 for each tested cloud flag combination."""
    with dvc.config.edit() as cfg:
        remote_url = fspath(tmp_path_factory.mktemp("test_remote_base"))
        cfg["remote"]["local_remote"] = {"url": remote_url}
        cfg["core"]["remote"] = "local_remote"

    dvc.push()

    cloud_flags = ("-cw", "-ca", "-cT", "-caT", "-cwT")
    for flag in cloud_flags:
        exit_code = main(["gc", "-vf", flag])
        assert exit_code == 0
def test_ls_repo_with_target_subdir_outs_only(tmp_dir, dvc, scm):
    """Non-recursive, outs-only ``Repo.ls`` of ``data/subcontent``."""
    tmp_dir.scm_gen(FS_STRUCTURE, commit="init")
    tmp_dir.dvc_gen(DVC_STRUCTURE, commit="dvc")

    subdir = os.path.join("data", "subcontent")
    listing = Repo.ls(fspath(tmp_dir), subdir, outs_only=True)

    expected = (
        (("data.xml",), True),
        (("statistics",), False),
    )
    match_files(listing, expected)
def test_update_import_url(tmp_dir, dvc, tmp_path_factory):
    """``dvc.update`` re-imports a URL dependency after its source changes."""
    source_dir = tmp_path_factory.mktemp("import_url_source")
    src = source_dir / "file"
    src.write_text("file content")

    dst = tmp_dir / "imported_file"
    stage = dvc.imp_url(fspath(src), fspath(dst))

    assert dst.is_file()
    assert dst.read_text() == "file content"

    # Change the source data
    src.write_text("updated file content")

    # Status is asserted to be empty both before and after the update
    assert dvc.status([stage.path]) == {}
    dvc.update(stage.path)
    assert dvc.status([stage.path]) == {}

    assert dst.is_file()
    assert dst.read_text() == "updated file content"