def test_import_url_dir(tmp_dir, dvc, workspace, stage_md5, dir_md5): workspace.gen({"dir": {"file": "file", "subdir": {"subfile": "subfile"}}}) # remove external cache to make sure that we don't need it to import dirs with dvc.config.edit() as conf: del conf["cache"] dvc.odb = ODBManager(dvc) assert not (tmp_dir / "dir").exists() # sanity check dvc.imp_url("remote://workspace/dir") assert set(os.listdir(tmp_dir / "dir")) == {"file", "subdir"} assert (tmp_dir / "dir" / "file").read_text() == "file" assert list(os.listdir(tmp_dir / "dir" / "subdir")) == ["subfile"] assert (tmp_dir / "dir" / "subdir" / "subfile").read_text() == "subfile" assert (tmp_dir / "dir.dvc").read_text() == ( f"md5: {stage_md5}\n" "frozen: true\n" "deps:\n" f"- md5: {dir_md5}\n" " size: 11\n" " nfiles: 2\n" " path: remote://workspace/dir\n" "outs:\n" "- md5: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n" " size: 11\n" " nfiles: 2\n" " path: dir\n") assert dvc.status() == {}
def test_windows_should_add_when_cache_on_different_drive(
    tmp_dir, dvc, temporary_windows_drive
):
    """Adding a file must work when the cache dir is on a different drive.

    Windows-specific regression check: the out's cache path must end up
    under the temporary drive and hold a copy of the added file's content.
    """
    dvc.config["cache"]["dir"] = temporary_windows_drive
    # Rebuild the ODB so the new cache location takes effect.
    dvc.odb = ODBManager(dvc)

    (stage,) = tmp_dir.dvc_gen({"file": "file"})
    cache_path = stage.outs[0].cache_path

    assert path_isin(cache_path, temporary_windows_drive)
    assert os.path.isfile(cache_path)
    # BUG FIX: the original called filecmp.cmp() without asserting its
    # result, so a content mismatch could never fail this test.
    # shallow=False forces an actual byte-by-byte comparison.
    assert filecmp.cmp("file", cache_path, shallow=False)
def test_cache_type_is_properly_overridden(tmp_dir, erepo_dir):
    """`Repo.get` must not inherit the source repo's symlink cache type."""
    with erepo_dir.chdir():
        source = erepo_dir.dvc
        # Switch the source repo to symlink caching and commit the config.
        with source.config.edit() as conf:
            conf["cache"]["type"] = "symlink"
        source.odb = ODBManager(source)
        erepo_dir.scm_add(
            [source.config.files["repo"]], "set cache type to symlinks"
        )
        erepo_dir.dvc_gen("file", "contents", "create file")

    # Sanity: the file in the source workspace really is a symlink.
    assert System.is_symlink(erepo_dir / "file")

    Repo.get(os.fspath(erepo_dir), "file", "file_imported")

    # The fetched copy must be a plain file with the right contents.
    assert not System.is_symlink("file_imported")
    assert (tmp_dir / "file_imported").read_text() == "contents"
def test_shared_stage_cache(tmp_dir, dvc, run_copy):
    """With ``cache.shared = "group"``, stage-cache directories and files
    must get group-writable permissions (setgid dirs, 0o664 files on POSIX).
    """
    import stat

    from dvc.objects.db import ODBManager

    tmp_dir.gen("foo", "foo")

    with dvc.config.edit() as config:
        config["cache"]["shared"] = "group"

    # Rebuild the ODB so the shared-cache setting takes effect.
    dvc.odb = ODBManager(dvc)

    assert not os.path.exists(dvc.odb.local.cache_dir)

    run_copy("foo", "bar", name="copy-foo-bar")

    # Hard-coded stage-cache hashes for this exact stage; they pin the
    # stage-cache layout: <cache_dir>/<prefix>/<stage-hash>/<entry-hash>.
    parent_cache_dir = os.path.join(
        dvc.stage_cache.cache_dir,
        "88",
    )
    cache_dir = os.path.join(
        parent_cache_dir,
        "883395068439203a9de3d1e1649a16e9027bfd1ab5dab4f438d321c4a928b328",
    )
    cache_file = os.path.join(
        cache_dir,
        "e42b7ebb9bc5ac4bccab769c8d1338914dad25d7ffecc8671dbd4581bad4aa15",
    )

    # sanity check
    assert os.path.isdir(cache_dir)
    assert os.listdir(cache_dir) == [os.path.basename(cache_file)]
    assert os.path.isfile(cache_file)

    def _mode(path):
        # Permission bits only (file-type bits stripped).
        return stat.S_IMODE(os.stat(path).st_mode)

    if os.name == "nt":
        # Windows has no POSIX group semantics; modes are effectively open.
        dir_mode = 0o777
        file_mode = 0o666
    else:
        # 0o2775: setgid bit keeps group ownership on newly created entries.
        dir_mode = 0o2775
        file_mode = 0o664

    assert _mode(dvc.odb.local.cache_dir) == dir_mode
    assert _mode(dvc.stage_cache.cache_dir) == dir_mode
    assert _mode(parent_cache_dir) == dir_mode
    assert _mode(cache_dir) == dir_mode
    assert _mode(cache_file) == file_mode
def test_destroy(tmp_dir, dvc, run_copy):
    """`dvc destroy` removes DVC metadata but keeps and unprotects data."""
    dvc.config["cache"]["type"] = ["symlink"]
    dvc.odb = ODBManager(dvc)

    tmp_dir.dvc_gen("file", "text")
    tmp_dir.dvc_gen({"dir": {"file": "lorem", "subdir/file": "ipsum"}})
    run_copy("file", "file2", single_stage=True)
    run_copy("file2", "file3", name="copy-file2-file3")
    run_copy("file3", "file4", name="copy-file3-file4")

    dvc.destroy()

    # Remove all the files related to DVC
    dvc_metadata = (
        ".dvc",
        "file.dvc",
        "file2.dvc",
        "dir.dvc",
        PIPELINE_FILE,
        PIPELINE_LOCK,
    )
    for meta in dvc_metadata:
        assert not (tmp_dir / meta).exists()

    # Leave the rest of the files
    data_files = (
        "file",
        "file2",
        "file3",
        "file4",
        "dir/file",
        "dir/subdir/file",
    )
    for data in data_files:
        assert (tmp_dir / data).is_file()

    # Make sure that data was unprotected after `destroy`
    for entry in data_files + ("dir", "dir/subdir"):
        assert not System.is_symlink(tmp_dir / entry)
def test_cache_type_is_properly_overridden(tmp_dir, scm, dvc, erepo_dir):
    """Importing from a symlink-cached repo must yield a regular file."""
    with erepo_dir.chdir():
        source = erepo_dir.dvc
        # Force the source repo onto symlink caching, then commit config.
        with source.config.edit() as conf:
            conf["cache"]["type"] = "symlink"
        source.odb = ODBManager(source)
        erepo_dir.scm_add(
            [source.config.files["repo"]],
            "set source repo cache type to symlink",
        )
        erepo_dir.dvc_gen("foo", "foo content", "create foo")

    # Sanity: the source workspace file really is a symlink.
    assert System.is_symlink(erepo_dir / "foo")

    dvc.imp(os.fspath(erepo_dir), "foo", "foo_imported")

    # The import must materialize a plain, git-ignored file locally.
    assert not System.is_symlink("foo_imported")
    assert (tmp_dir / "foo_imported").read_text() == "foo content"
    assert scm.is_ignored("foo_imported")
def _make_workspace(name, typ="local"):
    """Create a cloud remote *name* plus a matching ``<name>-cache`` remote.

    Returns the created cloud object.

    NOTE(review): relies on ``make_cloud``, ``tmp_dir`` and ``dvc`` from an
    enclosing scope not visible here — presumably a fixture closure; confirm
    against the surrounding file.
    """
    from dvc.objects.db import ODBManager

    cloud = make_cloud(typ)  # pylint: disable=W0621

    tmp_dir.add_remote(name=name, config=cloud.config, default=False)
    tmp_dir.add_remote(name=f"{name}-cache", url="remote://workspace/cache",
                       default=False)

    scheme = getattr(cloud, "scheme", "local")
    if scheme != "http":
        # Point the per-scheme external cache at the `<name>-cache` remote
        # and rebuild the ODB so the new cache config takes effect.
        with dvc.config.edit() as conf:
            conf["cache"][scheme] = f"{name}-cache"
        dvc.odb = ODBManager(dvc)

    return cloud
def workspace(tmp_dir, dvc, request):
    """Fixture: a "workspace" remote backed by the parametrized cloud, with
    a "cache" remote layered inside it as the per-scheme external cache."""
    from dvc.objects.db import ODBManager

    cloud = request.param
    assert cloud

    tmp_dir.add_remote(name="workspace", config=cloud.config, default=False)
    tmp_dir.add_remote(
        name="cache", url="remote://workspace/cache", default=False
    )

    scheme = getattr(cloud, "scheme", "local")
    if scheme == "http":
        # HTTP clouds get no external cache configured.
        return cloud

    # Register the "cache" remote as this scheme's external cache and
    # rebuild the ODB so the updated config is picked up.
    with dvc.config.edit() as conf:
        conf["cache"][scheme] = "cache"
    dvc.odb = ODBManager(dvc)
    return cloud
def test_shared_cache(tmp_dir, dvc, group):
    """Verify cache dir/file permission bits with and without
    ``cache.shared = "group"``.
    """
    from dvc.utils.fs import umask

    if group:
        with dvc.config.edit() as conf:
            conf["cache"].update({"shared": "group"})
    # Rebuild the ODB so the (possibly updated) config takes effect.
    dvc.odb = ODBManager(dvc)
    cache_dir = dvc.odb.local.cache_dir

    assert not os.path.exists(cache_dir)

    tmp_dir.dvc_gen({
        "file": "file content",
        "dir": {
            "file2": "file 2 "
                     "content"
        }
    })

    # Collect actual permission bits of every entry under the cache.
    actual = {}
    for root, dnames, fnames in os.walk(cache_dir):
        for name in dnames + fnames:
            path = os.path.join(root, name)
            actual[path] = oct(stat.S_IMODE(os.stat(path).st_mode))

    # Cache files are always read-only; shared dirs get setgid 0o2775,
    # otherwise dirs are 0o777 limited by the process umask.
    file_mode = oct(0o444)
    dir_mode = oct(0o2775 if group else (0o777 & ~umask))

    # Hard-coded md5 prefixes/digests for the two generated outs plus the
    # .dir object describing the directory.
    expected = {
        os.path.join(cache_dir, "17"): dir_mode,
        os.path.join(cache_dir, "17",
                     "4eaa1dd94050255b7b98a7e1924b31.dir"): file_mode,
        os.path.join(cache_dir, "97"): dir_mode,
        os.path.join(cache_dir, "97",
                     "e17781c198500e2766ea56bd697c03"): file_mode,
        os.path.join(cache_dir, "d1"): dir_mode,
        os.path.join(cache_dir, "d1",
                     "0b4c3ff123b26dc068d43a8bef2d23"): file_mode,
    }

    assert expected == actual
def test_get(self):
    """hash_to_path must map a known md5 back to its cache file path."""
    resolved = ODBManager(self.dvc).local.hash_to_path(self.cache1_md5)
    self.assertEqual(os.fspath(resolved), self.cache1)
def test_all(self):
    """local.all() must enumerate exactly the two known cache md5s."""
    hashes = list(ODBManager(self.dvc).local.all())
    self.assertEqual(len(hashes), 2)
    for expected in (self.cache1_md5, self.cache2_md5):
        self.assertIn(expected, hashes)
def __init__(
    self,
    root_dir=None,
    scm=None,
    rev=None,
    subrepos=False,
    uninitialized=False,
    config=None,
    url=None,
    repo_factory=None,
):
    """Initialize a Repo instance.

    Wires up filesystem, config, lock/state, object database, stage cache
    and the various feature managers. The initialization order matters:
    config depends on the filesystem, the ODB depends on lock/state, and
    StageCache depends on the ODB.

    NOTE(review): the original source was whitespace-mangled; the block
    structure below (what lives inside the if/else) was reconstructed —
    verify against upstream before relying on it.
    """
    # Local imports avoid import cycles at module load time.
    from dvc.config import Config
    from dvc.data_cloud import DataCloud
    from dvc.fs.local import LocalFileSystem
    from dvc.lock import LockNoop, make_lock
    from dvc.machine import MachineManager
    from dvc.objects.db import ODBManager
    from dvc.repo.live import Live
    from dvc.repo.metrics import Metrics
    from dvc.repo.params import Params
    from dvc.repo.plots import Plots
    from dvc.repo.stage import StageLoad
    from dvc.scm import SCM
    from dvc.stage.cache import StageCache
    from dvc.state import State, StateNoop

    self.url = url
    self._fs_conf = {"repo_factory": repo_factory}

    # A revision without an SCM handle means we must discover the SCM
    # ourselves from the working directory.
    if rev and not scm:
        scm = SCM(root_dir or os.curdir)

    self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
        root_dir=root_dir, scm=scm, rev=rev, uninitialized=uninitialized
    )

    # With an SCM handle we read files from a git revision; otherwise we
    # operate directly on the local working tree.
    if scm:
        self._fs = scm.get_fs(rev)
    else:
        self._fs = LocalFileSystem(url=self.root_dir)

    self.config = Config(self.dvc_dir, fs=self.fs, config=config)
    self._uninitialized = uninitialized
    self._scm = scm

    # used by RepoFileSystem to determine if it should traverse subrepos
    self.subrepos = subrepos

    self.cloud = DataCloud(self)
    self.stage = StageLoad(self)

    # Revision-pinned (scm) or uninitialized repos are read-only: no real
    # lock or state database is needed.
    if scm or not self.dvc_dir:
        self.lock = LockNoop()
        self.state = StateNoop()
        self.odb = ODBManager(self)
    else:
        self.lock = make_lock(
            os.path.join(self.tmp_dir, "lock"),
            tmp_dir=self.tmp_dir,
            hardlink_lock=self.config["core"].get("hardlink_lock", False),
            friendly=True,
        )

        # NOTE: storing state and link_state in the repository itself to
        # avoid any possible state corruption in 'shared cache dir'
        # scenario.
        self.state = State(self.root_dir, self.tmp_dir, self.dvcignore)
        self.odb = ODBManager(self)

        self.stage_cache = StageCache(self)

        self._ignore()

    self.metrics = Metrics(self)
    self.plots = Plots(self)
    self.params = Params(self)
    self.live = Live(self)

    # Machine manager is feature-flagged (or force-enabled under DVC_TEST)
    # and requires a tmp dir to operate in.
    if self.tmp_dir and (
        self.config["feature"].get("machine", False)
        or env2bool("DVC_TEST")
    ):
        self.machine = MachineManager(self)
    else:
        self.machine = None

    # Optional hook invoked when stage collection fails for a file.
    self.stage_collection_error_handler: Optional[
        Callable[[str, Exception], None]
    ] = None
    # Reentrancy counter for the repo lock.
    self._lock_depth = 0
def __init__(
    self,
    root_dir=None,
    scm=None,
    rev=None,
    subrepos=False,
    uninitialized=False,
    config=None,
    url=None,
    repo_factory=None,
):
    """Initialize a Repo instance (GitFileSystem variant).

    Same overall wiring as the other ``__init__`` in this file, except the
    revision filesystem is built directly as a GitFileSystem and the state
    database directory is resolved via ``_get_database_dir``.

    NOTE(review): the original source was whitespace-mangled; the block
    structure below (what lives inside the if/else) was reconstructed —
    verify against upstream before relying on it.
    """
    # Local imports avoid import cycles at module load time.
    from dvc.config import Config
    from dvc.data_cloud import DataCloud
    from dvc.fs.git import GitFileSystem
    from dvc.fs.local import LocalFileSystem
    from dvc.lock import LockNoop, make_lock
    from dvc.objects.db import ODBManager
    from dvc.repo.live import Live
    from dvc.repo.metrics import Metrics
    from dvc.repo.params import Params
    from dvc.repo.plots import Plots
    from dvc.repo.stage import StageLoad
    from dvc.scm import SCM, Git
    from dvc.stage.cache import StageCache
    from dvc.state import State, StateNoop

    self.url = url
    self._fs_conf = {"repo_factory": repo_factory}

    # A revision without an SCM handle means we must discover the SCM
    # ourselves from the working directory.
    if rev and not scm:
        scm = SCM(root_dir or os.curdir)

    self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
        root_dir=root_dir, scm=scm, rev=rev, uninitialized=uninitialized
    )

    # Only Git SCMs are supported for revision-pinned filesystems here.
    if scm:
        assert isinstance(scm, Git)
        self._fs = GitFileSystem(scm=scm, rev=rev)
    else:
        self._fs = LocalFileSystem(url=self.root_dir)

    self.config = Config(self.dvc_dir, fs=self.fs, config=config)
    self._uninitialized = uninitialized
    self._scm = scm

    # used by RepoFileSystem to determine if it should traverse subrepos
    self.subrepos = subrepos

    self.cloud = DataCloud(self)
    self.stage = StageLoad(self)

    # Revision-pinned (scm) or uninitialized repos are read-only: no real
    # lock or state database is needed.
    if scm or not self.dvc_dir:
        self.lock = LockNoop()
        self.state = StateNoop()
        self.odb = ODBManager(self)
    else:
        self.lock = make_lock(
            os.path.join(self.tmp_dir, "lock"),
            tmp_dir=self.tmp_dir,
            hardlink_lock=self.config["core"].get("hardlink_lock", False),
            friendly=True,
        )

        # State database location is resolved separately (may live outside
        # the repo, unlike the other __init__ variant in this file).
        state_db_dir = self._get_database_dir("state")
        self.state = State(self.root_dir, state_db_dir, self.dvcignore)
        self.odb = ODBManager(self)

        self.stage_cache = StageCache(self)

        self._ignore()

    self.metrics = Metrics(self)
    self.plots = Plots(self)
    self.params = Params(self)
    self.live = Live(self)

    # Optional hook invoked when stage collection fails for a file.
    self.stage_collection_error_handler: Optional[
        Callable[[str, Exception], None]
    ] = None
    # Reentrancy counter for the repo lock.
    self._lock_depth = 0