Example 1
import os

from dvc.data.db import ODBManager


def test_cache(tmp_dir, dvc):
    cache1_md5 = "123"
    cache2_md5 = "234"
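    # local cache entries are stored as <cache_dir>/<md5[:2]>/<md5[2:]>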
    cache1 = os.path.join(
        dvc.odb.local.cache_dir,
        cache1_md5[0:2],
        cache1_md5[2:],
    )
    cache2 = os.path.join(
        dvc.odb.local.cache_dir,
        cache2_md5[0:2],
        cache2_md5[2:],
    )
    tmp_dir.gen({cache1: "1", cache2: "2"})

    assert os.path.exists(cache1)
    assert os.path.exists(cache2)

    odb = ODBManager(dvc)

    md5_list = list(odb.local.all())
    assert len(md5_list) == 2
    assert cache1_md5 in md5_list
    assert cache2_md5 in md5_list

    odb_cache1 = odb.local.hash_to_path(cache1_md5)
    odb_cache2 = odb.local.hash_to_path(cache2_md5)
    assert os.fspath(odb_cache1) == cache1
    assert os.fspath(odb_cache2) == cache2
Example 2
def test_cache_link_type(tmp_dir, scm, dvc):
    with dvc.config.edit() as conf:
        conf["cache"]["type"] = "reflink,copy"
    dvc.odb = ODBManager(dvc)

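    # "reflink,copy" is a preference list: reflink is tried first, then plain copy;
    # either way the checked-out file must keep its original content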
    stages = tmp_dir.dvc_gen({"foo": "foo"})
    assert len(stages) == 1
    assert (tmp_dir / "foo").read_text().strip() == "foo"
Example 3
def test_remote_cache_references(tmp_dir, dvc):
    with dvc.config.edit() as conf:
        conf["remote"]["storage"] = {"url": "ssh://user@localhost:23"}
        conf["remote"]["cache"] = {"url": "remote://storage/tmp"}
        conf["cache"]["ssh"] = "cache"

    dvc.odb = ODBManager(dvc)

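    # the "remote://storage/tmp" cache URL should resolve to "/tmp" on the "storage" remote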
    assert dvc.odb.ssh.fs_path == "/tmp"
Example 4
def test_windows_should_add_when_cache_on_different_drive(
        tmp_dir, dvc, temporary_windows_drive):
    dvc.config["cache"]["dir"] = temporary_windows_drive
    dvc.odb = ODBManager(dvc)

    (stage, ) = tmp_dir.dvc_gen({"file": "file"})
    cache_path = stage.outs[0].cache_path

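    # the cache entry must live on the configured drive and match the workspace file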
    assert path_isin(cache_path, temporary_windows_drive)
    assert os.path.isfile(cache_path)
    assert filecmp.cmp("file", cache_path)
Example 5
def test_cache_type_is_properly_overridden(tmp_dir, erepo_dir):
    with erepo_dir.chdir():
        with erepo_dir.dvc.config.edit() as conf:
            conf["cache"]["type"] = "symlink"
        erepo_dir.dvc.odb = ODBManager(erepo_dir.dvc)
        erepo_dir.scm_add([erepo_dir.dvc.config.files["repo"]],
                          "set cache type to symlinks")
        erepo_dir.dvc_gen("file", "contents", "create file")
    assert System.is_symlink(erepo_dir / "file")

    Repo.get(os.fspath(erepo_dir), "file", "file_imported")

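    # Repo.get should materialize a regular file, not inherit the source repo's
    # symlink cache type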
    assert not System.is_symlink("file_imported")
    assert (tmp_dir / "file_imported").read_text() == "contents"
Example 6
def test_destroy(tmp_dir, dvc, run_copy):
    dvc.config["cache"]["type"] = ["symlink"]
    dvc.odb = ODBManager(dvc)

    tmp_dir.dvc_gen("file", "text")
    tmp_dir.dvc_gen({"dir": {"file": "lorem", "subdir/file": "ipsum"}})

    run_copy("file", "file2", single_stage=True)
    run_copy("file2", "file3", name="copy-file2-file3")
    run_copy("file3", "file4", name="copy-file3-file4")

    dvc.destroy()

    # destroy() should have removed all DVC-related files
    for path in [
            ".dvc",
            ".dvcignore",
            "file.dvc",
            "file2.dvc",
            "dir.dvc",
            PIPELINE_FILE,
            PIPELINE_LOCK,
    ]:
        assert not (tmp_dir / path).exists()

    # ...but the tracked data itself should be left in place
    for path in [
            "file",
            "file2",
            "file3",
            "file4",
            "dir/file",
            "dir/subdir/file",
    ]:
        assert (tmp_dir / path).is_file()

    # Make sure that data was unprotected after `destroy`
    for path in [
            "file",
            "file2",
            "file3",
            "file4",
            "dir",
            "dir/file",
            "dir/subdir",
            "dir/subdir/file",
    ]:
        assert not System.is_symlink(tmp_dir / path)
Example 7
def test_cache_type_is_properly_overridden(tmp_dir, scm, dvc, erepo_dir):
    with erepo_dir.chdir():
        with erepo_dir.dvc.config.edit() as conf:
            conf["cache"]["type"] = "symlink"
        erepo_dir.dvc.odb = ODBManager(erepo_dir.dvc)
        erepo_dir.scm_add(
            [erepo_dir.dvc.config.files["repo"]],
            "set source repo cache type to symlink",
        )
        erepo_dir.dvc_gen("foo", "foo content", "create foo")
    assert System.is_symlink(erepo_dir / "foo")

    dvc.imp(os.fspath(erepo_dir), "foo", "foo_imported")

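    # the import should be a regular, git-ignored copy even though the source repo
    # links its outputs as symlinks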
    assert not System.is_symlink("foo_imported")
    assert (tmp_dir / "foo_imported").read_text() == "foo content"
    assert scm.is_ignored("foo_imported")
Example 8
def test_shared_stage_cache(tmp_dir, dvc, run_copy):
    import stat

    from dvc.data.db import ODBManager

    tmp_dir.gen("foo", "foo")

    with dvc.config.edit() as config:
        config["cache"]["shared"] = "group"

    dvc.odb = ODBManager(dvc)

    assert not os.path.exists(dvc.odb.local.cache_dir)

    run_copy("foo", "bar", name="copy-foo-bar")

    parent_cache_dir = os.path.join(dvc.stage_cache.cache_dir, "88")
    cache_dir = os.path.join(
        parent_cache_dir,
        "883395068439203a9de3d1e1649a16e9027bfd1ab5dab4f438d321c4a928b328",
    )
    cache_file = os.path.join(
        cache_dir,
        "e42b7ebb9bc5ac4bccab769c8d1338914dad25d7ffecc8671dbd4581bad4aa15",
    )

    # sanity check
    assert os.path.isdir(cache_dir)
    assert os.listdir(cache_dir) == [os.path.basename(cache_file)]
    assert os.path.isfile(cache_file)

    def _mode(path):
        return stat.S_IMODE(os.stat(path).st_mode)

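    # "shared: group" should produce setgid, group-writable dirs (0o2775) and 0o664
    # files on POSIX; Windows keeps its default 0o777/0o666 modes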
    if os.name == "nt":
        dir_mode = 0o777
        file_mode = 0o666
    else:
        dir_mode = 0o2775
        file_mode = 0o664

    assert _mode(dvc.odb.local.cache_dir) == dir_mode
    assert _mode(dvc.stage_cache.cache_dir) == dir_mode
    assert _mode(parent_cache_dir) == dir_mode
    assert _mode(cache_dir) == dir_mode
    assert _mode(cache_file) == file_mode
Example 9
    def _make_workspace(name, typ="local"):
        from dvc.data.db import ODBManager

        cloud = make_cloud(typ)  # pylint: disable=W0621

        tmp_dir.add_remote(name=name, config=cloud.config, default=False)
        tmp_dir.add_remote(name=f"{name}-cache",
                           url="remote://workspace/cache",
                           default=False)

        scheme = getattr(cloud, "scheme", "local")
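        # point the cache for this scheme at the "<name>-cache" remote
        # (skipped for http, which is not configured as a cache here)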
        if scheme != "http":
            with dvc.config.edit() as conf:
                conf["cache"][scheme] = f"{name}-cache"

            dvc.odb = ODBManager(dvc)

        return cloud
Example 10
    def test_import_dir(self, tmp_dir, dvc, workspace, stage_md5, dir_md5):
        from dvc.data.db import ODBManager

        workspace.gen(
            {"dir": {
                "file": "file",
                "subdir": {
                    "subfile": "subfile"
                }
            }})

        # remove external cache to make sure that we don't need it
        # to import dirs
        with dvc.config.edit() as conf:
            del conf["cache"]
        dvc.odb = ODBManager(dvc)

        assert not (tmp_dir / "dir").exists()  # sanity check
        dvc.imp_url("remote://workspace/dir")
        assert set(os.listdir(tmp_dir / "dir")) == {"file", "subdir"}
        assert (tmp_dir / "dir" / "file").read_text() == "file"
        assert list(os.listdir(tmp_dir / "dir" / "subdir")) == ["subfile"]
        assert (tmp_dir / "dir" / "subdir" /
                "subfile").read_text() == "subfile"

        assert dvc.status() == {}

        if stage_md5 is not None and dir_md5 is not None:
            assert (tmp_dir / "dir.dvc").read_text() == (
                f"md5: {stage_md5}\n"
                "frozen: true\n"
                "deps:\n"
                f"- md5: {dir_md5}\n"
                "  size: 11\n"
                "  nfiles: 2\n"
                "  path: remote://workspace/dir\n"
                "outs:\n"
                "- md5: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n"
                "  size: 11\n"
                "  nfiles: 2\n"
                "  path: dir\n")
Example 11
def test_shared_cache(tmp_dir, dvc, group):
    from dvc.utils.fs import umask

    if group:
        with dvc.config.edit() as conf:
            conf["cache"].update({"shared": "group"})
    dvc.odb = ODBManager(dvc)
    cache_dir = dvc.odb.local.cache_dir

    assert not os.path.exists(cache_dir)

    tmp_dir.dvc_gen({
        "file": "file content",
        "dir": {"file2": "file 2 content"},
    })

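    # collect the permission bits of every directory and file created in the cache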
    actual = {}
    for root, dnames, fnames in os.walk(cache_dir):
        for name in dnames + fnames:
            path = os.path.join(root, name)
            actual[path] = oct(stat.S_IMODE(os.stat(path).st_mode))

    file_mode = oct(0o444)
    dir_mode = oct(0o2775 if group else (0o777 & ~umask))

    expected = {
        os.path.join(cache_dir, "17"): dir_mode,
        os.path.join(cache_dir, "17", "4eaa1dd94050255b7b98a7e1924b31.dir"):
        file_mode,
        os.path.join(cache_dir, "97"): dir_mode,
        os.path.join(cache_dir, "97", "e17781c198500e2766ea56bd697c03"):
        file_mode,
        os.path.join(cache_dir, "d1"): dir_mode,
        os.path.join(cache_dir, "d1", "0b4c3ff123b26dc068d43a8bef2d23"):
        file_mode,
    }

    assert expected == actual
Example 12
def test_external_cache_dir(tmp_dir, dvc, make_tmp_dir):
    cache_dir = make_tmp_dir("cache")

    with dvc.config.edit() as conf:
        conf["cache"]["dir"] = cache_dir.fs_path
    assert not os.path.exists(dvc.odb.local.cache_dir)
    dvc.odb = ODBManager(dvc)

    tmp_dir.dvc_gen({"foo": "foo"})

    tmp_dir.dvc_gen({
        "data_dir": {
            "data": "data_dir/data",
            "data_sub_dir": {
                "data_sub": "data_dir/data_sub_dir/data_sub"
            },
        }
    })

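    # everything should be cached in the external directory, not in .dvc/cache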
    assert not os.path.exists(".dvc/cache")
    assert len(os.listdir(cache_dir)) != 0
Example 13
def test_cmd_cache_relative_path(tmp_dir, scm, dvc, make_tmp_dir):
    cache_dir = make_tmp_dir("cache")
    dname = relpath(cache_dir)
    ret = main(["cache", "dir", dname])
    assert ret == 0

    dvc.config.load()
    dvc.odb = ODBManager(dvc)

    # NOTE: we are in the repo's root and config is in .dvc/, so
    # dir path written to config should be just one level above.
    rel = os.path.join("..", dname)
    config = configobj.ConfigObj(dvc.config.files["repo"])
    assert config["cache"]["dir"] == rel.replace("\\", "/")

    tmp_dir.dvc_gen({"foo": "foo"})

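    # caching a single file should create exactly one <prefix>/<md5> entry
    # in the newly configured cache directory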
    subdirs = os.listdir(cache_dir)
    assert len(subdirs) == 1
    files = os.listdir(os.path.join(cache_dir, subdirs[0]))
    assert len(files) == 1
Example 14
    def __init__(
        self,
        root_dir=None,
        fs=None,
        rev=None,
        subrepos=False,
        uninitialized=False,
        config=None,
        url=None,
        repo_factory=None,
    ):
        from dvc.config import Config
        from dvc.data.db import ODBManager
        from dvc.data_cloud import DataCloud
        from dvc.fs.git import GitFileSystem
        from dvc.fs.local import localfs
        from dvc.lock import LockNoop, make_lock
        from dvc.repo.live import Live
        from dvc.repo.metrics import Metrics
        from dvc.repo.params import Params
        from dvc.repo.plots import Plots
        from dvc.repo.stage import StageLoad
        from dvc.scm import SCM
        from dvc.stage.cache import StageCache
        from dvc.state import State, StateNoop

        self.url = url
        self._fs_conf = {"repo_factory": repo_factory}
        self._fs = fs or localfs
        self._scm = None

        if rev and not fs:
            self._scm = SCM(root_dir or os.curdir)
            self._fs = GitFileSystem(scm=self._scm, rev=rev)

        self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
            root_dir=root_dir, fs=self.fs, uninitialized=uninitialized)

        self.config = Config(self.dvc_dir, fs=self.fs, config=config)
        self._uninitialized = uninitialized

        # used by RepoFileSystem to determine if it should traverse subrepos
        self.subrepos = subrepos

        self.cloud = DataCloud(self)
        self.stage = StageLoad(self)

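        # when reading from a git revision or when there is no .dvc directory,
        # use no-op locking and state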
        if isinstance(self.fs, GitFileSystem) or not self.dvc_dir:
            self.lock = LockNoop()
            self.state = StateNoop()
            self.odb = ODBManager(self)
        else:
            self.lock = make_lock(
                os.path.join(self.tmp_dir, "lock"),
                tmp_dir=self.tmp_dir,
                hardlink_lock=self.config["core"].get("hardlink_lock", False),
                friendly=True,
            )

            state_db_dir = self._get_database_dir("state")
            self.state = State(self.root_dir, state_db_dir, self.dvcignore)
            self.odb = ODBManager(self)

            self.stage_cache = StageCache(self)

            self._ignore()

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)
        self.live = Live(self)

        self.stage_collection_error_handler: Optional[Callable[
            [str, Exception], None]] = None
        self._lock_depth = 0
Example 15
    def test_get(self):
        cache = ODBManager(self.dvc).local.hash_to_path(self.cache1_md5)
        self.assertEqual(os.fspath(cache), self.cache1)
Example 16
    def test_all(self):
        md5_list = list(ODBManager(self.dvc).local.all())
        self.assertEqual(len(md5_list), 2)
        self.assertIn(self.cache1_md5, md5_list)
        self.assertIn(self.cache2_md5, md5_list)