Beispiel #1
0
def test_import_url_dir(tmp_dir, dvc, workspace, stage_md5, dir_md5):
    """Importing a directory via `imp_url` must work without an external cache."""
    workspace.gen({"dir": {"file": "file", "subdir": {"subfile": "subfile"}}})

    # Drop the external cache config so the import cannot rely on it.
    with dvc.config.edit() as conf:
        del conf["cache"]
    dvc.odb = ODBManager(dvc)

    assert not (tmp_dir / "dir").exists()  # nothing imported yet
    dvc.imp_url("remote://workspace/dir")

    imported = tmp_dir / "dir"
    assert set(os.listdir(imported)) == {"file", "subdir"}
    assert (imported / "file").read_text() == "file"
    assert os.listdir(imported / "subdir") == ["subfile"]
    assert (imported / "subdir" / "subfile").read_text() == "subfile"

    # The generated .dvc file records both the remote dep and the local out.
    expected_dvcfile = (
        f"md5: {stage_md5}\n"
        "frozen: true\n"
        "deps:\n"
        f"- md5: {dir_md5}\n"
        "  size: 11\n"
        "  nfiles: 2\n"
        "  path: remote://workspace/dir\n"
        "outs:\n"
        "- md5: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n"
        "  size: 11\n"
        "  nfiles: 2\n"
        "  path: dir\n"
    )
    assert (tmp_dir / "dir.dvc").read_text() == expected_dvcfile

    assert dvc.status() == {}
Beispiel #2
0
def test_windows_should_add_when_cache_on_different_drive(
        tmp_dir, dvc, temporary_windows_drive):
    """Adding a file must work when the cache dir is on a different drive.

    NOTE(review): presumably this exercises the cross-drive link fallback on
    Windows — confirm against the link-strategy implementation.
    """
    dvc.config["cache"]["dir"] = temporary_windows_drive
    dvc.odb = ODBManager(dvc)

    (stage,) = tmp_dir.dvc_gen({"file": "file"})
    cache_path = stage.outs[0].cache_path

    assert path_isin(cache_path, temporary_windows_drive)
    assert os.path.isfile(cache_path)
    # BUG FIX: filecmp.cmp's return value was previously discarded, so a
    # content mismatch could never fail this test. Assert it, and use
    # shallow=False to force a byte-by-byte comparison instead of the
    # default os.stat-based shallow check.
    assert filecmp.cmp("file", cache_path, shallow=False)
Beispiel #3
0
def test_cache_type_is_properly_overridden(tmp_dir, erepo_dir):
    """`Repo.get` must yield a regular file even when the source repo
    caches its outputs via symlinks."""
    source = erepo_dir.dvc
    with erepo_dir.chdir():
        with source.config.edit() as conf:
            conf["cache"]["type"] = "symlink"
        source.odb = ODBManager(source)
        erepo_dir.scm_add(
            [source.config.files["repo"]],
            "set cache type to symlinks",
        )
        erepo_dir.dvc_gen("file", "contents", "create file")

    # Sanity: the source repo really does link its output via a symlink.
    assert System.is_symlink(erepo_dir / "file")

    Repo.get(os.fspath(erepo_dir), "file", "file_imported")

    # The downloaded copy must be a plain file with identical contents.
    assert not System.is_symlink("file_imported")
    assert (tmp_dir / "file_imported").read_text() == "contents"
Beispiel #4
0
def test_shared_stage_cache(tmp_dir, dvc, run_copy):
    """Stage-cache dirs and files must carry the shared ("group") modes."""
    import stat

    from dvc.objects.db import ODBManager

    tmp_dir.gen("foo", "foo")

    with dvc.config.edit() as config:
        config["cache"]["shared"] = "group"

    dvc.odb = ODBManager(dvc)

    assert not os.path.exists(dvc.odb.local.cache_dir)

    run_copy("foo", "bar", name="copy-foo-bar")

    # Entries are nested under a two-character prefix dir ("88" matches the
    # leading characters of the entry hash below).
    parent_cache_dir = os.path.join(dvc.stage_cache.cache_dir, "88")
    cache_dir = os.path.join(
        parent_cache_dir,
        "883395068439203a9de3d1e1649a16e9027bfd1ab5dab4f438d321c4a928b328",
    )
    cache_file = os.path.join(
        cache_dir,
        "e42b7ebb9bc5ac4bccab769c8d1338914dad25d7ffecc8671dbd4581bad4aa15",
    )

    # Sanity: exactly one entry was written to the stage cache.
    assert os.path.isdir(cache_dir)
    assert os.listdir(cache_dir) == [os.path.basename(cache_file)]
    assert os.path.isfile(cache_file)

    def _mode(path):
        return stat.S_IMODE(os.stat(path).st_mode)

    if os.name == "nt":
        # Windows: no POSIX group bits to check.
        dir_mode, file_mode = 0o777, 0o666
    else:
        # setgid, group-writable dirs and group-writable files.
        dir_mode, file_mode = 0o2775, 0o664

    for directory in (
        dvc.odb.local.cache_dir,
        dvc.stage_cache.cache_dir,
        parent_cache_dir,
        cache_dir,
    ):
        assert _mode(directory) == dir_mode
    assert _mode(cache_file) == file_mode
Beispiel #5
0
def test_destroy(tmp_dir, dvc, run_copy):
    """`destroy` removes DVC metadata but keeps (and unprotects) data files."""
    dvc.config["cache"]["type"] = ["symlink"]
    dvc.odb = ODBManager(dvc)

    tmp_dir.dvc_gen("file", "text")
    tmp_dir.dvc_gen({"dir": {"file": "lorem", "subdir/file": "ipsum"}})

    run_copy("file", "file2", single_stage=True)
    run_copy("file2", "file3", name="copy-file2-file3")
    run_copy("file3", "file4", name="copy-file3-file4")

    dvc.destroy()

    # All DVC bookkeeping must be gone...
    dvc_metadata = (
        ".dvc",
        "file.dvc",
        "file2.dvc",
        "dir.dvc",
        PIPELINE_FILE,
        PIPELINE_LOCK,
    )
    for path in dvc_metadata:
        assert not (tmp_dir / path).exists()

    # ...while the actual data files stay in place.
    data_files = (
        "file",
        "file2",
        "file3",
        "file4",
        "dir/file",
        "dir/subdir/file",
    )
    for path in data_files:
        assert (tmp_dir / path).is_file()

    # Data must also be unprotected after `destroy`: no symlinks remain.
    for path in data_files + ("dir", "dir/subdir"):
        assert not System.is_symlink(tmp_dir / path)
Beispiel #6
0
def test_cache_type_is_properly_overridden(tmp_dir, scm, dvc, erepo_dir):
    """`imp` must materialize a real file even when the source repo caches
    its outputs via symlinks."""
    source = erepo_dir.dvc
    with erepo_dir.chdir():
        with source.config.edit() as conf:
            conf["cache"]["type"] = "symlink"
        source.odb = ODBManager(source)
        erepo_dir.scm_add(
            [source.config.files["repo"]],
            "set source repo cache type to symlink",
        )
        erepo_dir.dvc_gen("foo", "foo content", "create foo")

    # Sanity: the source repo really does link its output via a symlink.
    assert System.is_symlink(erepo_dir / "foo")

    dvc.imp(os.fspath(erepo_dir), "foo", "foo_imported")

    # The imported copy is a regular, git-ignored file with identical data.
    assert not System.is_symlink("foo_imported")
    assert (tmp_dir / "foo_imported").read_text() == "foo content"
    assert scm.is_ignored("foo_imported")
Beispiel #7
0
    def _make_workspace(name, typ="local"):
        """Create a cloud workspace remote plus a matching `<name>-cache`
        remote, wiring the latter in as the cache for its scheme."""
        from dvc.objects.db import ODBManager

        cloud = make_cloud(typ)  # pylint: disable=W0621

        tmp_dir.add_remote(name=name, config=cloud.config, default=False)
        tmp_dir.add_remote(
            name=f"{name}-cache",
            url="remote://workspace/cache",
            default=False,
        )

        # NOTE(review): the http scheme is skipped when wiring the cache —
        # presumably http cannot serve as a cache backend; confirm.
        scheme = getattr(cloud, "scheme", "local")
        if scheme != "http":
            with dvc.config.edit() as conf:
                conf["cache"][scheme] = f"{name}-cache"
            dvc.odb = ODBManager(dvc)

        return cloud
Beispiel #8
0
def workspace(tmp_dir, dvc, request):
    """Fixture: add a parametrized cloud "workspace" remote plus a "cache"
    remote rooted inside it, wiring the cache in for the cloud's scheme."""
    from dvc.objects.db import ODBManager

    cloud = request.param
    assert cloud

    tmp_dir.add_remote(name="workspace", config=cloud.config, default=False)
    tmp_dir.add_remote(
        name="cache",
        url="remote://workspace/cache",
        default=False,
    )

    # NOTE(review): the http scheme is skipped when wiring the cache —
    # presumably http cannot serve as a cache backend; confirm.
    scheme = getattr(cloud, "scheme", "local")
    if scheme != "http":
        with dvc.config.edit() as conf:
            conf["cache"][scheme] = "cache"
        dvc.odb = ODBManager(dvc)

    return cloud
Beispiel #9
0
def test_shared_cache(tmp_dir, dvc, group):
    """Cache dirs/files must get group-shared modes when "shared: group"
    is configured, and umask-derived modes otherwise."""
    from dvc.utils.fs import umask

    if group:
        with dvc.config.edit() as conf:
            conf["cache"].update({"shared": "group"})
    dvc.odb = ODBManager(dvc)
    cache_dir = dvc.odb.local.cache_dir

    assert not os.path.exists(cache_dir)

    tmp_dir.dvc_gen({"file": "file content", "dir": {"file2": "file 2 content"}})

    # Collect the mode of every entry the cache produced.
    actual = {}
    for root, dirnames, filenames in os.walk(cache_dir):
        for entry in dirnames + filenames:
            full = os.path.join(root, entry)
            actual[full] = oct(stat.S_IMODE(os.stat(full).st_mode))

    file_mode = oct(0o444)
    dir_mode = oct(0o2775) if group else oct(0o777 & ~umask)

    expected = {
        os.path.join(cache_dir, "17"): dir_mode,
        os.path.join(
            cache_dir, "17", "4eaa1dd94050255b7b98a7e1924b31.dir"
        ): file_mode,
        os.path.join(cache_dir, "97"): dir_mode,
        os.path.join(
            cache_dir, "97", "e17781c198500e2766ea56bd697c03"
        ): file_mode,
        os.path.join(cache_dir, "d1"): dir_mode,
        os.path.join(
            cache_dir, "d1", "0b4c3ff123b26dc068d43a8bef2d23"
        ): file_mode,
    }

    assert expected == actual
Beispiel #10
0
 def test_get(self):
     """hash_to_path must map the cached md5 back to its on-disk path."""
     local_odb = ODBManager(self.dvc).local
     cache_path = local_odb.hash_to_path(self.cache1_md5)
     self.assertEqual(os.fspath(cache_path), self.cache1)
Beispiel #11
0
 def test_all(self):
     """`all()` must enumerate exactly the two known cache entries."""
     hashes = list(ODBManager(self.dvc).local.all())
     self.assertEqual(len(hashes), 2)
     self.assertIn(self.cache1_md5, hashes)
     self.assertIn(self.cache2_md5, hashes)
Beispiel #12
0
    def __init__(
        self,
        root_dir=None,
        scm=None,
        rev=None,
        subrepos=False,
        uninitialized=False,
        config=None,
        url=None,
        repo_factory=None,
    ):
        """Initialize the repo: config, filesystem, lock, state, ODB, sub-APIs.

        Args:
            root_dir: repo root; when None it is resolved by
                ``self._get_repo_dirs``.
            scm: optional SCM handle; when set, the repo is read through the
                SCM's filesystem at ``rev`` (read-only view, no real lock).
            rev: revision to read at; when given without ``scm``, an SCM is
                built from ``root_dir`` (or the cwd).
            subrepos: whether RepoFileSystem should traverse subrepos.
            uninitialized: tolerate a missing/uninitialized .dvc directory.
            config: extra config dict merged into the on-disk Config.
            url: URL this repo object was instantiated from, if any.
            repo_factory: stored in ``self._fs_conf`` for RepoFileSystem.
        """
        # Local imports — presumably to keep module import cheap and avoid
        # circular imports (TODO confirm against project conventions).
        from dvc.config import Config
        from dvc.data_cloud import DataCloud
        from dvc.fs.local import LocalFileSystem
        from dvc.lock import LockNoop, make_lock
        from dvc.machine import MachineManager
        from dvc.objects.db import ODBManager
        from dvc.repo.live import Live
        from dvc.repo.metrics import Metrics
        from dvc.repo.params import Params
        from dvc.repo.plots import Plots
        from dvc.repo.stage import StageLoad
        from dvc.scm import SCM
        from dvc.stage.cache import StageCache
        from dvc.state import State, StateNoop

        self.url = url
        self._fs_conf = {"repo_factory": repo_factory}

        # A revision without an SCM implies "use the SCM found at root_dir
        # (or the current directory)".
        if rev and not scm:
            scm = SCM(root_dir or os.curdir)

        self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
            root_dir=root_dir, scm=scm, rev=rev, uninitialized=uninitialized
        )

        # Read through the SCM at `rev` when available, else the local disk.
        if scm:
            self._fs = scm.get_fs(rev)
        else:
            self._fs = LocalFileSystem(url=self.root_dir)

        self.config = Config(self.dvc_dir, fs=self.fs, config=config)
        self._uninitialized = uninitialized
        self._scm = scm

        # used by RepoFileSystem to determine if it should traverse subrepos
        self.subrepos = subrepos

        self.cloud = DataCloud(self)
        self.stage = StageLoad(self)

        if scm or not self.dvc_dir:
            # SCM-backed (read-only) view or uninitialized repo: use no-op
            # lock and state.
            self.lock = LockNoop()
            self.state = StateNoop()
            self.odb = ODBManager(self)
        else:
            self.lock = make_lock(
                os.path.join(self.tmp_dir, "lock"),
                tmp_dir=self.tmp_dir,
                hardlink_lock=self.config["core"].get("hardlink_lock", False),
                friendly=True,
            )

            # NOTE: storing state and link_state in the repository itself to
            # avoid any possible state corruption in 'shared cache dir'
            # scenario.
            self.state = State(self.root_dir, self.tmp_dir, self.dvcignore)
            self.odb = ODBManager(self)

            self.stage_cache = StageCache(self)

            self._ignore()

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)
        self.live = Live(self)

        # Machine management is opt-in via the "feature.machine" config flag
        # (or forced in DVC's own test environment) and needs a tmp dir.
        if self.tmp_dir and (
            self.config["feature"].get("machine", False)
            or env2bool("DVC_TEST")
        ):
            self.machine = MachineManager(self)
        else:
            self.machine = None

        # Optional hook invoked when stage collection fails for a path.
        self.stage_collection_error_handler: Optional[
            Callable[[str, Exception], None]
        ] = None
        # Re-entrancy counter for the repo lock.
        self._lock_depth = 0
Beispiel #13
0
    def __init__(
        self,
        root_dir=None,
        scm=None,
        rev=None,
        subrepos=False,
        uninitialized=False,
        config=None,
        url=None,
        repo_factory=None,
    ):
        """Initialize the repo: config, filesystem, lock, state, ODB, sub-APIs.

        Unlike the sibling variant, this one requires ``scm`` (when given) to
        be a Git instance and reads through GitFileSystem; the state database
        directory is resolved via ``self._get_database_dir("state")``.

        Args:
            root_dir: repo root; when None it is resolved by
                ``self._get_repo_dirs``.
            scm: optional Git SCM handle; when set, the repo is read through
                GitFileSystem at ``rev`` (read-only view, no real lock).
            rev: revision to read at; when given without ``scm``, an SCM is
                built from ``root_dir`` (or the cwd).
            subrepos: whether RepoFileSystem should traverse subrepos.
            uninitialized: tolerate a missing/uninitialized .dvc directory.
            config: extra config dict merged into the on-disk Config.
            url: URL this repo object was instantiated from, if any.
            repo_factory: stored in ``self._fs_conf`` for RepoFileSystem.
        """
        # Local imports — presumably to keep module import cheap and avoid
        # circular imports (TODO confirm against project conventions).
        from dvc.config import Config
        from dvc.data_cloud import DataCloud
        from dvc.fs.git import GitFileSystem
        from dvc.fs.local import LocalFileSystem
        from dvc.lock import LockNoop, make_lock
        from dvc.objects.db import ODBManager
        from dvc.repo.live import Live
        from dvc.repo.metrics import Metrics
        from dvc.repo.params import Params
        from dvc.repo.plots import Plots
        from dvc.repo.stage import StageLoad
        from dvc.scm import SCM, Git
        from dvc.stage.cache import StageCache
        from dvc.state import State, StateNoop

        self.url = url
        self._fs_conf = {"repo_factory": repo_factory}

        # A revision without an SCM implies "use the SCM found at root_dir
        # (or the current directory)".
        if rev and not scm:
            scm = SCM(root_dir or os.curdir)

        self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
            root_dir=root_dir, scm=scm, rev=rev, uninitialized=uninitialized
        )

        # Read through git at `rev` when an SCM is available, else local disk.
        if scm:
            assert isinstance(scm, Git)  # only Git is supported here
            self._fs = GitFileSystem(scm=scm, rev=rev)
        else:
            self._fs = LocalFileSystem(url=self.root_dir)

        self.config = Config(self.dvc_dir, fs=self.fs, config=config)
        self._uninitialized = uninitialized
        self._scm = scm

        # used by RepoFileSystem to determine if it should traverse subrepos
        self.subrepos = subrepos

        self.cloud = DataCloud(self)
        self.stage = StageLoad(self)

        if scm or not self.dvc_dir:
            # SCM-backed (read-only) view or uninitialized repo: use no-op
            # lock and state.
            self.lock = LockNoop()
            self.state = StateNoop()
            self.odb = ODBManager(self)
        else:
            self.lock = make_lock(
                os.path.join(self.tmp_dir, "lock"),
                tmp_dir=self.tmp_dir,
                hardlink_lock=self.config["core"].get("hardlink_lock", False),
                friendly=True,
            )

            # State DB location is resolved by the repo (may differ from
            # tmp_dir, unlike the sibling variant).
            state_db_dir = self._get_database_dir("state")
            self.state = State(self.root_dir, state_db_dir, self.dvcignore)
            self.odb = ODBManager(self)

            self.stage_cache = StageCache(self)

            self._ignore()

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)
        self.live = Live(self)

        # Optional hook invoked when stage collection fails for a path.
        self.stage_collection_error_handler: Optional[
            Callable[[str, Exception], None]
        ] = None
        # Re-entrancy counter for the repo lock.
        self._lock_depth = 0