コード例 #1
0
ファイル: config.py プロジェクト: mathiasbc/dvc
    def __init__(
        self,
        dvc_dir=None,
        validate=True,
        tree=None,
        config=None,
    ):  # pylint: disable=super-init-not-called
        """Resolve the DVC directory, set up the trees and load the config.

        Args:
            dvc_dir: path to the DVC directory. When falsy, it is looked up
                via ``Repo.find_dvc_dir()``; if that raises
                ``NotDvcRepoError`` the directory is recorded as ``None``.
            validate: forwarded to ``self.load``.
            tree: tree to expose as ``self.tree``; when falsy, the local
                working tree (``self.wtree``) is used instead.
            config: forwarded to ``self.load``.
        """
        from dvc.tree.local import LocalTree

        self.dvc_dir = dvc_dir

        if dvc_dir:
            # Explicit location: resolve symlinks, then make it absolute.
            self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir))
        else:
            try:
                from dvc.repo import Repo

                self.dvc_dir = os.path.join(Repo.find_dvc_dir())
            except NotDvcRepoError:
                # Not inside a DVC repository: there is no directory to use.
                self.dvc_dir = None

        # The working tree is always local and rooted at the DVC directory.
        self.wtree = LocalTree(None, {"url": self.dvc_dir})
        self.tree = tree if tree else self.wtree

        self.load(validate=validate, config=config)
コード例 #2
0
ファイル: state.py プロジェクト: vladiibine/dvc
    def __init__(self, repo):
        """Set up state tracking for ``repo``.

        Reads the optional ``"state"`` section of ``repo.config`` for the
        row limit and cleanup quota, falling back to the class defaults.

        When ``repo.tmp_dir`` is falsy, ``state_file`` is set to ``None`` and
        initialization stops there: ``temp_files``, ``database``, ``cursor``
        and ``inserts`` are not assigned in that case.
        """
        from dvc.tree.local import LocalTree

        super().__init__()

        self.repo = repo
        self.root_dir = repo.root_dir
        self.tree = LocalTree(None, {"url": self.root_dir})

        settings = repo.config.get("state", {})
        self.row_limit = settings.get("row_limit", self.STATE_ROW_LIMIT)
        self.row_cleanup_quota = settings.get(
            "row_cleanup_quota", self.STATE_ROW_CLEANUP_QUOTA
        )

        tmp_dir = repo.tmp_dir
        if not tmp_dir:
            self.state_file = None
            return

        self.state_file = os.path.join(tmp_dir, self.STATE_FILE)

        # Companion files SQLite may create next to the database, see
        # https://www.sqlite.org/tempfiles.html
        self.temp_files = [
            self.state_file + suffix for suffix in ("-journal", "-wal")
        ]

        self.database = None
        self.cursor = None
        self.inserts = 0
コード例 #3
0
ファイル: executor.py プロジェクト: vladkol/dvc
    def __init__(
        self,
        baseline_rev: str,
        checkpoint_reset: Optional[bool] = False,
        **kwargs,
    ):
        """Create an executor backed by a fresh temporary DVC repository.

        Args:
            baseline_rev: baseline revision; forwarded to the parent
                initializer and abbreviated to 7 characters in the debug log.
            checkpoint_reset: stored as ``self.checkpoint_reset``.
            **kwargs: must contain ``dvc_dir`` (joined onto the temporary
                directory path) and ``cache_dir`` (passed to
                ``self._config``); the remaining entries are forwarded to
                the parent initializer.

        Raises:
            KeyError: if ``dvc_dir`` or ``cache_dir`` is missing from
                ``kwargs``.
        """
        from dvc.repo import Repo

        dvc_dir = kwargs.pop("dvc_dir")
        cache_dir = kwargs.pop("cache_dir")
        super().__init__(baseline_rev, **kwargs)
        self.tmp_dir = TemporaryDirectory()

        # init empty DVC repo (will be overwritten when input is uploaded)
        Repo.init(root_dir=self.tmp_dir.name, no_scm=True)
        logger.debug(
            "Init local executor in dir '%s' with baseline '%s'.",
            # Log the directory path; formatting the TemporaryDirectory
            # object itself would put its repr inside the quotes.
            self.tmp_dir.name,
            baseline_rev[:7],
        )
        self.dvc_dir = os.path.join(self.tmp_dir.name, dvc_dir)
        self._config(cache_dir)
        # NOTE(review): ``self.dvc`` is not assigned in this method; it is
        # presumably provided by the class (e.g. a property) -- confirm.
        self._tree = LocalTree(self.dvc, {"url": self.dvc.root_dir})
        # override default CACHE_MODE since files must be writable in order
        # to run repro
        self._tree.CACHE_MODE = 0o644
        self.checkpoint_reset = checkpoint_reset
        self.checkpoint = False
コード例 #4
0
ファイル: test_ignore.py プロジェクト: vladbelousov451/dvc
def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
    """Check which ignore objects are collected for nested ignore files.

    One ignore file is written next to ``dname`` and another inside it.
    The tree is then expected to expose exactly three ignores: the default
    directory ignore, a patterns trie and a ``DvcIgnoreRepo``.
    """
    tmp_dir.gen({"dir": {"subdir": {}}})

    top_ignore_file = (tmp_dir / dname).with_name(DvcIgnore.DVCIGNORE_FILE)
    top_ignore_file.write_text(os.path.basename(dname))
    # Drop the "dvcignore" entry stored on the tree instance; presumably a
    # cached value that gets recomputed on the next access -- confirm.
    dvc.tree.__dict__.pop("dvcignore", None)

    nested_ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE
    nested_ignore_file.write_text("foo")

    collected = dvc.tree.dvcignore.ignores
    assert len(collected) == 3
    assert DvcIgnoreDirs([".git", ".hg", ".dvc"]) in collected

    # Keep the last trie encountered while iterating the collection.
    tries = [i for i in collected if isinstance(i, DvcIgnorePatternsTrie)]
    ignore_pattern_trie = tries[-1] if tries else None
    assert ignore_pattern_trie is not None

    expected_patterns = DvcIgnorePatterns.from_files(
        os.fspath(top_ignore_file),
        LocalTree(None, {"url": dvc.root_dir}),
    )
    assert expected_patterns == ignore_pattern_trie[
        os.fspath(nested_ignore_file)
    ]

    repo_ignores = [i for i in collected if isinstance(i, DvcIgnoreRepo)]
    assert any(repo_ignores)
コード例 #5
0
ファイル: test_local.py プロジェクト: zang3tsu/dvc
def test_is_protected(tmp_dir, dvc, link_name):
    """Check ``is_protected`` across protect/unprotect for a linked file.

    ``link_name`` names the ``LocalTree`` method used to create the link
    from ``foo`` to ``link``.
    """
    tree = LocalTree(dvc, {})
    make_link = getattr(tree, link_name)

    source_file = tmp_dir / "foo"
    source_file.write_text("foo")

    foo = PathInfo(source_file)
    link = PathInfo(tmp_dir / "link")

    make_link(foo, link)

    # Nothing is protected right after linking.
    for path_info in (foo, link):
        assert not tree.is_protected(path_info)

    tree.protect(foo)

    # Protecting the source is visible through the link as well.
    for path_info in (foo, link):
        assert tree.is_protected(path_info)

    tree.unprotect(link)

    assert not tree.is_protected(link)

    # NOTE: NTFS doesn't allow deleting read-only files, which forces us to
    # set write perms on the link, which propagates to the source.
    source_loses_protection = os.name == "nt" and link_name == "hardlink"
    assert bool(tree.is_protected(foo)) != source_loses_protection
コード例 #6
0
def test_track_from_multiple_files(tmp_dir):
    """Check per-file key tracking for a context merged from two files.

    ``params.yaml`` provides ``Train.us.lr`` and ``params2.yaml`` provides
    ``Train.us.layers``. Selecting the intermediate ``Train`` / ``Train.us``
    nodes must not be tracked for either file, while each leaf must be
    tracked only for the file it came from, also when reached via an alias.
    """
    tree = LocalTree(None, config={})
    path1 = tmp_dir / "params.yaml"
    path2 = tmp_dir / "params2.yaml"
    dump_yaml(path1, {"Train": {"us": {"lr": 10}}}, tree)
    dump_yaml(path2, {"Train": {"us": {"layers": 100}}}, tree)

    context = Context.load_from(tree, path1)
    context.merge_update(Context.load_from(tree, path2))

    def is_tracked(tracked_map, source, key):
        # ``tracked_map`` is indexed by the source file's relative path.
        return key in tracked_map[relpath(source)]

    with context.track() as tracked:
        context.select("Train")
        assert not is_tracked(tracked, path1, "Train")
        assert not is_tracked(tracked, path2, "Train")

        context.select("Train.us")
        assert not is_tracked(tracked, path1, "Train.us")
        assert not is_tracked(tracked, path2, "Train.us")

        context.select("Train.us.lr")
        assert is_tracked(tracked, path1, "Train.us.lr")
        assert not is_tracked(tracked, path2, "Train.us.lr")

        context.select("Train.us.layers")
        assert not is_tracked(tracked, path1, "Train.us.layers")
        assert is_tracked(tracked, path2, "Train.us.layers")

    # A clone starts without any tracked data.
    context = Context.clone(context)
    assert not context._tracked_data

    # let's see with an alias
    context["us"] = context["Train"]["us"]
    with context.track() as tracked:
        context.select("us")
        assert not is_tracked(tracked, path1, "Train.us")
        assert not is_tracked(tracked, path2, "Train.us")

        context.select("us.lr")
        assert is_tracked(tracked, path1, "Train.us.lr")
        assert not is_tracked(tracked, path2, "Train.us.lr")

        context.select("Train.us.layers")
        assert not is_tracked(tracked, path1, "Train.us.layers")
        assert is_tracked(tracked, path2, "Train.us.layers")
コード例 #7
0
ファイル: test_local.py プロジェクト: zang3tsu/dvc
def test_protect_ignore_erofs(tmp_dir, mocker):
    """``protect`` must not propagate ``EROFS`` raised by ``os.chmod``."""
    tmp_dir.gen("foo", "foo")
    tree = LocalTree(None, {})
    target = PathInfo("foo")

    read_only_error = OSError(errno.EROFS, "read-only fs")
    chmod_mock = mocker.patch("os.chmod", side_effect=read_only_error)

    tree.protect(target)
    assert chmod_mock.called
コード例 #8
0
ファイル: test_local.py プロジェクト: zang3tsu/dvc
def test_protect_ignore_errors(tmp_dir, mocker, err):
    """Protecting an already protected file tolerates ``OSError(err)``.

    The file is protected once for real; the second ``protect`` call runs
    with ``os.chmod`` patched to raise and is still expected to return.
    """
    tmp_dir.gen("foo", "foo")
    tree = LocalTree(None, {})
    target = PathInfo("foo")

    # First pass uses the real os.chmod.
    tree.protect(target)

    chmod_error = OSError(err, "something")
    chmod_mock = mocker.patch("os.chmod", side_effect=chmod_error)

    tree.protect(target)
    assert chmod_mock.called
コード例 #9
0
ファイル: test_tree.py プロジェクト: heyannag/dvc
 def test_nobranch(self):
     """Walk a dvcignore-aware local tree from "." and from a subdirectory."""
     tree = LocalTree(None, {"url": self._root_dir}, use_dvcignore=True)
     sub_dir = join("data_dir", "data_sub_dir")

     expected_from_root = [
         (".", ["data_dir"], ["bar", "тест", "code.py", "foo"]),
         (join("data_dir"), ["data_sub_dir"], ["data"]),
         (sub_dir, [], ["data_sub"]),
     ]
     self.assertWalkEqual(tree.walk("."), expected_from_root)

     expected_from_sub_dir = [(sub_dir, [], ["data_sub"])]
     self.assertWalkEqual(tree.walk(sub_dir), expected_from_sub_dir)
コード例 #10
0
ファイル: test_fs.py プロジェクト: zivzone/dvc
def test_path_object_and_str_are_valid_types_get_mtime_and_size(tmp_dir):
    """``get_mtime_and_size`` gives equal results for ``str`` and ``PathInfo``.

    Checked for a directory first and then for a regular file.
    """
    layout = {"dir": {"dir_file": "dir file content"}, "file": "file_content"}
    tmp_dir.gen(layout)
    tree = LocalTree(None, {"url": os.fspath(tmp_dir)}, use_dvcignore=True)

    for name in ("dir", "file"):
        str_time, str_size = get_mtime_and_size(name, tree)
        obj_time, obj_size = get_mtime_and_size(PathInfo(name), tree)
        assert str_time == obj_time
        assert str_size == obj_size
コード例 #11
0
ファイル: test_fs.py プロジェクト: zivzone/dvc
    def test(self):
        """``get_mtime_and_size`` returns strings with the expected sizes.

        The file size must equal the size of ``self.DATA``; the directory
        size must equal the sizes of ``self.DATA`` and ``self.DATA_SUB``
        added together.
        """
        tree = LocalTree(None, {"url": self.root_dir}, use_dvcignore=True)
        file_time, file_size = get_mtime_and_size(self.DATA, tree)
        dir_time, dir_size = get_mtime_and_size(self.DATA_DIR, tree)

        actual_file_size = os.path.getsize(self.DATA)
        actual_dir_size = sum(
            os.path.getsize(path) for path in (self.DATA, self.DATA_SUB)
        )

        checks = (
            (file_time, file_size, actual_file_size),
            (dir_time, dir_size, actual_dir_size),
        )
        for mtime, size, expected_size in checks:
            self.assertIs(type(mtime), str)
            self.assertIs(type(size), str)
            self.assertEqual(size, str(expected_size))
コード例 #12
0
def test_track(tmp_dir):
    """Check which selected keys are recorded while tracking a context.

    The context is loaded from a single ``params.yaml`` file, so the
    tracked mapping is expected to contain exactly one entry.
    """
    params = {
        "lst": [
            {"foo0": "foo0", "bar0": "bar0"},
            {"foo1": "foo1", "bar1": "bar1"},
        ],
        "dct": {"foo": "foo", "bar": "bar", "baz": "baz"},
    }
    tree = LocalTree(None, config={})
    path = tmp_dir / "params.yaml"
    dump_yaml(path, params, tree)

    context = Context.load_from(tree, path)

    def key_tracked(tracked_map, key):
        assert len(tracked_map) == 1
        return key in tracked_map[relpath(path)]

    # (key to select, whether it must be tracked afterwards), in order.
    expectations = (
        ("lst", True),
        ("dct", False),
        ("dct.foo", True),
        # Currently, it's unable to track dictionaries, as it can be merged
        # from multiple sources.
        ("lst.0", False),
        # FIXME: either support tracking list values in ParamsDependency
        # or, prevent this from being tracked.
        ("lst.0.foo0", True),
    )

    with context.track() as tracked:
        for key, should_be_tracked in expectations:
            context.select(key)
            assert key_tracked(tracked, key) is should_be_tracked
コード例 #13
0
ファイル: test_tree.py プロジェクト: heyannag/dvc
 def test(self):
     """Walk a local tree from its root directory and compare all levels."""
     root = self._root_dir
     tree = LocalTree(None, {"url": root})

     expected = [
         (root, ["data_dir"], ["code.py", "bar", "тест", "foo"]),
         (join(root, "data_dir"), ["data_sub_dir"], ["data"]),
         (join(root, "data_dir", "data_sub_dir"), [], ["data_sub"]),
     ]
     self.assertWalkEqual(tree.walk(root), expected)
コード例 #14
0
ファイル: test_local.py プロジェクト: zang3tsu/dvc
def test_status_download_optimization(mocker, dvc):
    """Skip remote existence checks when the local cache has everything.

    When computing the status for a download and every requested hash is
    reported as present in the local cache, ``hashes_exist`` must not be
    called on the remote.
    """
    cache = LocalCache(LocalTree(dvc, {}))

    infos = NamedCache()
    wanted = (
        ("acbd18db4cc2f85cedef654fccc4a4d8", "foo"),
        ("37b51d194a7513e45b56f6524f2d51f2", "bar"),
    )
    for hash_value, name in wanted:
        infos.add("local", hash_value, name)

    # Make the local cache report every requested hash as already present.
    mocker.patch.object(
        cache, "hashes_exist", return_value=list(infos["local"])
    )

    other_remote = mocker.Mock()
    other_remote.url = "other_remote"
    other_remote.hashes_exist.return_value = []
    other_remote.index = RemoteIndexNoop()

    cache.status(infos, other_remote, download=True)

    assert other_remote.hashes_exist.call_count == 0
コード例 #15
0
    def __init__(
        self,
        root_dir=None,
        scm=None,
        rev=None,
        subrepos=False,
        uninitialized=False,
    ):
        """Locate the repository and wire up its collaborators.

        Args:
            root_dir: starting point handed to ``self.find_root``.
            scm: optional SCM object. When truthy, the tree comes from
                ``scm.get_tree`` and the lock and state are no-op objects.
            rev: revision passed to ``scm.get_tree``.
            subrepos: stored as ``self.subrepos``.
            uninitialized: when true, a ``NotDvcRepoError`` raised while
                locating the repository is tolerated; ``dvc_dir`` and
                ``tmp_dir`` are then ``None``.

        Raises:
            NotDvcRepoError: if no repository is found and ``uninitialized``
                is false.

        Note:
            ``stage_cache`` and ``experiments`` are only assigned when no
            ``scm`` is given and a DVC directory exists.
        """
        from dvc.cache import Cache
        from dvc.data_cloud import DataCloud
        from dvc.lock import LockNoop, make_lock
        from dvc.repo.experiments import Experiments
        from dvc.repo.metrics import Metrics
        from dvc.repo.params import Params
        from dvc.repo.plots import Plots
        from dvc.scm import SCM
        from dvc.stage.cache import StageCache
        from dvc.state import State, StateNoop
        from dvc.tree.local import LocalTree
        from dvc.utils.fs import makedirs

        try:
            if rev:
                rev_tree = scm.get_tree(rev)
            else:
                rev_tree = None
            self.root_dir = self.find_root(root_dir, rev_tree)
            self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
            self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
            makedirs(self.tmp_dir, exist_ok=True)
        except NotDvcRepoError:
            if not uninitialized:
                raise
            # Fall back to the SCM root; there is no DVC directory to use.
            self.root_dir = SCM(root_dir or os.curdir).root_dir
            self.dvc_dir = self.tmp_dir = None

        tree_kwargs = {"use_dvcignore": True, "dvcignore_root": self.root_dir}
        self.tree = (
            scm.get_tree(rev, **tree_kwargs)
            if scm
            else LocalTree(self, {"url": self.root_dir}, **tree_kwargs)
        )

        self.config = Config(self.dvc_dir, tree=self.tree)
        self._scm = scm

        # used by RepoTree to determine if it should traverse subrepos
        self.subrepos = subrepos

        self.cache = Cache(self)
        self.cloud = DataCloud(self)

        if not scm and self.dvc_dir:
            lock_path = os.path.join(self.tmp_dir, "lock")
            self.lock = make_lock(
                lock_path,
                tmp_dir=self.tmp_dir,
                hardlink_lock=self.config["core"].get("hardlink_lock", False),
                friendly=True,
            )

            # NOTE: storing state and link_state in the repository itself to
            # avoid any possible state corruption in 'shared cache dir'
            # scenario.
            self.state = State(self)
            self.stage_cache = StageCache(self)

            try:
                self.experiments = Experiments(self)
            except NotImplementedError:
                self.experiments = None

            self._ignore()
        else:
            self.lock = LockNoop()
            self.state = StateNoop()

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)
コード例 #16
0
ファイル: test_tree.py プロジェクト: heyannag/dvc
 def setUp(self):
     """Run the parent fixture setup, then attach a ``LocalTree``."""
     super().setUp()
     empty_config = {}
     self.tree = LocalTree(None, empty_config)
コード例 #17
0
 def tree(self):
     """Return the SCM tree at ``self.rev``, or a local tree without SCM."""
     if not self.scm:
         return LocalTree(self, {"url": self.root_dir})
     return self.scm.get_tree(self.rev)
コード例 #18
0
ファイル: test_tree.py プロジェクト: heyannag/dvc
 def test_subdir(self):
     """Walking a nested directory yields only that directory's entry."""
     tree = LocalTree(None, {"url": self._root_dir})
     sub_dir = join("data_dir", "data_sub_dir")
     expected = [(sub_dir, [], ["data_sub"])]
     self.assertWalkEqual(tree.walk(sub_dir), expected)
コード例 #19
0
ファイル: __init__.py プロジェクト: johnnychen94/dvc
    def __init__(
        self,
        root_dir=None,
        scm=None,
        rev=None,
        subrepos=False,
        uninitialized=False,
    ):
        """Resolve the repository directories and build its collaborators.

        Args:
            root_dir: forwarded to ``self._get_repo_dirs``.
            scm: optional SCM object. When truthy, the tree comes from
                ``scm.get_tree`` and the lock and state are no-op objects.
            rev: revision passed to ``scm.get_tree``.
            subrepos: stored as ``self.subrepos``.
            uninitialized: forwarded to ``self._get_repo_dirs``.

        Note:
            ``stage_cache`` is only assigned when no ``scm`` is given and a
            DVC directory exists.
        """
        from dvc.cache import Cache
        from dvc.data_cloud import DataCloud
        from dvc.lock import LockNoop, make_lock
        from dvc.repo.metrics import Metrics
        from dvc.repo.params import Params
        from dvc.repo.plots import Plots
        from dvc.repo.stage import StageLoad
        from dvc.stage.cache import StageCache
        from dvc.state import State, StateNoop
        from dvc.tree.local import LocalTree

        repo_dirs = self._get_repo_dirs(
            root_dir=root_dir, scm=scm, rev=rev, uninitialized=uninitialized
        )
        self.root_dir, self.dvc_dir, self.tmp_dir = repo_dirs

        tree_kwargs = dict(use_dvcignore=True, dvcignore_root=self.root_dir)
        self.tree = (
            scm.get_tree(rev, **tree_kwargs)
            if scm
            else LocalTree(self, {"url": self.root_dir}, **tree_kwargs)
        )

        self.config = Config(self.dvc_dir, tree=self.tree)
        self._scm = scm

        # used by RepoTree to determine if it should traverse subrepos
        self.subrepos = subrepos

        self.cache = Cache(self)
        self.cloud = DataCloud(self)
        self.stage = StageLoad(self)

        if not scm and self.dvc_dir:
            lock_path = os.path.join(self.tmp_dir, "lock")
            self.lock = make_lock(
                lock_path,
                tmp_dir=self.tmp_dir,
                hardlink_lock=self.config["core"].get("hardlink_lock", False),
                friendly=True,
            )

            # NOTE: storing state and link_state in the repository itself to
            # avoid any possible state corruption in 'shared cache dir'
            # scenario.
            self.state = State(self)
            self.stage_cache = StageCache(self)

            self._ignore()
        else:
            self.lock = LockNoop()
            self.state = StateNoop()

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)
        self.stage_collection_error_handler = None
        self._lock_depth = 0
コード例 #20
0
ファイル: __init__.py プロジェクト: vladbelousov451/dvc
    def __init__(self, root_dir=None, scm=None, rev=None):
        """Locate the repository and build its collaborators.

        Args:
            root_dir: starting point handed to ``self.find_root``.
            scm: optional SCM object. When truthy, the root is searched in
                ``scm.get_tree(rev)``, the tree comes from ``scm.get_tree``
                and the state is a ``StateNoop``. Otherwise a ``LocalTree``
                is used and an SCM is created from the config.
            rev: revision passed to ``scm.get_tree``.
        """
        from dvc.state import State, StateNoop
        from dvc.lock import make_lock
        from dvc.scm import SCM
        from dvc.cache import Cache
        from dvc.data_cloud import DataCloud
        from dvc.repo.experiments import Experiments
        from dvc.repo.metrics import Metrics
        from dvc.repo.plots import Plots
        from dvc.repo.params import Params
        from dvc.tree.local import LocalTree
        from dvc.utils.fs import makedirs
        from dvc.stage.cache import StageCache

        if not scm:
            root_dir = self.find_root(root_dir)
            self.root_dir = os.path.abspath(os.path.realpath(root_dir))
            tree_kwargs = {
                "use_dvcignore": True,
                "dvcignore_root": self.root_dir,
            }
            self.tree = LocalTree(self, {"url": self.root_dir}, **tree_kwargs)
        else:
            rev_tree = scm.get_tree(rev)
            self.root_dir = self.find_root(root_dir, rev_tree)
            self.scm = scm
            tree_kwargs = {
                "use_dvcignore": True,
                "dvcignore_root": self.root_dir,
            }
            self.tree = scm.get_tree(rev, **tree_kwargs)
            self.state = StateNoop()

        self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
        self.config = Config(self.dvc_dir, tree=self.tree)

        if not scm:
            # No SCM was supplied: build one, honoring the "no_scm" option.
            self.scm = SCM(
                self.root_dir,
                no_scm=self.config["core"].get("no_scm", False),
            )

        self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
        self.index_dir = os.path.join(self.tmp_dir, "index")
        makedirs(self.index_dir, exist_ok=True)

        lock_path = os.path.join(self.tmp_dir, "lock")
        self.lock = make_lock(
            lock_path,
            tmp_dir=self.tmp_dir,
            hardlink_lock=self.config["core"].get("hardlink_lock", False),
            friendly=True,
        )

        self.cache = Cache(self)
        self.cloud = DataCloud(self)

        if not scm:
            # NOTE: storing state and link_state in the repository itself to
            # avoid any possible state corruption in 'shared cache dir'
            # scenario.
            self.state = State(self.cache.local)

        self.stage_cache = StageCache(self)

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)

        try:
            self.experiments = Experiments(self)
        except NotImplementedError:
            self.experiments = None

        self._ignore()