Beispiel #1
0
    def _paths_checksums():
        """
        Build a mapping of relpath -> checksum for the outputs found in
        the current tree.

        Directory outputs are keyed with a trailing path separator so they
        cannot be confused with a file output of the same name:

            directory: "data/"
            file:      "data"

        ``self`` is taken from the enclosing scope.
        """
        working = is_working_tree(self.tree)

        checksums = {}
        for stage in self.stages:
            for out in stage.outs:
                # Existence is only checked on the working tree; for any
                # other tree every collected output is kept.
                if working and not out.exists:
                    continue

                if out.is_dir_checksum:
                    # Joining with "" appends the trailing separator.
                    key = os.path.join(str(out), "")
                else:
                    key = str(out)

                if working:
                    checksums[key] = self.cache.local.get_checksum(
                        out.path_info
                    )
                else:
                    checksums[key] = out.checksum

        return checksums
Beispiel #2
0
    def _save_file(self, path_info, checksum, save_link=True, tree=None):
        """Put the file at ``path_info`` into the cache under ``checksum``.

        Without ``tree`` the file is moved into the cache (when the cache
        entry is changed) and linked back to ``path_info``. With ``tree``
        the content is read through ``tree.open`` and copied into the cache.

        Args:
            path_info: location of the file being saved.
            checksum: checksum of the file; must be truthy.
            save_link: whether to record the link in the state
                (only used when ``tree`` is not given).
            tree: optional tree to read the file from.
        """
        assert checksum

        cache_info = self.checksum_to_path_info(checksum)

        if not tree:
            needs_link = True
            if self.changed_cache(checksum):
                self.move(path_info, cache_info, mode=self.CACHE_MODE)
            elif self.iscopy(path_info) and self._cache_is_copy(path_info):
                # Default relink procedure involves unneeded copy
                self.unprotect(path_info)
                needs_link = False
            else:
                self.remove(path_info)

            if needs_link:
                self.link(cache_info, path_info)

            if save_link:
                self.state.save_link(path_info)
        elif self.changed_cache(checksum):
            with tree.open(path_info, mode="rb") as source:
                self.copy_fobj(source, cache_info)

        # Both the path and the cache entry are recorded: with reflink or
        # copy cache types, moving the original file results in updates on
        # the next executed command, which causes md5 recalculation.
        if not (tree and not is_working_tree(tree)):
            self.state.save(path_info, checksum)
        self.state.save(cache_info, checksum)
Beispiel #3
0
def get_mtime_and_size(path, tree):
    """Return ``(mtime, size)`` of ``path`` as a pair of strings.

    For a regular path, the values come from a single ``os.stat`` call.
    For a directory, the size is the sum of the sizes of the files yielded
    by ``tree.walk_files(path)`` and the "mtime" is ``dict_md5`` of the
    per-file mtimes. Files that vanish with ``ENOENT`` are skipped.

    Args:
        path: path to inspect.
        tree: tree used to walk a directory; must be a working tree when
            ``path`` is a directory.

    Returns:
        Tuple ``(str(mtime), str(size))``.
    """
    if not os.path.isdir(fspath_py35(path)):
        file_stat = os.stat(fspath_py35(path))
        stamp = int(nanotime.timestamp(file_stat.st_mtime))
        # State of files handled by dvc is stored in db as TEXT, so the
        # results are cast to strings for later comparisons.
        return str(stamp), str(file_stat.st_size)

    assert is_working_tree(tree)

    total_size = 0
    mtimes_by_path = {}
    for entry in tree.walk_files(path):
        try:
            entry_stat = os.stat(entry)
        except OSError as err:
            # NOTE: broken symlink case.
            if err.errno == errno.ENOENT:
                continue
            raise
        total_size += entry_stat.st_size
        mtimes_by_path[entry] = entry_stat.st_mtime

    # File changes and moves are tracked, which cannot be detected with
    # simply max(mtime(f) for f in non_ignored_files), hence the digest.
    digest = dict_md5(mtimes_by_path)

    # State of files handled by dvc is stored in db as TEXT, so the
    # results are cast to strings for later comparisons.
    return str(digest), str(total_size)
Beispiel #4
0
    def _save_dir(
        self, path_info, checksum, save_link=True, tree=None, **kwargs
    ):
        """Save a directory and record it in the state.

        Without ``tree``, the directory listing is loaded from the cache by
        ``checksum`` and every entry is saved through ``_save_file``. With
        ``tree``, the listing is collected from that tree and ``checksum``
        is replaced by the result of ``_save_dir_info``.

        Args:
            path_info: location of the directory.
            checksum: checksum of the directory (recomputed if ``tree``).
            save_link: whether to record the directory link in the state
                (only used when ``tree`` is not given).
            tree: optional tree to collect the directory from.
            **kwargs: forwarded to ``_collect_dir``.

        Returns:
            Dict with the final checksum under ``self.PARAM_CHECKSUM``.
        """
        if not tree:
            entries = self.get_dir_cache(checksum)
            progress = Tqdm(
                entries, desc="Saving " + path_info.name, unit="file"
            )
            for item in progress:
                # Links of individual entries are not saved; the single
                # directory link below is saved instead.
                self._save_file(
                    path_info / item[self.PARAM_RELPATH],
                    item[self.PARAM_CHECKSUM],
                    save_link=False,
                )

            if save_link:
                self.state.save_link(path_info)
        else:
            collected = self._collect_dir(
                path_info, tree=tree, save_tree=True, **kwargs
            )
            checksum = self._save_dir_info(collected)

        self.state.save(self.checksum_to_path_info(checksum), checksum)
        if not (tree and not is_working_tree(tree)):
            self.state.save(path_info, checksum)

        return {self.PARAM_CHECKSUM: checksum}
Beispiel #5
0
    def __init__(self, stage, path, *args, **kwargs):
        """Initialise the object, normalising ``path`` first.

        When a stage is given and ``path`` lies inside the stage's repo
        root, ``path`` is rewritten relative to ``stage.wdir`` before being
        passed to the parent initialiser. Afterwards, the repo's tree is
        adopted as ``self.tree`` if the object is in the repo and that tree
        is a working tree.
        """
        inside_repo = stage and path_isin(path, stage.repo.root_dir)
        if inside_repo:
            path = relpath(path, stage.wdir)

        super().__init__(stage, path, *args, **kwargs)

        if not self.is_in_repo or not self.repo:
            return
        if is_working_tree(self.repo.tree):
            self.tree = self.repo.tree
Beispiel #6
0
 def work_tree(self):
     """Return the private working tree, or ``None`` if it is not needed.

     When using repo.brancher, repo.tree may switch between WorkingTree
     and GitTree arbitrarily. While repo.tree is not a working tree, the
     local cache has to use its own WorkingTree instance
     (``self._work_tree``); otherwise ``None`` is returned.
     """
     on_working_tree = not self.repo or is_working_tree(self.repo.tree)
     return None if on_working_tree else self._work_tree
Beispiel #7
0
 def tree(self, tree):
     """Store ``tree`` as ``self._tree``, wrapping it in a CleanTree.

     A tree that already is a ``CleanTree`` is stored as-is. Otherwise it
     is wrapped, passing ``self.root_dir`` as the root unless the tree is
     a working tree or is already rooted at ``self.root_dir`` (then the
     root is ``None``).
     """
     rooted_here = is_working_tree(tree) or tree.tree_root == self.root_dir
     clean_root = None if rooted_here else self.root_dir

     if not isinstance(tree, CleanTree):
         tree = CleanTree(tree, clean_root)
     self._tree = tree

     # The graph cache was built from the previous tree, so it is no
     # longer valid.
     self._reset()
Beispiel #8
0
    def _save_file(
        self, path_info, checksum, save_link=True, tree=None, **kwargs
    ):
        """Put the file at ``path_info`` into the cache under ``checksum``.

        Without ``tree`` the file is moved into the cache (when the cache
        entry is changed) and linked back to ``path_info``. With ``tree``
        the file is opened through that tree and, unless opening already
        fetched it, copied into the cache.

        Args:
            path_info: location of the file being saved.
            checksum: checksum of the file; must be truthy.
            save_link: whether to record the link in the state
                (only used when ``tree`` is not given).
            tree: optional tree to read the file from.
            **kwargs: may carry ``download_callback``, which is called with
                ``1`` after a changed cache entry was handled from ``tree``.

        Returns:
            Dict with ``checksum`` under ``self.PARAM_CHECKSUM``.
        """
        assert checksum

        cache_info = self.checksum_to_path_info(checksum)

        if not tree:
            needs_link = True
            if self.changed_cache(checksum):
                self.tree.move(path_info, cache_info, mode=self.CACHE_MODE)
            elif self.tree.iscopy(path_info) and self._cache_is_copy(
                path_info
            ):
                # Default relink procedure involves unneeded copy
                self.unprotect(path_info)
                needs_link = False
            else:
                self.tree.remove(path_info)

            if needs_link:
                self.link(cache_info, path_info)

            if save_link:
                self.state.save_link(path_info)
        elif self.changed_cache(checksum):
            with tree.open(path_info, mode="rb") as source:
                # If the tree has fetch enabled, a DVC out is fetched on
                # open and no data needs to be read/copied.
                fetched_on_open = (
                    tree.isdvc(path_info, strict=False) and tree.fetch
                )
                if not fetched_on_open:
                    self.tree.copy_fobj(source, cache_info)

            notify = kwargs.get("download_callback")
            if notify:
                notify(1)

        # Both the path and the cache entry are recorded: with reflink or
        # copy cache types, moving the original file results in updates on
        # the next executed command, which causes md5 recalculation.
        if not (tree and not is_working_tree(tree)):
            self.state.save(path_info, checksum)
        self.state.save(cache_info, checksum)

        return {self.PARAM_CHECKSUM: checksum}
Beispiel #9
0
    def _unprotect_dir(self, path):
        """Unprotect every file found under ``path``.

        The files are enumerated with the repo tree's ``walk_files``, so the
        repo tree has to be a working tree.
        """
        assert is_working_tree(self.repo.tree)

        walked = self.repo.tree.walk_files(path)
        for file_name in walked:
            RemoteLOCAL._unprotect_file(file_name)
Beispiel #10
0
    def walk_files(self, path_info):
        """Yield a ``PathInfo`` for each file the repo tree walks.

        This is a generator: the working-tree assertion and the walk itself
        only run once iteration starts.
        """
        assert is_working_tree(self.repo.tree)

        yield from map(PathInfo, self.repo.tree.walk_files(path_info))
Beispiel #11
0
 def get_rev(self):
     """Return the revision the current tree corresponds to.

     On a working tree the revision is asked from the SCM. Otherwise it
     is read from the tree's ``rev`` attribute, looking through one level
     of wrapping when the tree exposes an inner ``tree`` attribute.
     """
     if is_working_tree(self.tree):
         return self.scm.get_rev()

     rev_holder = self.tree.tree if hasattr(self.tree, "tree") else self.tree
     return rev_holder.rev
Beispiel #12
0
 def exists(self, path_info):
     """Tell whether ``path_info`` exists according to the repo tree.

     ``path_info`` must be either a plain string or an object whose
     ``scheme`` is ``"local"``; the repo tree must be a working tree.
     """
     assert is_working_tree(self.repo.tree)
     if not isinstance(path_info, str):
         assert path_info.scheme == "local"

     fs_path = fspath_py35(path_info)
     return self.repo.tree.exists(fs_path)