Exemple #1
0
    def __init__(
        self,
        stage,
        path,
        info=None,
        remote=None,
        cache=True,
        metric=False,
        persist=False,
    ):
        """Initialise an output (or dependency) entry for ``stage``.

        Outputs and dependencies expose several path-like views:

        * ``def_path``  - the path as written in the stage file
        * ``path_info`` - structured (PathInfo/URLInfo) resolved path
        * ``fspath``    - resolved path, local only
        * ``__str__``   - presentation form (def_path/relpath)

        A "resolved" path is the actual location: it should be absolute
        and must not contain ``remote://`` references.

        Args:
            stage: owning stage; when falsy, ``self.repo`` is ``None``.
            path: path taken from the stage file definition.
            info: stored unchanged on ``self.info``.
            remote: remote to use; when falsy, ``self.REMOTE`` is
                instantiated with ``self.repo`` and an empty config.
            cache: caching flag; forced to ``False`` for dependencies.
            metric: metric flag; forced to ``False`` for dependencies.
            persist: stored unchanged on ``self.persist``.

        Raises:
            RemoteCacheRequiredError: caching is enabled but ``self.cache``
                is ``None``.
        """
        self._validate_output_path(path)

        self.stage = stage
        if stage:
            self.repo = stage.repo
        else:
            self.repo = None
        self.def_path = path
        self.info = info
        self.remote = remote if remote else self.REMOTE(self.repo, {})

        # Dependencies are never cached and never treated as metrics.
        if self.IS_DEPENDENCY:
            self.use_cache = False
            self.metric = False
        else:
            self.use_cache = cache
            self.metric = metric
        self.persist = persist

        # The raw ``remote`` argument (possibly None) is handed over here,
        # not the fallback stored on ``self.remote``.
        self.path_info = self._parse_path(remote, path)

        cache_missing = self.use_cache and self.cache is None
        if cache_missing:
            raise RemoteCacheRequiredError(self.path_info)
Exemple #2
0
    def __init__(
        self,
        stage,
        path,
        info=None,
        cache=True,
        metric=False,
        plot=False,
        persist=False,
        checkpoint=False,
        live=False,
        desc=None,
        isexec=False,
        remote=None,
    ):
        """Initialise an output (or dependency) entry for ``stage``.

        Outputs and dependencies expose several path-like views:

        * ``def_path``  - the path as defined in the DVC file
        * ``path_info`` - structured (PathInfo/URLInfo) resolved path
        * ``fspath``    - resolved path, local only
        * ``__str__``   - presentation form (def_path/relpath)

        A "resolved" path is the actual location: it should be absolute
        and must not contain ``remote://`` references.

        Args:
            stage: owning stage; when falsy, ``self.repo`` is ``None``.
            path: path/URL of the entry; also used to pick the filesystem
                via ``get_cloud_fs``.
            info: dict turned into ``self.hash_info`` by
                ``HashInfo.from_dict``.
            cache, metric, plot, isexec: flags that are forced to
                ``False`` for dependencies.
            persist, checkpoint, live, desc, remote: stored unchanged on
                the attribute of the same name.

        Raises:
            RemoteCacheRequiredError: caching is enabled but ``self.odb``
                is ``None``.
        """
        self.repo = stage.repo if stage else None

        fs_cls, fs_config, resolved = get_cloud_fs(self.repo, url=path)
        self.fs = fs_cls(**fs_config)

        # Local paths that live inside the repository are recorded
        # relative to the stage's working directory.
        inside_repo = (
            self.fs.scheme == "local"
            and stage
            and path_isin(path, stage.repo.root_dir)
        )
        self.def_path = relpath(path, stage.wdir) if inside_repo else path

        self._validate_output_path(path, stage)

        self.stage = stage
        self.hash_info = HashInfo.from_dict(info)

        # Dependencies never carry cache/metric/plot flags.
        if self.IS_DEPENDENCY:
            self.use_cache = False
            self.metric = False
            self.plot = False
        else:
            self.use_cache = cache
            self.metric = metric
            self.plot = plot
        self.persist = persist
        self.checkpoint = checkpoint
        self.live = live
        self.desc = desc

        self.path_info = self._parse_path(self.fs, resolved)
        odb_missing = self.use_cache and self.odb is None
        if odb_missing:
            raise RemoteCacheRequiredError(self.path_info)

        self.obj = None
        if self.IS_DEPENDENCY:
            self.isexec = False
        else:
            self.isexec = isexec

        self.remote = remote
Exemple #3
0
    def get_dir_checksum(self, path_info, tree=None):
        """Collect a directory's contents and save its directory info.

        Args:
            path_info: location of the directory to process.
            tree: optional tree that ``path_info`` belongs to. It is
                forwarded to ``_collect_dir``; when truthy, no state entry
                is saved for ``path_info``.

        Returns:
            Whatever ``self._save_dir_info`` returns for the collected
            directory info.

        Raises:
            RemoteCacheRequiredError: ``self.cache`` is falsy.
        """
        if not self.cache:
            raise RemoteCacheRequiredError(path_info)

        # Forward the caller's tree. Hard-coding ``tree=None`` here would
        # silently discard the argument and collect without it, even though
        # the branch below treats ``path_info`` as a path inside that tree.
        dir_info = self._collect_dir(path_info, tree=tree)
        if tree:
            # don't save state entry for path_info if it is a tree path
            path_info = None
        return self._save_dir_info(dir_info, path_info)
Exemple #4
0
    def __init__(
        self,
        stage,
        path,
        info=None,
        fs=None,
        cache=True,
        metric=False,
        plot=False,
        persist=False,
        checkpoint=False,
        live=False,
        desc=None,
        isexec=False,
    ):
        """Initialise an output (or dependency) entry for ``stage``.

        Outputs and dependencies expose several path-like views:

        * ``def_path``  - the path as defined in the DVC file
        * ``path_info`` - structured (PathInfo/URLInfo) resolved path
        * ``fspath``    - resolved path, local only
        * ``__str__``   - presentation form (def_path/relpath)

        A "resolved" path is the actual location: it should be absolute
        and must not contain ``remote://`` references.

        Args:
            stage: owning stage; when falsy, ``self.repo`` is ``None``.
            path: path taken from the DVC file definition.
            info: dict turned into ``self.hash_info`` by
                ``HashInfo.from_dict``.
            fs: filesystem to use; when falsy, ``self.FS_CLS`` is
                instantiated with ``self.repo`` and an empty config.
            cache, metric, plot, isexec: flags that are forced to
                ``False`` for dependencies.
            persist, checkpoint, live, desc: stored unchanged on the
                attribute of the same name.

        Raises:
            RemoteCacheRequiredError: caching is enabled but ``self.odb``
                is ``None``.
        """
        self._validate_output_path(path, stage)

        self.stage = stage
        if stage:
            self.repo = stage.repo
        else:
            self.repo = None
        self.def_path = path
        self.hash_info = HashInfo.from_dict(info)
        self.fs = fs if fs else self.FS_CLS(self.repo, {})

        # Dependencies never carry cache/metric/plot flags.
        if self.IS_DEPENDENCY:
            self.use_cache = False
            self.metric = False
            self.plot = False
        else:
            self.use_cache = cache
            self.metric = metric
            self.plot = plot
        self.persist = persist
        self.checkpoint = checkpoint
        self.live = live
        self.desc = desc

        # The raw ``fs`` argument (possibly None) is handed over here,
        # not the fallback stored on ``self.fs``.
        self.path_info = self._parse_path(fs, path)
        odb_missing = self.use_cache and self.odb is None
        if odb_missing:
            raise RemoteCacheRequiredError(self.path_info)

        self.obj = None
        if self.IS_DEPENDENCY:
            self.isexec = False
        else:
            self.isexec = isexec

        # Best effort: read the entries of the git index through the
        # dulwich backend; fall back to an empty list when that fails
        # with one of the errors below (e.g. ``self.repo`` is None).
        try:
            index = self.repo.scm.backends.backends["dulwich"]().repo.open_index()
            raw_entries = list(index)
            tracked = [
                os.path.abspath(entry.decode("utf-8"))
                for entry in raw_entries
            ]
        except (SCMError, AttributeError, NoIndexPresent):
            tracked = []
        self.git_tracked_files = tracked
Exemple #5
0
    def get_dir_checksum(self, path_info):
        """Compute a directory's checksum and store its dir info in cache.

        The directory is collected, its info checksum is computed, and the
        temporary info file is moved into the cache when the cache reports
        the entry as changed. Both ``path_info`` and the cache location are
        then recorded in ``self.state`` under that checksum.

        Args:
            path_info: location of the directory to process.

        Returns:
            The checksum produced by ``self._get_dir_info_checksum``.

        Raises:
            RemoteCacheRequiredError: ``self.cache`` is falsy.
        """
        cache = self.cache
        if not cache:
            raise RemoteCacheRequiredError(path_info)

        checksum, tmp_info = self._get_dir_info_checksum(
            self._collect_dir(path_info)
        )
        cache_info = cache.checksum_to_path_info(checksum)

        if cache.changed_cache_file(checksum):
            # Make sure the destination directory exists before moving
            # the temporary file into place.
            cache.makedirs(cache_info.parent)
            cache.move(tmp_info, cache_info, mode=self.CACHE_MODE)

        for target in (path_info, cache_info):
            self.state.save(target, checksum)

        return checksum
Exemple #6
0
    def __init__(
        self,
        stage,
        path,
        info=None,
        tree=None,
        cache=True,
        metric=False,
        plot=False,
        persist=False,
        checkpoint=False,
        live=False,
        desc=None,
        isexec=False,
    ):
        """Initialise an output (or dependency) entry for ``stage``.

        Outputs and dependencies expose several path-like views:

        * ``def_path``  - the path as written in the stage file
        * ``path_info`` - structured (PathInfo/URLInfo) resolved path
        * ``fspath``    - resolved path, local only
        * ``__str__``   - presentation form (def_path/relpath)

        A "resolved" path is the actual location: it should be absolute
        and must not contain ``remote://`` references.

        Args:
            stage: owning stage; when falsy, ``self.repo`` is ``None``.
            path: path taken from the stage file definition.
            info: dict turned into ``self.hash_info`` by
                ``HashInfo.from_dict``.
            tree: tree to use; when falsy, ``self.TREE_CLS`` is
                instantiated with ``self.repo`` and an empty config.
            cache, metric, plot, isexec: flags that are forced to
                ``False`` for dependencies.
            persist, checkpoint, live, desc: stored unchanged on the
                attribute of the same name.

        Raises:
            RemoteCacheRequiredError: caching is enabled but ``self.cache``
                is ``None``.
        """
        self._validate_output_path(path, stage)

        self.stage = stage
        if stage:
            self.repo = stage.repo
        else:
            self.repo = None
        self.def_path = path
        self.hash_info = HashInfo.from_dict(info)
        self.tree = tree if tree else self.TREE_CLS(self.repo, {})

        # Dependencies never carry cache/metric/plot flags.
        if self.IS_DEPENDENCY:
            self.use_cache = False
            self.metric = False
            self.plot = False
        else:
            self.use_cache = cache
            self.metric = metric
            self.plot = plot
        self.persist = persist
        self.checkpoint = checkpoint
        self.live = live
        self.desc = desc

        # The raw ``tree`` argument (possibly None) is handed over here,
        # not the fallback stored on ``self.tree``.
        self.path_info = self._parse_path(tree, path)
        cache_missing = self.use_cache and self.cache is None
        if cache_missing:
            raise RemoteCacheRequiredError(self.path_info)

        if self.IS_DEPENDENCY:
            self.isexec = False
        else:
            self.isexec = isexec
Exemple #7
0
    def get_dir_hash(self, path_info, tree, **kwargs):
        """Collect a directory through ``tree`` and save its dir info.

        Args:
            path_info: location of the directory to process.
            tree: tree passed on to ``self._collect_dir``.
            **kwargs: forwarded untouched to ``self._collect_dir``.

        Returns:
            Whatever ``self._save_dir_info`` returns for the collected
            directory info and ``path_info``.

        Raises:
            RemoteCacheRequiredError: ``self.cache`` is falsy.
        """
        if self.cache:
            collected = self._collect_dir(path_info, tree, **kwargs)
            return self._save_dir_info(collected, path_info)

        # Nothing can be saved without a cache.
        raise RemoteCacheRequiredError(path_info)
Exemple #8
0
    def get_dir_checksum(self, path_info):
        """Collect a directory and save its dir info.

        Args:
            path_info: location of the directory to process.

        Returns:
            Whatever ``self._save_dir_info`` returns for the collected
            directory info and ``path_info``.

        Raises:
            RemoteCacheRequiredError: ``self.cache`` is falsy.
        """
        if self.cache:
            return self._save_dir_info(
                self._collect_dir(path_info), path_info
            )

        # Nothing can be saved without a cache.
        raise RemoteCacheRequiredError(path_info)
Exemple #9
0
 def odb(self):
     """Return the repo's object database for this entry's scheme.

     The database is looked up on ``self.repo.odb`` under the attribute
     named by ``self.scheme``.

     Raises:
         RemoteCacheRequiredError: caching is enabled (``self.use_cache``)
             but the looked-up database is ``None``.
     """
     store = getattr(self.repo.odb, self.scheme)
     if not self.use_cache or store is not None:
         return store
     raise RemoteCacheRequiredError(self.fs.scheme, self.fs_path)