Example #1
    def save(self):
        if not self.exists:
            raise self.DoesNotExistError(self)

        if not self.isfile and not self.isdir:
            raise self.IsNotFileOrDirError(self)

        if self.is_empty:
            logger.warning(f"'{self}' is empty.")

        self.ignore()

        if self.metric or self.plot:
            self.verify_metric()

        # Not cached: hash with a dry run so nothing is written to the ODB.
        if not self.use_cache:
            _, self.meta, obj = ostage(
                self.repo.odb.local,
                self.fs_path,
                self.fs,
                self.fs.PARAM_CHECKSUM,
                ignore=self.dvcignore,
                dry_run=True,
            )
            self.hash_info = obj.hash_info
            if not self.IS_DEPENDENCY:
                logger.debug(
                    "Output '%s' doesn't use cache. Skipping saving.", self
                )
            return

        assert not self.IS_DEPENDENCY

        # Cached output: stage into the cache ODB; commit() transfers it later.
        _, self.meta, self.obj = ostage(
            self.odb,
            self.fs_path,
            self.fs,
            self.odb.fs.PARAM_CHECKSUM,
            ignore=self.dvcignore,
        )
        self.hash_info = self.obj.hash_info
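
Both branches above rely on the same contract: ostage (imported as "from dvc_data.stage import stage as ostage", as Example #2 shows) returns a (staging, meta, obj) tuple, and dry_run=True computes hashes without writing anything to the staging ODB. A minimal standalone sketch of that contract, assuming an initialized DVC repository; the file name "data.csv" is a placeholder:

from dvc.repo import Repo
from dvc_data.stage import stage as ostage

repo = Repo()  # any initialized DVC repository
path = repo.fs.path.join(repo.root_dir, "data.csv")  # placeholder path
# Dry run: compute the hash against the local ODB without staging anything.
_, meta, obj = ostage(
    repo.odb.local,  # object database the result would be staged into
    path,            # path to hash
    repo.fs,         # filesystem that path lives on
    "md5",           # hash name, as in Example #2
    dry_run=True,    # hash only; nothing is written
)
print(obj.hash_info)  # e.g. md5: <digest>
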
Example #2
File: diff.py Project: jear/dvc
def _output_paths(repo, targets):
    import os

    from dvc.fs import LocalFileSystem
    from dvc_data.stage import stage as ostage

    on_working_fs = isinstance(repo.fs, LocalFileSystem)

    def _exists(output):
        if on_working_fs:
            return output.exists
        return True

    def _to_path(output):
        relparts = output.fs.path.relparts(output.fs_path)
        base = os.path.join(*relparts)
        if output.is_dir_checksum:
            # A trailing separator marks directory outputs.
            return os.path.join(base, "")
        return base

    for output in repo.index.outs:
        if _exists(output):
            yield_output = targets is None or any(
                output.fs.path.isin_or_eq(output.fs_path, target)
                for target in targets)

            if on_working_fs:
                _, _, obj = ostage(
                    repo.odb.local,
                    output.fs_path,
                    repo.odb.local.fs,
                    "md5",
                    dry_run=True,
                    ignore=output.dvcignore,
                )
                hash_info = obj.hash_info
            else:
                hash_info = output.hash_info
                obj = output.get_obj()

            if yield_output:
                yield _to_path(output), hash_info.value

            if not obj:
                continue

            if output.is_dir_checksum and (
                yield_output
                or any(
                    output.fs.path.isin(target, output.fs_path)
                    for target in targets
                )
            ):
                yield from _dir_output_paths(
                    output.fs, output.fs_path, obj, targets
                )
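
This diff helper yields a (path, hash) pair for every output in the index. The split mirrors Example #1: on the local (working) filesystem the hash is recomputed with a dry-run ostage call, while on a non-local filesystem (e.g. when diffing against a committed revision) the stored hash_info is reused and the object is loaded with output.get_obj(); directory outputs are then expanded granularly via _dir_output_paths.
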
Example #3
    def get_hash(self):
        if self.use_cache:
            odb = self.odb
            name = self.odb.fs.PARAM_CHECKSUM
        else:
            odb = self.repo.odb.local
            name = self.fs.PARAM_CHECKSUM
        _, _, obj = ostage(
            odb,
            self.fs_path,
            self.fs,
            name,
            ignore=self.dvcignore,
            dry_run=not self.use_cache,
        )
        return obj.hash_info
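
Note the symmetry with Example #1: a cached output is hashed against the cache ODB with that ODB's checksum parameter, while an uncached one falls back to the local ODB and the workspace filesystem's checksum, with dry_run set so nothing is staged for outputs that will never live in the cache.
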
Example #4
    def transfer(self,
                 source,
                 odb=None,
                 jobs=None,
                 update=False,
                 no_progress_bar=False):
        if odb is None:
            odb = self.odb

        cls, config, from_info = get_cloud_fs(self.repo, url=source)
        from_fs = cls(**config)

        # When running import-url --to-remote / add --to-remote/-o ... we
        # assume it is unlikely that the odb already contains the majority of
        # the hashes, so we transfer everything as is (even if a file might
        # already be in the cache) rather than waste time scanning the layout
        # of the source location first. But when doing update --to-remote,
        # there is a high probability that the odb contains some of the
        # hashes, so we first calculate all the hashes (without transferring
        # anything) and then upload only the missing cache files.

        upload = not (update and from_fs.isdir(from_info))
        jobs = jobs or min((from_fs.jobs, odb.fs.jobs))
        staging, self.meta, obj = ostage(
            odb,
            from_info,
            from_fs,
            "md5",
            upload=upload,
            no_progress_bar=no_progress_bar,
        )
        otransfer(
            staging,
            odb,
            {obj.hash_info},
            jobs=jobs,
            hardlink=False,
            shallow=False,
        )

        self.hash_info = obj.hash_info
        return obj
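
The staging/transfer split here is the same two-phase pattern used in Examples #5 and #6: ostage computes hashes and places objects in a staging ODB (uploading them when upload=True), and otransfer then moves the referenced objects into the destination ODB. A condensed sketch of the pattern; the otransfer import path is an assumption based on how DVC aliases it, and odb, fs_path and fs are placeholders:

from dvc_data.stage import stage as ostage
from dvc_data.transfer import transfer as otransfer  # assumed alias

# Phase 1: stage, computing hashes (and uploading, since upload=True).
staging, meta, obj = ostage(odb, fs_path, fs, "md5", upload=True)
# Phase 2: move only the objects rooted at obj.hash_info into odb.
otransfer(staging, odb, {obj.hash_info}, hardlink=False, shallow=False)
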
Example #5
    def _commit_granular_dir(self, filter_info):
        prefix = self.fs.path.parts(
            self.fs.path.relpath(filter_info, self.fs_path)
        )
        staging, _, save_obj = ostage(
            self.odb,
            self.fs_path,
            self.fs,
            self.odb.fs.PARAM_CHECKSUM,
            ignore=self.dvcignore,
        )
        save_obj = save_obj.filter(prefix)
        checkout_obj = save_obj.get(self.odb, prefix)
        otransfer(
            staging,
            self.odb,
            {save_obj.hash_info} | {oid for _, _, oid in save_obj},
            shallow=True,
            hardlink=True,
        )
        return checkout_obj
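
Filtering happens on the staged object rather than on disk: the whole directory is staged once, save_obj.filter(prefix) narrows the tree to the requested sub-path, and otransfer is called with shallow=True plus an explicit set of oids (save_obj.hash_info and every oid in the filtered tree) so only that slice of the directory is committed.
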
Example #6
    def commit(self, filter_info=None):
        if not self.exists:
            raise self.DoesNotExistError(self)

        assert self.hash_info

        if self.use_cache:
            # Granular: commit only a sub-path inside a directory output.
            granular = (
                self.is_dir_checksum
                and filter_info
                and filter_info != self.fs_path
            )
            if granular:
                obj = self._commit_granular_dir(filter_info)
            else:
                staging, _, obj = ostage(
                    self.odb,
                    filter_info or self.fs_path,
                    self.fs,
                    self.odb.fs.PARAM_CHECKSUM,
                    ignore=self.dvcignore,
                )
                otransfer(
                    staging,
                    self.odb,
                    {obj.hash_info},
                    shallow=False,
                    hardlink=True,
                )
            self._checkout(
                filter_info or self.fs_path,
                self.fs,
                obj,
                self.odb,
                relink=True,
                ignore=self.dvcignore,
                state=self.repo.state,
                prompt=prompt.confirm,
            )
            self.set_exec()
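
Taken together with the earlier examples: save() (Example #1) stages an output and records its hash, commit() transfers the staged objects into the cache ODB (granularly for directory sub-paths, via Example #5), and the final _checkout with relink=True relinks the workspace file against the cache according to the configured link type.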