def save(self):
    if not self.exists:
        raise self.DoesNotExistError(self)

    if not self.isfile and not self.isdir:
        raise self.IsNotFileOrDirError(self)

    if self.is_empty:
        logger.warning(f"'{self}' is empty.")

    self.ignore()

    if self.metric or self.plot:
        self.verify_metric()

    if not self.use_cache:
        # Hash the output without staging it into the cache (dry run),
        # so that hash_info is still recorded for uncached outputs.
        _, self.meta, obj = ostage(
            self.repo.odb.local,
            self.fs_path,
            self.fs,
            self.fs.PARAM_CHECKSUM,
            ignore=self.dvcignore,
            dry_run=True,
        )
        self.hash_info = obj.hash_info
        if not self.IS_DEPENDENCY:
            logger.debug(
                "Output '%s' doesn't use cache. Skipping saving.", self
            )
        return

    assert not self.IS_DEPENDENCY

    _, self.meta, self.obj = ostage(
        self.odb,
        self.fs_path,
        self.fs,
        self.odb.fs.PARAM_CHECKSUM,
        ignore=self.dvcignore,
    )
    self.hash_info = self.obj.hash_info
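
# Illustrative analogue (not dvc_data's actual API): `ostage` above hashes
# the path and, unless dry_run=True, also stages it into the object store.
# Below is a minimal stdlib sketch of that split; `_toy_stage` and
# `store_dir` are hypothetical names introduced here for illustration only.
import hashlib
import os
import shutil


def _toy_stage(path, store_dir, dry_run=False):
    # Hash the file in both modes; only copy it into the
    # content-addressed store on a real (non-dry) run.
    with open(path, "rb") as fobj:
        digest = hashlib.md5(fobj.read()).hexdigest()
    if not dry_run:
        os.makedirs(store_dir, exist_ok=True)
        shutil.copy(path, os.path.join(store_dir, digest))
    return digest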
def _output_paths(repo, targets):
    import os

    from dvc.fs import LocalFileSystem
    from dvc_data.stage import stage as ostage

    on_working_fs = isinstance(repo.fs, LocalFileSystem)

    def _exists(output):
        # Only the working tree is checked on disk; outputs on a
        # non-local fs (e.g. a past revision) are assumed to exist.
        if on_working_fs:
            return output.exists
        return True

    def _to_path(output):
        relparts = output.fs.path.relparts(output.fs_path)
        base = os.path.join(*relparts)
        if output.is_dir_checksum:
            # Mark directory outputs with a trailing separator.
            return os.path.join(base, "")
        return base

    for output in repo.index.outs:
        if not _exists(output):
            continue

        yield_output = targets is None or any(
            output.fs.path.isin_or_eq(output.fs_path, target)
            for target in targets
        )

        if on_working_fs:
            # Rehash the workspace copy (dry run: nothing is staged).
            _, _, obj = ostage(
                repo.odb.local,
                output.fs_path,
                repo.odb.local.fs,
                "md5",
                dry_run=True,
                ignore=output.dvcignore,
            )
            hash_info = obj.hash_info
        else:
            hash_info = output.hash_info
            obj = output.get_obj()

        if yield_output:
            yield _to_path(output), hash_info.value

        if not obj:
            continue

        if output.is_dir_checksum and (
            yield_output
            or any(
                output.fs.path.isin(target, output.fs_path)
                for target in targets
            )
        ):
            yield from _dir_output_paths(
                output.fs, output.fs_path, obj, targets
            )
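
# Note on `_to_path` above: joining with an empty final component appends
# the platform separator, which is how directory outputs are distinguished
# from plain files in the yielded paths. On POSIX:
#
#     >>> os.path.join("data", "")
#     'data/'
#     >>> os.path.join("data")
#     'data'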
def get_hash(self):
    if self.use_cache:
        odb = self.odb
        name = self.odb.fs.PARAM_CHECKSUM
    else:
        odb = self.repo.odb.local
        name = self.fs.PARAM_CHECKSUM
    _, _, obj = ostage(
        odb,
        self.fs_path,
        self.fs,
        name,
        ignore=self.dvcignore,
        dry_run=not self.use_cache,
    )
    return obj.hash_info
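
# Hedged usage sketch: the returned HashInfo compares by value, so checking
# whether the workspace copy of an output `out` (name illustrative) drifted
# from its recorded hash is roughly:
#
#     changed = out.get_hash() != out.hash_info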
def transfer(
    self, source, odb=None, jobs=None, update=False, no_progress_bar=False
):
    if odb is None:
        odb = self.odb

    cls, config, from_info = get_cloud_fs(self.repo, url=source)
    from_fs = cls(**config)

    # When running import-url --to-remote / add --to-remote/-o ... we
    # assume that it is unlikely that the odb will contain the majority
    # of the hashes, so we transfer everything as is (even if a file
    # might already be in the cache) and don't waste an upload to scan
    # the layout of the source location. But when doing update
    # --to-remote, there is a high probability that the odb might
    # contain some of the hashes, so we first calculate all the hashes
    # (but don't transfer anything) and then only update the missing
    # cache files.
    upload = not (update and from_fs.isdir(from_info))
    jobs = jobs or min((from_fs.jobs, odb.fs.jobs))
    staging, self.meta, obj = ostage(
        odb,
        from_info,
        from_fs,
        "md5",
        upload=upload,
        no_progress_bar=no_progress_bar,
    )
    otransfer(
        staging,
        odb,
        {obj.hash_info},
        jobs=jobs,
        hardlink=False,
        shallow=False,
    )

    self.hash_info = obj.hash_info
    return obj
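
# CLI correspondence, per the comment above: `dvc import-url <url> --to-remote`
# and `dvc add <path> --to-remote` take the upload path (transfer everything
# as is), while `dvc update --to-remote` of a directory source hashes first
# (upload=False) so only cache files missing from the odb get transferred.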
def _commit_granular_dir(self, filter_info):
    # Split the sub-path under the output root into parts, e.g.
    # ("raw", "train"), to use as a filtering prefix on the dir tree.
    prefix = self.fs.path.parts(
        self.fs.path.relpath(filter_info, self.fs_path)
    )
    staging, _, save_obj = ostage(
        self.odb,
        self.fs_path,
        self.fs,
        self.odb.fs.PARAM_CHECKSUM,
        ignore=self.dvcignore,
    )
    save_obj = save_obj.filter(prefix)
    checkout_obj = save_obj.get(self.odb, prefix)
    otransfer(
        staging,
        self.odb,
        {save_obj.hash_info} | {oid for _, _, oid in save_obj},
        shallow=True,
        hardlink=True,
    )
    return checkout_obj
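
# Illustration of the prefix derivation above with hypothetical paths, using
# stdlib analogues of fs.path.relpath/parts (POSIX output shown):
#
#     >>> import os
#     >>> from pathlib import PurePosixPath
#     >>> rel = os.path.relpath("data/raw/train", "data")
#     >>> PurePosixPath(rel).parts
#     ('raw', 'train')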
def commit(self, filter_info=None):
    if not self.exists:
        raise self.DoesNotExistError(self)

    assert self.hash_info

    if self.use_cache:
        granular = (
            self.is_dir_checksum
            and filter_info
            and filter_info != self.fs_path
        )
        if granular:
            obj = self._commit_granular_dir(filter_info)
        else:
            staging, _, obj = ostage(
                self.odb,
                filter_info or self.fs_path,
                self.fs,
                self.odb.fs.PARAM_CHECKSUM,
                ignore=self.dvcignore,
            )
            otransfer(
                staging,
                self.odb,
                {obj.hash_info},
                shallow=False,
                hardlink=True,
            )
        self._checkout(
            filter_info or self.fs_path,
            self.fs,
            obj,
            self.odb,
            relink=True,
            ignore=self.dvcignore,
            state=self.repo.state,
            prompt=prompt.confirm,
        )
        self.set_exec()
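
# Hedged usage sketch: `filter_info` enables granular commits of a sub-path
# inside a tracked directory output. Assuming `out` is a directory output
# rooted at out.fs_path (the path below is illustrative):
#
#     out.commit(filter_info=os.path.join(out.fs_path, "raw"))
#
# Anything else falls through to the non-granular branch, which stages and
# transfers the whole output before relinking it from the cache.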