def from_bytes(cls, data: bytes, fs_cache: Optional[dict] = None):
    """Deserialize a ``ReferenceHashFile`` from pickled bytes.

    Args:
        data: pickled dict produced by ``to_bytes()``.
        fs_cache: optional mapping of ``(scheme, config_pairs)`` to an
            already-constructed filesystem, used to avoid rebuilding one.

    Returns:
        ReferenceHashFile: reconstructed reference object.

    Raises:
        ObjectFormatError: if ``data`` cannot be unpickled or any of the
            required keys is missing.
    """
    from dvc.fs import get_fs_cls
    from dvc.fs.repo import RepoFileSystem

    # NOTE: `data` must come from a trusted source — pickle.loads() can
    # execute arbitrary code on malicious input (see the warning in
    # to_bytes()).
    try:
        dict_ = pickle.loads(data)
    except pickle.PickleError as exc:
        raise ObjectFormatError("ReferenceHashFile is corrupted") from exc

    try:
        fs_path = dict_[cls.PARAM_PATH]
        hash_info = dict_[cls.PARAM_HASH]
        # FIX: previously fetched with .get(), so a missing FS config key
        # surfaced as an opaque TypeError when unpacking None. Treat it as
        # corruption, consistent with the other required keys.
        scheme, config_pairs = dict_[cls.PARAM_FS_CONFIG]
    except KeyError as exc:
        raise ObjectFormatError("ReferenceHashFile is corrupted") from exc

    fs = fs_cache.get((scheme, config_pairs)) if fs_cache else None
    if not fs:
        config = dict(config_pairs)
        if RepoFileSystem.PARAM_REPO_URL in config:
            fs = RepoFileSystem(**config)
            # Repo-relative paths were stored relative to the repo root;
            # re-anchor them on the reconstructed filesystem.
            fs_path = fs.path.join(fs.root_dir, fs_path)
        else:
            fs_cls = get_fs_cls(config, scheme=scheme)
            fs = fs_cls(**config)

    return ReferenceHashFile(
        fs_path,
        fs,
        hash_info,
        checksum=dict_.get(cls.PARAM_CHECKSUM),
    )
def check(self, odb: "ObjectDB", check_hash: bool = True):
    """Validate that the referenced file exists and is unchanged.

    Args:
        odb: object database used for hash verification.
        check_hash: when True, also recompute and compare the file hash.

    Raises:
        FileNotFoundError: if the referenced path no longer exists.
        ObjectFormatError: if the checksum or hash no longer matches.
    """
    assert self.fs

    if not self.fs.exists(self.fs_path):
        raise FileNotFoundError(
            errno.ENOENT, os.strerror(errno.ENOENT), self.fs_path
        )

    if self._get_checksum() != self.checksum:
        raise ObjectFormatError(f"{self} is changed")

    if not check_hash:
        return
    self._check_hash(odb)
def load(cls, odb, hash_info):
    """Fetch a dir-cache object from ``odb`` and build a tree from it.

    Args:
        odb: object database; ``odb.get(hash_info)`` yields the raw object.
        hash_info: hash identifying the dir-cache entry.

    Returns:
        A tree built via ``cls.from_list`` with ``fs_path``, ``fs`` and
        ``hash_info`` copied from the loaded object.

    Raises:
        ObjectFormatError: if the file is not valid JSON or is not a list.
    """
    obj = odb.get(hash_info)

    try:
        with obj.fs.open(obj.fs_path, "r") as fileobj:
            contents = json.load(fileobj)
    except ValueError as exc:
        raise ObjectFormatError(f"{obj} is corrupted") from exc

    if not isinstance(contents, list):
        logger.error(
            "dir cache file format error '%s' [skipping the file]",
            obj.fs_path,
        )
        raise ObjectFormatError(f"{obj} is corrupted")

    tree = cls.from_list(contents)
    tree.fs_path, tree.fs, tree.hash_info = obj.fs_path, obj.fs, hash_info
    return tree
def _check_hash(self, odb):
    """Recompute the file's hash and verify it matches ``self.hash_info``.

    Args:
        odb: object database whose ``state`` is passed to the hasher.

    Raises:
        ObjectFormatError: if the recomputed hash value differs.
    """
    from dvc.data.stage import get_file_hash
    from dvc.objects.errors import ObjectFormatError

    _, actual = get_file_hash(
        self.fs_path, self.fs, self.hash_info.name, odb.state
    )

    logger.trace(
        "cache '%s' expected '%s' actual '%s'",
        self.fs_path,
        self.hash_info,
        actual,
    )

    assert actual.name == self.hash_info.name
    # Compare only the portion before the first "." — strips any suffix
    # on the hash value (presumably a ".dir" marker; confirm with callers).
    expected_value = self.hash_info.value.split(".")[0]
    if expected_value != actual.value.split(".")[0]:
        raise ObjectFormatError(f"{self} is corrupted")
def to_bytes(self):
    """Serialize this reference to pickled bytes.

    WARNING: dumping the reference filesystem this way is insecure — the
    fully parsed remote FS config will include credentials. These objects
    should currently only be serialized in memory, never to disk.

    Returns:
        bytes: pickled payload consumable by ``from_bytes``.

    Raises:
        ObjectFormatError: if the payload cannot be pickled.
    """
    payload = {
        self.PARAM_PATH: self.fs_path,
        self.PARAM_HASH: self.hash_info,
        self.PARAM_CHECKSUM: self.checksum,
        self.PARAM_FS_CONFIG: self.config_tuple(self.fs),
    }
    try:
        return pickle.dumps(payload)
    except pickle.PickleError as exc:
        raise ObjectFormatError(f"Could not pickle {self}") from exc