def _clean_info(
    root: Optional[str], epoch: int, info: tarfile.TarInfo
) -> tarfile.TarInfo:
    """
    Remove variable data from an archive entry.

    The entry is modified in place and the same object is returned, so the
    function can be used directly as a ``filter`` callback once ``root`` and
    ``epoch`` are bound.

    :param root: absolute path to the root directory from which the entry
        was added, or None to disable turning the name into a relative path
    :param epoch: fixed modification time to set
    :param info: tarinfo object to set
    :returns: changed tarinfo
    """
    if root is not None:
        name = os.path.relpath("/" + info.name, root)
        # Leave alone only "." / ".." and names that already start with a
        # "./" or "../" component. A bare leading-dot test would also match
        # hidden files such as ".gitignore", which would then be the only
        # members stored without the "./" prefix.
        if name not in (".", "..") and not name.startswith(("./", "../")):
            name = "./" + name
        info.name = name

    # Ownership and timestamp depend on the build machine; neutralise them.
    info.uid = 0
    info.gid = 0
    info.uname = ""
    info.gname = ""
    info.mtime = epoch
    return info
def tar_strip_file_attributes(tar_info: tarfile.TarInfo) -> tarfile.TarInfo:
    """Neutralise timestamp, ownership and pax headers of ``tar_info``.

    The entry is modified in place and returned, which makes the function
    usable as a ``filter`` callback for ``TarFile.add``.

    :param tar_info: archive member to normalise
    :returns: the same ``tar_info`` object
    """
    # Ownership: numeric ids become 0, symbolic names become empty.
    tar_info.uid = tar_info.gid = 0
    tar_info.uname = tar_info.gname = ""

    # Timestamp 0 is 00:00:00 UTC on 1 January 1970; extracted files will
    # show this as their modification date.
    tar_info.mtime = 0

    # Dropping pax headers may not be strictly required, see
    # https://stackoverflow.com/questions/34688392/paxheaders-in-tarball
    tar_info.pax_headers = {}
    return tar_info
def compute(self, conn, data=None):
    """Build the PAX tar header for this entry and record its payload size.

    On return ``self._buf`` holds the header bytes produced by
    ``TarInfo.tobuf`` and ``self._filesize`` holds the size written into
    that header.

    :param conn: client used to fetch container/object properties and, for
        static large objects, the manifest body
    :param data: for the container-properties entry, already-fetched
        metadata used instead of querying ``conn`` when truthy; for the
        container-manifest entry, the manifest to be serialised
    :raises AssertionError: if an object property value is not a string
    """
    tarinfo = TarInfo()
    tarinfo.name = self.name
    # The permission field of TarInfo is named `mode`; tobuf() only reads
    # that attribute, so any other spelling leaves the default permissions.
    tarinfo.mode = 0o700
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.type = REGTYPE
    tarinfo.linkname = ""

    # The two synthetic container entries carry a JSON payload; only its
    # length is needed here.
    if self.name == CONTAINER_PROPERTIES:
        meta = data or conn.container_get_properties(self.acct, self.ref)
        tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return
    elif self.name == CONTAINER_MANIFEST:
        tarinfo.size = len(json.dumps(data, sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return

    entry = conn.object_get_properties(self.acct, self.ref, self.name)
    properties = entry['properties']

    # x-static-large-object
    if properties.get(SLO, False):
        # The advertised size comes from the SLO size property; the object
        # body is the JSON manifest, kept in its original key order.
        tarinfo.size = int(properties.get(SLO_SIZE))
        _, slo = conn.object_fetch(self.acct, self.ref, self.name)
        self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
    else:
        tarinfo.size = int(entry['length'])
    self._filesize = tarinfo.size

    # XATTR
    # do we have to store basic properties like policy, ... ?
    for key, val in properties.items():
        assert isinstance(val, basestring), \
            "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
        # SLO bookkeeping headers are not exported as extended attributes.
        if self.slo and key in SLO_HEADERS:
            continue
        tarinfo.pax_headers[SCHILY + key] = val
    tarinfo.pax_headers['mime_type'] = entry['mime_type']
    self._buf = tarinfo.tobuf(format=PAX_FORMAT)
def _reset_tarinfo(
    self,
    info: TarInfo,
    predicate: Optional[ArchiveAdapter.FileFilter],
    mtime: Optional[int],
) -> Optional[TarInfo]:
    """Filter an archive member and strip its ownership information.

    :param info: member to inspect; modified in place when it is kept
    :param predicate: optional callable receiving the member name; a falsy
        result drops the member
    :param mtime: modification time to stamp on the member, or None to
        keep the existing one
    :returns: ``info`` when the member is kept, None when it was rejected
        by ``predicate``
    """
    if predicate is not None:
        if not predicate(info.name):
            return None

    # User and group IDs are irrelevant for distribution and may otherwise
    # require a subsequent `chown` on multi-tenant systems.
    info.uid = info.gid = 0
    info.uname = info.gname = ""

    if mtime is not None:
        info.mtime = mtime
    return info
def compute(self, conn, data=None):
    """Build the PAX tar header for this entry and collect its checksums.

    On return ``self._buf`` holds the header bytes produced by
    ``TarInfo.tobuf`` and ``self._filesize`` holds the size written into
    that header. For regular objects and static large objects
    ``self._checksums`` is filled with one record per chunk/segment.

    :param conn: client used to fetch container/object properties, locate
        chunks and, for static large objects, fetch the manifest body
    :param data: for the container-properties entry, already-fetched
        metadata used instead of querying ``conn`` when truthy; for the
        container-manifest entry, the manifest to be serialised
    :raises AssertionError: if an object property value is not a string
    """
    tarinfo = TarInfo()
    tarinfo.name = self.name
    # The permission field of TarInfo is named `mode`; tobuf() only reads
    # that attribute, so any other spelling leaves the default permissions.
    tarinfo.mode = 0o700
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.type = REGTYPE
    tarinfo.linkname = ""

    # The two synthetic container entries carry a JSON payload; only its
    # length is needed here.
    if self.name == CONTAINER_PROPERTIES:
        meta = data or conn.container_get_properties(self.acct, self.ref)
        tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return
    elif self.name == CONTAINER_MANIFEST:
        tarinfo.size = len(json.dumps(data, sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return

    entry = conn.object_get_properties(self.acct, self.ref, self.name)
    properties = entry['properties']

    # x-static-large-object
    if properties.get(SLO, False):
        tarinfo.size = int(properties.get(SLO_SIZE))
        _, slo = conn.object_fetch(self.acct, self.ref, self.name,
                                   properties=False)
        self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
        self._checksums = {}
        # format MD5 to share same format as multi chunks object
        offset = 0
        for idx, ck in enumerate(self._slo):
            self._checksums[idx] = {
                'hash': ck['hash'].upper(),
                'size': ck['bytes'],
                'offset': offset
            }
            # Segments are laid out back to back in manifest order.
            offset += ck['bytes']
    else:
        tarinfo.size = int(entry['length'])
        meta, chunks = conn.object_locate(self.acct, self.ref, self.name,
                                          properties=False)
        storage_method = STORAGE_METHODS.load(meta['chunk_method'])
        chunks = _sort_chunks(chunks, storage_method.ec)
        for idx in chunks:
            # Keep only the first entry listed for each index and drop the
            # location-specific fields from it.
            chunks[idx] = chunks[idx][0]
            del chunks[idx]['url']
            del chunks[idx]['score']
            del chunks[idx]['pos']
        self._checksums = chunks
    self._filesize = tarinfo.size

    # XATTR
    # do we have to store basic properties like policy, ... ?
    for key, val in properties.items():
        assert isinstance(val, basestring), \
            "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
        # SLO bookkeeping headers are not exported as extended attributes.
        if self.slo and key in SLO_HEADERS:
            continue
        tarinfo.pax_headers[SCHILY + key] = val
    tarinfo.pax_headers['mime_type'] = entry['mime_type']
    self._buf = tarinfo.tobuf(format=PAX_FORMAT)
def _tar_filter(tar_info: tarfile.TarInfo) -> tarfile.TarInfo:
    """Zero the numeric owner and group of ``tar_info`` and return it.

    Only ``uid`` and ``gid`` are touched; every other field, including the
    symbolic user and group names, is left as it was.
    """
    tar_info.uid = tar_info.gid = 0
    return tar_info
def reset_tar_info(info: tarfile.TarInfo) -> tarfile.TarInfo:
    """Make ``info`` owned by root (id 0, name ``root``) and return it.

    The member is modified in place; fields other than the four ownership
    attributes are not touched.
    """
    ownership = (
        ("gid", 0),
        ("uid", 0),
        ("uname", 'root'),
        ("gname", 'root'),
    )
    for attribute, value in ownership:
        setattr(info, attribute, value)
    return info
def tar(host, backup, share, path):
    """Write part of a backup to standard output as a GNU-format tar stream.

    Every entry returned by ``share.find(path)`` is turned into a tar
    header; regular files that are not hardlinks are followed by their
    data, padded to a multiple of ``BLOCKSIZE``. Entries matching none of
    the known types are skipped.

    :param host: key used to select the host from the Fruitbak instance
    :param backup: key used to select the backup of that host
    :param share: share key; when ``path`` is None it is instead handed to
        ``backup.locate_path`` to obtain both share and path
    :param path: path inside the share, or None (see ``share``)
    :raises: whatever exception a read-ahead action reports
        (``action.exception[1]`` is re-raised)
    """
    # Tar data is binary, so bypass the text layer of stdout.
    binary_stdout = stdout.buffer
    # NOTE(review): the configuration directory is hard-coded here —
    # confirm this is intended outside of testing.
    fbak = Fruitbak(confdir = Path('/dev/shm/conf'))
    backup = fbak[host][backup]
    if path is None:
        share, path = backup.locate_path(share)
    else:
        share = backup[share]

    def iterator():
        # Yield the hashes of every regular, non-hardlink file, using the
        # same traversal and the same condition as the main loop below, so
        # that the reader's results line up with the files written there.
        for dentry in share.find(path):
            if dentry.is_file and not dentry.is_hardlink:
                yield from dentry.hashes

    # presumably the reader fetches the data for the yielded hashes ahead
    # of time and returns one action per hash, in order — verify against
    # the pool agent implementation.
    with fbak.pool.agent().readahead(iterator()) as reader:
        for dentry in share.find(path):
            # An empty name is written as '.'.
            name = dentry.name or b'.'
            i = TarInfo(fsdecode(bytes(name)))
            # Keep only the permission bits (including setuid/setgid/sticky).
            i.mode = dentry.mode & 0o7777
            i.uid = dentry.uid
            i.gid = dentry.gid
            # presumably dentry.mtime is in nanoseconds and tar wants
            # seconds — TODO confirm
            i.mtime = dentry.mtime // 1000000000
            # The hardlink test must come first: the data-writing condition
            # further down shows an entry can be both a file and a hardlink.
            if dentry.is_hardlink:
                i.type = LNKTYPE
                hardlink = dentry.hardlink or b'.'
                i.linkname = fsdecode(bytes(hardlink))
            elif dentry.is_file:
                i.type = REGTYPE
                i.size = dentry.size
            elif dentry.is_symlink:
                i.type = SYMTYPE
                i.linkname = fsdecode(bytes(dentry.symlink))
            elif dentry.is_chardev:
                i.type = CHRTYPE
                i.devmajor = dentry.major
                i.devminor = dentry.minor
            elif dentry.is_blockdev:
                i.type = BLKTYPE
                i.devmajor = dentry.major
                i.devminor = dentry.minor
            elif dentry.is_directory:
                i.type = DIRTYPE
            elif dentry.is_fifo:
                i.type = FIFOTYPE
            else:
                # Unknown entry type: nothing is written for it.
                continue
            binary_stdout.write(i.tobuf(GNU_FORMAT))
            if dentry.is_file and not dentry.is_hardlink:
                # Only the number of hashes matters here; the data itself
                # comes from the reader, one action per hash.
                for hash in dentry.hashes:
                    action = next(reader)
                    if action.exception:
                        raise action.exception[1]
                    binary_stdout.write(action.value)
                # Zero-fill up to the next BLOCKSIZE boundary.
                padding = -i.size % BLOCKSIZE
                if padding:
                    binary_stdout.write(bytes(padding))
    # Two zero-filled blocks terminate the archive.
    binary_stdout.write(b'\0' * (BLOCKSIZE*2))