def iter_tar_members(tp: tarfile.TarFile) -> Generator[tarfile.TarInfo, None, None]:
    """
    Lazily yield the members of an open tar archive, one entry per read.

    This stands in for TarFile.getmembers() when only the leading entries
    matter: each member is fetched with a single ``tp.next()`` call at the
    moment the consumer asks for it. The caller reads the MTL file, which is
    almost always the first entry in the tar, and then closes the archive, so
    the rest of the tar never has to be skipped through.
    """
    # Two-argument iter() keeps calling tp.next() until it hands back the
    # None sentinel that marks the end of the archive.
    yield from iter(tp.next, None)
def main(argv):
    """Command-line entry point for the tar database tool.

    Options:
        -b basedir  directory handed to ``TarDB`` (default ``'tar'``)
        -d          accepted for compatibility; no command here reads it

    Commands:
        create              create the database
        import TAR ...      add every member of each tar file as a record
        add FILE ...        add each file as a record and print its number
        get RECNO ...       write the stored data of each record to stdout
        getinfo RECNO ...   print the stored info of each record

    Returns 0 on success and 100 after printing the usage line (bad option,
    missing command, unknown command).
    """
    import getopt

    def usage():
        print('usage: %s [-b basedir] cmd [arg ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'db:')
    except getopt.GetoptError:
        return usage()
    basedir = 'tar'
    for (k, v) in opts:
        if k == '-b':
            basedir = v
    tardb = TarDB(basedir)
    if not args:
        return usage()
    cmd = args.pop(0)
    if cmd == 'create':
        tardb.create()
    elif cmd == 'import':
        tardb.open()
        try:
            for path in args:
                tar = TarFile(path)
                try:
                    for info in iter(tar.next, None):
                        fp = tar.fileobj
                        # offset_data is where the payload really starts.
                        # offset + BLOCKSIZE is only right when the member
                        # has a single header block; GNU long-name and pax
                        # extension headers push the data further along.
                        # NOTE(review): assumes TarFile is tarfile.TarFile.
                        fp.seek(info.offset_data)
                        data = fp.read(info.size)
                        tardb.add_record(info, data)
                finally:
                    # Release the archive's file handle even if a record
                    # fails to import.
                    tar.close()
                tardb.flush()
        finally:
            tardb.close()
    elif cmd == 'add':
        tardb.open()
        try:
            for path in args:
                name = os.path.basename(path)
                info = TarInfo(name)
                with open(path, 'rb') as fp:
                    data = fp.read()
                recno = tardb.add_record(info, data)
                print(recno)
        finally:
            tardb.close()
    elif cmd == 'get':
        tardb.open()
        try:
            for recno in args:
                recno = int(recno)
                (_, data) = tardb.get_recinfo(recno, True)
                sys.stdout.buffer.write(data)
        finally:
            tardb.close()
    elif cmd == 'getinfo':
        tardb.open()
        try:
            for recno in args:
                recno = int(recno)
                (info, _) = tardb.get_recinfo(recno, False)
                print(info)
        finally:
            tardb.close()
    else:
        return usage()
    return 0
def _open_archive_file(self, archive: TarFile, name: str) -> IO[bytes]:
    """Return a readable file object for the archive member called ``name``.

    Members are fetched with successive ``archive.next()`` calls and the
    first one whose name equals ``name`` is handed to
    ``archive.extractfile``.

    Raises:
        ObjectDoesNotExistError: no member with that name was reached, or
            ``extractfile`` returned ``None`` for the first match.
    """
    for member in iter(archive.next, None):
        if member.name != name:
            continue
        fobj = archive.extractfile(member)
        if fobj is not None:
            return fobj
        # The first match cannot be opened: stop scanning and report it as
        # missing.
        break

    # noinspection PyProtectedMember
    raise ObjectDoesNotExistError(f'File {name} is missing in archive',
                                  self._file_storage._driver, archive.name)
def _open_archive_file(self, archive: TarFile, name: str) -> IO[bytes]:
    """Find the member called ``name`` in ``archive`` and open it for reading.

    The archive is advanced one member at a time with ``archive.next()``
    until a member with a matching name turns up or the members run out.

    Raises:
        ObjectDoesNotExistError: the members ran out before a match, or
            ``archive.extractfile`` returned ``None`` for the match.
    """
    # Step forward until we either sit on the wanted member or hit the end.
    candidate = archive.next()
    while candidate is not None and candidate.name != name:
        candidate = archive.next()

    stream = None if candidate is None else archive.extractfile(candidate)
    if stream is not None:
        return stream

    raise ObjectDoesNotExistError(
        'File {} is missing in archive'.format(name),
        self._file_storage._driver,
        archive.name)
def main(argv):
    """Command-line entry point for the message database tool.

    Options:
        -b basedir  directory handed to ``MessageDB`` (default ``'msg'``)
        -d          accepted for compatibility; no command here reads it

    Commands:
        create              create the database
        import TAR ...      pass each tar member's data through
                            ``gzip2bytes``, add it, and print its number
        add FILE ...        add each file's text and print its number
        search TERM ...     print up to 80 characters of each hit

    Returns 0 on success and 100 after printing the usage line (bad option,
    missing command, unknown command).
    """
    import getopt

    def usage():
        print('usage: %s [-b basedir] cmd [arg ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'db:')
    except getopt.GetoptError:
        return usage()
    basedir = 'msg'
    for (k, v) in opts:
        if k == '-b':
            basedir = v
    if not args:
        return usage()
    cmd = args.pop(0)
    msgdb = MessageDB(basedir)
    if cmd == 'create':
        msgdb.create()
    elif cmd == 'import':
        msgdb.open()
        try:
            for path in args:
                tar = TarFile(path)
                try:
                    for info in iter(tar.next, None):
                        fp = tar.fileobj
                        # offset_data is where the payload really starts.
                        # offset + BLOCKSIZE is only right when the member
                        # has a single header block; GNU long-name and pax
                        # extension headers push the data further along.
                        # NOTE(review): assumes TarFile is tarfile.TarFile.
                        fp.seek(info.offset_data)
                        data = fp.read(info.size)
                        recno = msgdb.add_file(gzip2bytes(data))
                        print(recno)
                finally:
                    # Release the archive's file handle even if a member
                    # fails to import.
                    tar.close()
                msgdb.flush()
        finally:
            msgdb.close()
    elif cmd == 'add':
        msgdb.open()
        try:
            for path in args:
                with open(path, 'r') as fp:
                    data = fp.read()
                recno = msgdb.add_file(data)
                print(recno)
        finally:
            msgdb.close()
    elif cmd == 'search':
        msgdb.open()
        try:
            for data in msgdb.search_text(args):
                print(rmsp(data)[:80])
        finally:
            msgdb.close()
    else:
        return usage()
    return 0
def read_binary_mask(self, tf: tarfile.TarFile) -> bm.BinaryMaskCollection:
    """Assemble a BinaryMaskCollection from the members of an open tar file.

    Every member is read in archive order and dispatched on its name: the
    log file is decoded as UTF-8 text, the pixel-tick and physical-tick
    files are unpickled, and each member whose name starts with the mask
    prefix is unpickled into one mask. Any other member triggers a
    DataFormatWarning and is otherwise ignored.

    Raises:
        ValueError: a member cannot be extracted, or the pixel or physical
            ticks were never found.
        TypeError: an unpickled mask is not a ``v0_0.MaskOnDisk``.
    """
    log: Optional[Log] = None
    masks: MutableSequence[bm.MaskData] = []
    pixel_ticks: Optional[Mapping[Axes, ArrayLike[int]]] = None
    physical_ticks: Optional[Mapping[Coordinates, ArrayLike[Number]]] = None

    for tarinfo in iter(tf.next, None):
        member_name = tarinfo.name

        extracted_fh = tf.extractfile(tarinfo)
        if extracted_fh is None:
            raise ValueError(f"Unable to extract file {member_name}")
        # Buffer the member in memory so that nothing downstream ever seeks
        # backwards on the tar stream itself.
        byte_stream = io.BytesIO(extracted_fh.read())

        if member_name == v0_0.LOG_FILENAME:
            utf8_reader = codecs.getreader("utf-8")
            log = Log.decode(utf8_reader(byte_stream).read())
            continue

        # NOTE(review): pickle.load can execute arbitrary code from a crafted
        # payload; these loads are only safe when the archive is trusted.
        if member_name == v0_0.PIXEL_TICKS_FILE:
            pixel_ticks = pickle.load(byte_stream)
            continue

        if member_name == v0_0.PHYSICAL_TICKS_FILE:
            physical_ticks = pickle.load(byte_stream)
            continue

        if member_name.startswith(v0_0.MASK_PREFIX):
            mask_on_disk: v0_0.MaskOnDisk = pickle.load(byte_stream)
            if not isinstance(mask_on_disk, v0_0.MaskOnDisk):
                raise TypeError(
                    f"mask does not conform to expected mask structure")
            mask = bm.MaskData(
                mask_on_disk.binary_mask, mask_on_disk.offsets, None)
            masks.append(mask)
            continue

        warnings.warn(
            f"Unexpected file in binary mask collection {member_name}",
            DataFormatWarning)

    if pixel_ticks is None:
        raise ValueError("pixel coordinates not found")
    if physical_ticks is None:
        raise ValueError("physical coordinates not found")

    return bm.BinaryMaskCollection(pixel_ticks, physical_ticks, masks, log)
def get_site_and_version_from_backup(tar: tarfile.TarFile) -> Tuple[str, str]:
    """Read the site name and version from the first member of a backup.

    The first member is expected to be the ``<site>/version`` symlink; the
    version is the last path component of that link's target.

    Raises:
        Exception: the archive yields no member, the first member's name
            contains no ``/``, or the part after the site name is not
            ``version``.
    """
    first_member = tar.next()
    if first_member is None:
        raise Exception("Failed to detect version of backed up site.")

    try:
        sitename, version_name = first_member.name.split("/", 1)
    except ValueError:
        raise Exception("Failed to detect version of backed up site. "
                        "Maybe the backup is from an incompatible version.")

    if version_name != "version":
        raise Exception("Failed to detect version of backed up site.")

    # Only the final component of the link target carries the version.
    _, _, version = first_member.linkname.rpartition("/")
    return sitename, version
def main(argv):
    """Run one tar database command taken from the command line.

    ``argv[0]`` is the program name used in the usage line; the rest is
    parsed as ``[-d] [-b basedir] cmd [arg ...]``. ``-b`` selects the
    directory handed to ``TarDB`` (default ``'tar'``); ``-d`` is accepted but
    nothing in this function reads it.

    Commands:
        create              create the database
        import TAR ...      add every member of each tar file as a record
        add FILE ...        add each file as a record and print its number
        get RECNO ...       write the stored data of each record to stdout
        getinfo RECNO ...   print the stored info of each record

    Returns 0 on success and 100 after printing the usage line (bad option,
    missing command, unknown command).
    """
    import getopt
    from contextlib import closing

    def usage():
        print('usage: %s [-b basedir] cmd [arg ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'db:')
    except getopt.GetoptError:
        return usage()
    basedir = 'tar'
    for (k, v) in opts:
        if k == '-b':
            basedir = v
    tardb = TarDB(basedir)
    if not args:
        return usage()
    cmd = args.pop(0)
    if cmd not in ('create', 'import', 'add', 'get', 'getinfo'):
        return usage()
    if cmd == 'create':
        tardb.create()
        return 0

    # Every remaining command works on an opened database; the finally
    # clause guarantees it is closed even when a command fails part-way.
    tardb.open()
    try:
        if cmd == 'import':
            for path in args:
                # closing() releases the archive's file handle on any exit.
                with closing(TarFile(path)) as tar:
                    for info in iter(tar.next, None):
                        fp = tar.fileobj
                        # Seek to the member's real payload offset; adding
                        # one BLOCKSIZE to the header offset is wrong when
                        # GNU long-name or pax extension headers precede the
                        # data.
                        # NOTE(review): assumes TarFile is tarfile.TarFile.
                        fp.seek(info.offset_data)
                        data = fp.read(info.size)
                        tardb.add_record(info, data)
                tardb.flush()
        elif cmd == 'add':
            for path in args:
                name = os.path.basename(path)
                info = TarInfo(name)
                with open(path, 'rb') as fp:
                    data = fp.read()
                recno = tardb.add_record(info, data)
                print(recno)
        elif cmd == 'get':
            for recno in args:
                recno = int(recno)
                (_, data) = tardb.get_recinfo(recno, True)
                sys.stdout.buffer.write(data)
        else:  # 'getinfo'
            for recno in args:
                recno = int(recno)
                (info, _) = tardb.get_recinfo(recno, False)
                print(info)
    finally:
        tardb.close()
    return 0