def __init__(self, path: str):
    """Set up a TarSubdirStream over a subdirectory of an archived bundle.

    Args:
        path (str): Blob Storage path of the subdirectory. It has to point
            at a subdirectory located inside a .tar.gz file.
    """
    from codalab.worker.file_util import OpenIndexedArchiveFile
    from codalab.worker.download_util import compute_target_info_blob_descendants_flat

    self.linked_bundle_path = parse_linked_bundle_url(path)

    # The archive is entered through a temporary ExitStack whose callbacks are
    # then moved onto self._stack via pop_all(). Leaving the `with` block
    # therefore does not exit the archive's context manager; it stays open
    # until self._stack is closed (intended to happen in self.close()).
    with ExitStack() as opening:
        archive_cm = OpenIndexedArchiveFile(self.linked_bundle_path.bundle_path)
        self.tf = opening.enter_context(archive_cm)
        self._stack = opening.pop_all()

    # Flat listing of everything below the requested subdirectory, plus a
    # cursor describing the descendant currently being processed.
    self.descendants = compute_target_info_blob_descendants_flat(path)
    self.current_desc = CurrentDescendant(
        desc=None,
        pos=0,
        finfo=EmptyFileInfo,
        tinfo=tarfile.TarInfo(),
    )

    # Bytes of the output tar archive accumulate in this buffer ...
    self._buffer = BytesBuffer()
    # ... which backs the tar archive opened for writing (mode "w:").
    self.output = tarfile.open(fileobj=self._buffer, mode="w:")
def __init__(self, tf: SQLiteIndexedTar, finfo: FileInfo):
    """Set up a TarFileStream for one file of an indexed tar archive.

    Args:
        tf (SQLiteIndexedTar): Tar archive indexed by ratarmount.
        finfo (FileInfo): Describes the file inside ``tf`` that this stream
            is going to read.
    """
    # Start at offset 0 with a fresh, empty buffer.
    self.pos = 0
    self._buffer = BytesBuffer()

    # Keep references to the archive and to the entry being streamed.
    self.finfo = finfo
    self.tf = tf
def test_zip_to_tar_read_byte_by_byte(self):
    """Check zip-to-tar conversion when both ends are driven one byte at a time.

    The input file object hands out a single byte per read and the resulting
    ZipToTarStream is also consumed with read(1), so the final tar archive is
    assembled byte by byte. The result must equal the archive produced by a
    single unrestricted read over the same zip contents.
    """
    cases = [
        ("single file", self.create_zip_single_file()),
        ("complex file", self.create_zip_complex()),
    ]
    for label, zipped in cases:
        with self.subTest(name=label):
            # Reference output: convert the whole zip in one go.
            expected_tar_contents = ZipToTarStream(BytesIO(zipped)).read()

            # Stream under test: same bytes, wrapped in ReadOneByOne.
            source = BytesBuffer()
            source.write(zipped)
            zts = ZipToTarStream(ReadOneByOne(source))

            # Drain the stream one byte per call until it returns nothing.
            collected = BytesBuffer()
            piece = zts.read(1)
            while piece:
                collected.write(piece)
                piece = zts.read(1)

            self.assertEqual(collected.read(), expected_tar_contents)
def __init__(self, client, path, mime_type='application/octet-stream'):
    """Store the client and target path and prepare the upload state.

    Args:
        client: Object providing ``get_blob_client(container, blob)``.
        path: Path that ``parse_azfs_path`` splits into a container name and
            a blob name.
        mime_type: Value passed to ``ContentSettings``; defaults to
            ``'application/octet-stream'``.
    """
    self._client = client
    self._path = path

    # Split the path once and keep both parts for later use.
    container, blob = parse_azfs_path(path)
    self._container = container
    self._blob = blob

    self._content_settings = ContentSettings(mime_type)
    self._blob_to_upload = self._client.get_blob_client(container, blob)

    # Bookkeeping: block numbering starts at 1, nothing is buffered yet and
    # the list of blocks is empty.
    self.block_number = 1
    self.buffer = BytesBuffer()
    self.block_list = []
def test_unseekable_file_read_partially(self):
    """Unseekable file can be read partially.

    Opens the single entry of the archive and reads it one byte per call,
    checking every byte against the expected payload.
    """
    expected = b"hello world"

    source = BytesBuffer()
    source.write(self.create_zip_single_file())

    with StreamingZipFile(source) as zf:
        for zinfo in zf:
            self.assertEqual(zinfo.filename, "file.txt")
            self.assertEqual(zinfo.file_size, 11)
            with zf.open(zinfo) as f:
                # One read(1) per expected byte, compared as 1-byte slices.
                for offset in range(len(expected)):
                    self.assertEqual(f.read(1), expected[offset:offset + 1])
def __init__(self, fileobj: IO[bytes]):
    """Wrap ``fileobj`` and prepare a gzip writer backed by an internal buffer.

    Args:
        fileobj (IO[bytes]): Binary file object kept as the input of this
            stream.
    """
    self.__input = fileobj

    # The GzipFile is opened in write mode on top of the buffer, so whatever
    # is written to it ends up, compressed, in self.__buffer.
    self.__buffer = BytesBuffer()
    self.__gzip = gzip.GzipFile(filename=None, mode='wb', fileobj=self.__buffer)