def addfile(self, tarinfo, fileobj=None):
    """Append a member header, and optionally its data, to the archive.

    Source copied from tarfile.TarFile, except for setting tarinfo.offset
    and tarinfo.offset_data on the recorded member.

    A shallow copy of ``tarinfo`` is what gets annotated and stored in
    ``self.members``; the object passed in is not modified here.  When
    ``fileobj`` is given, ``tarinfo.size`` bytes are copied from it and the
    data is NUL-padded up to the next multiple of ``tarfile.BLOCKSIZE``.
    """
    self._check("awx")

    member = copy.copy(tarinfo)

    # NEW: position of this member's header within the archive.
    member.offset = self.offset

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # NEW: position right after the header, where any data begins.
    member.offset_data = self.offset

    # If there's data to follow, append it.
    if fileobj is not None:
        block_size = tarfile.BLOCKSIZE
        tarfile.copyfileobj(fileobj, self.fileobj, member.size)
        block_count, tail = divmod(member.size, block_size)
        if tail > 0:
            # Fill the last, partially used block with NULs.
            self.fileobj.write(tarfile.NUL * (block_size - tail))
            block_count += 1
        self.offset += block_count * block_size

    self.members.append(member)
def _addfile(self, armember, fileobj=None): if self.mode == 'r': raise IOError("File not open for writing") armember = copy.copy(armember) hdr = armember.getheader() assert len(hdr) == 60, "Invalid header length" self._fileobj.write(hdr) if fileobj is None: fileobj = open(armember.name, 'rb') copyfileobj(fileobj, self._fileobj, armember.size) self._fileobj.write(armember.getpadding()) self.members.append(armember) self.members_dict[armember.name] = armember
def extract(self, member, path=None):
    """Extract an archive member to ``path``.

    If ``path`` is None the current working directory is used.  If ``path``
    is an existing directory the member is written inside it under the
    member's own name; otherwise ``path`` itself is used as the file name.
    The output file is closed even when copying fails.
    """
    m = self.extractfile(member)
    if path is None:
        path = '.'
    if os.path.isdir(path):
        # NOTE(review): m.name comes from the archive and is joined
        # unchecked; a name containing path separators or ".." would be
        # written outside ``path``.  Confirm members are trusted.
        path = os.path.join(path, m.name)
    with open(path, 'wb') as fd:
        m.seek(0)
        copyfileobj(m, fd, m.size)
def _untar(path, fname, delete=True, flatten=False):
    """
    Unpack the given archive file to the same directory.

    :param str path:
        The folder containing the archive. Will contain the contents.
    :param str fname:
        The filename of the archive file.
    :param bool delete:
        If true, the archive will be deleted after extraction.
    :param bool flatten:
        If true, every member is written directly into ``path`` using only
        the last component of its name.

    :raises NotImplementedError:
        for members that are neither directories nor regular files.
    :raises ValueError:
        for members whose name is absolute or contains a ``..`` component.
    """
    import tarfile

    logging.debug(f'unpacking {fname}')
    fullpath = os.path.join(path, fname)
    # very painfully manually extract files so that we can use PathManger.open
    # instead, lest we are using fb internal file services.
    # tarfile does not close a file object handed in via ``fileobj=``, so the
    # archive handle is managed here; leaving it open also blocks the delete
    # below on platforms that refuse to remove open files.
    with PathManager.open(fullpath, 'rb') as archive, tarfile.open(
        fileobj=archive
    ) as tf:
        for item in tf:
            item_name = item.name
            while item_name.startswith("./"):
                # internal file systems will actually create a literal "."
                # directory, so we gotta watch out for that
                item_name = item_name[2:]
            # Member names come from the archive; never let one escape `path`.
            if os.path.isabs(item_name) or '..' in item_name.split('/'):
                raise ValueError(
                    f"Refusing to extract unsafe member name {item.name!r}"
                )
            if flatten:
                # flatten the tar file if there are subdirectories
                fn = os.path.join(path, os.path.split(item_name)[-1])
            else:
                fn = os.path.join(path, item_name)
            logging.debug(f"Extracting to {fn}")
            if item.isdir():
                PathManager.mkdirs(fn)
            elif item.isfile():
                # Pass the TarInfo itself: avoids a by-name lookup per member.
                with PathManager.open(fn, 'wb') as wf, tf.extractfile(item) as rf:
                    tarfile.copyfileobj(rf, wf)
            else:
                raise NotImplementedError(
                    "No support for symlinks etc. right now."
                )

    if delete:
        try:
            PathManager.rm(fullpath)
        except PermissionError:
            logging.error(
                f"Tried to delete {fullpath} but got a permission error. This "
                "is known to happen in Windows and is probably not fatal."
            )
def append_file_chunk(self, fileobj, chunk_size):
    """Append ``chunk_size`` bytes from ``fileobj`` to the active filestream.

    The filestream must have been started beforehand (see
    ``open_filestream``).  After the copy, both ``self.offset`` and
    ``self.filestream_sum`` are advanced by ``chunk_size``.

    Raises FileStreamException when no filestream is active or when
    ``fileobj`` is None.
    """
    self._check("aw")

    if not self.filestream_active:
        raise FileStreamException(
            "Cannot append file chunk without an active filestream. "
            "Start a filestream first."
        )
    if fileobj is None:
        raise FileStreamException("fileobj cannot be None.")

    tarfile.copyfileobj(fileobj, self.fileobj, chunk_size)

    # Keep the archive position and the running stream total in step.
    self.offset = self.offset + chunk_size
    self.filestream_sum = self.filestream_sum + chunk_size
def _untar(path, fname, delete=True):
    """
    Unpack the given archive file to the same directory.

    :param str path:
        The folder containing the archive. Will contain the contents.
    :param str fname:
        The filename of the archive file.
    :param bool delete:
        If true, the archive will be deleted after extraction.

    :raises NotImplementedError:
        for members that are neither directories nor regular files.
    :raises ValueError:
        for members whose name is absolute or contains a ``..`` component.
    """
    import tarfile

    logging.debug(f'unpacking {fname}')
    fullpath = os.path.join(path, fname)
    # very painfully manually extract files so that we can use PathManger.open
    # instead, lest we are using fb internal file services.
    # tarfile does not close a file object handed in via ``fileobj=``, so the
    # archive handle is managed here and is closed before the delete below.
    with PathManager.open(fullpath, 'rb') as archive, tarfile.open(
        fileobj=archive
    ) as tf:
        for item in tf:
            item_name = item.name
            while item_name.startswith("./"):
                # internal file systems will actually create a literal "."
                # directory, so we gotta watch out for that
                item_name = item_name[2:]
            # Member names come from the archive; never let one escape `path`.
            if os.path.isabs(item_name) or '..' in item_name.split('/'):
                raise ValueError(
                    f"Refusing to extract unsafe member name {item.name!r}"
                )
            fn = os.path.join(path, item_name)
            logging.debug(f"Extracting to {fn}")
            if item.isdir():
                PathManager.mkdirs(fn)
            elif item.isfile():
                # Pass the TarInfo itself: avoids a by-name lookup per member.
                with PathManager.open(fn, 'wb') as wf, tf.extractfile(item) as rf:
                    tarfile.copyfileobj(rf, wf)
            else:
                raise NotImplementedError(
                    "No support for symlinks etc. right now."
                )

    if delete:
        PathManager.rm(fullpath)
def copy_from_container(src, dst, buffer_size):
    """Copy a path out of a Docker container onto the local filesystem.

    ``src`` has the form ``"<container>:<path>"``; only the first ``:``
    separates the two parts, so the path itself may contain colons.  The
    archive produced by ``container.get_archive`` is read fully into memory
    and then unpacked as a tar stream.

    If ``dst`` is an existing regular file, every member is written to that
    file.  Otherwise each member is written to ``os.path.join(dst,
    member.name)``; directory members are created as directories so that
    the members inside them can be written.

    ``buffer_size`` is passed to ``tarfile.open`` as ``bufsize``.
    """
    container_name, container_path = src.split(":", 1)
    container = docker.from_env().containers.get(container_name)
    # The second element of the returned tuple is not needed here.
    tar_stream, _ = container.get_archive(container_path)

    # join() assembles the chunks in one pass instead of re-copying the
    # accumulated bytes on every iteration.
    archive = io.BytesIO(b"".join(tar_stream))

    with closing(
        tarfile.open(fileobj=archive, mode='r|*', bufsize=buffer_size)
    ) as tar_open:
        for item in tar_open:
            if os.path.isfile(dst):
                dst_path = dst
            else:
                dst_path = os.path.join(dst, item.name)
                if item.isdir():
                    # Opening a directory member as a file would create a
                    # regular file in its place and break nested members.
                    if not os.path.isdir(dst_path):
                        os.makedirs(dst_path)
                    continue
            with open(dst_path, 'wb') as dst_file:
                # The stream is positioned at this member's data; copy
                # exactly item.size bytes (copyfileobj chunks internally).
                tarfile.copyfileobj(tar_open.fileobj, dst_file, item.size)
def _save_file(self, directory, tar_file, file_member): file_name = os.path.join(directory, file_member.name.split("/")[-1]) self.log.debug("Saving file %s" % file_name) source = tar_file.extractfile(file_member) tarfile.copyfileobj(source, codecs.open(file_name, "w", "utf-8"))
def update_event(self, inp=-1):
    """Call ``tarfile.copyfileobj`` with inputs 0-4 and publish the result.

    The five input values are read in index order and passed positionally;
    whatever ``tarfile.copyfileobj`` returns is stored on output 0.  The
    ``inp`` argument is not used.
    """
    copy_func = tarfile.copyfileobj
    arguments = [self.input(index) for index in range(5)]
    outcome = copy_func(*arguments)
    self.set_output_val(0, outcome)