def unpack(self):
    unpacked_files = []
    if self.payload_format not in ['cpio', 'drpm']:
        return unpacked_files

    if self.compressor == 'bzip2':
        decompressor = bz2.BZ2Decompressor()
        payload = decompressor.decompress(self.data.payload)
    elif self.compressor == 'xz' or self.compressor == 'lzma':
        payload = lzma.decompress(self.data.payload)
    elif self.compressor == 'zstd':
        reader = zstandard.ZstdDecompressor().stream_reader(self.data.payload)
        payload = reader.read()
    else:
        payload = gzip.decompress(self.data.payload)

    if self.payload_format == 'drpm':
        out_labels = []
        file_path = pathlib.Path('drpm')
        outfile_rel = self.rel_unpack_dir / file_path
        outfile_full = self.scan_environment.unpack_path(outfile_rel)
        os.makedirs(outfile_full.parent, exist_ok=True)
        with open(outfile_full, 'wb') as outfile:
            outfile.write(payload)
        fr = FileResult(self.fileresult, self.rel_unpack_dir / file_path, set(out_labels))
        unpacked_files.append(fr)
    else:
        # write the payload to a temporary file first
        temporary_file = tempfile.mkstemp(dir=self.scan_environment.temporarydirectory)
        os.write(temporary_file[0], payload)
        os.close(temporary_file[0])
        payloadfile = temporary_file[1]
        payloadfile_full = self.scan_environment.unpack_path(payloadfile)

        # create a file result object and pass it to the CPIO unpacker
        fr = FileResult(self.fileresult, payloadfile, set())
        fr.set_filesize(len(payload))

        # assuming that the CPIO data is always in "new ascii" format
        cpio_parser = cpio_unpack.CpioNewAsciiUnpackParser(fr, self.scan_environment, self.rel_unpack_dir, 0)
        try:
            cpio_parser.open()
            unpackresult = cpio_parser.parse_and_unpack()
        except UnpackParserException as e:
            raise UnpackParserException(e.args)
        finally:
            cpio_parser.close()

        # walk the results and add them
        for i in unpackresult.unpacked_files:
            i.parent_path = self.fileresult.filename
            unpacked_files.append(i)
    return unpacked_files
def download_bz2(link, dest):
    print('Downloading', link)
    compressed = urllib.request.urlopen(link)
    with open(dest, 'wb') as f:
        decompressor = bz2.BZ2Decompressor()
        for data in iter(lambda: compressed.read(100 * 1024), b''):
            f.write(decompressor.decompress(data))
def decompress(self, dat):
    if dat.startswith('BZ'):
        try:
            return bz2.BZ2Decompressor().decompress(dat)
        except IOError:
            pass
    return dat
def __init__(self, coders: List[Dict[str, Any]], size: int, crc: Optional[int]) -> None:
    self.input_size = size
    self.consumed = 0  # type: int
    self.crc = crc
    self.digest = None  # type: Optional[int]
    filters = []  # type: List[Dict[str, Any]]
    try:
        for coder in coders:
            if coder['numinstreams'] != 1 or coder['numoutstreams'] != 1:
                raise UnsupportedCompressionMethodError('Only a simple compression method is currently supported.')
            filter = self.lzma_methods_map.get(coder['method'], None)
            if filter is not None:
                properties = coder.get('properties', None)
                if properties is not None:
                    filters[:0] = [lzma._decode_filter_properties(filter, properties)]  # type: ignore
                else:
                    filters[:0] = [{'id': filter}]
            else:
                raise UnsupportedCompressionMethodError
    except UnsupportedCompressionMethodError as e:
        filter = self.alt_methods_map.get(coders[0]['method'], None)
        if len(coders) == 1 and filter is not None:
            if filter == self.FILTER_BZIP2:
                self.decompressor = bz2.BZ2Decompressor()  # type: Union[bz2.BZ2Decompressor, lzma.LZMADecompressor]
            else:
                raise e
            self.can_partial_decompress = False
        else:
            raise e
    else:
        self.decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters)
        self.can_partial_decompress = True
    self.filters = filters
def _get_decoder(compression):
    if compression == 'zlib':
        try:
            import zlib
        except ImportError:
            raise ImportError(
                "Your Python does not have the zlib library, "
                "therefore the compressed block in this ASDF file "
                "can not be decompressed.")
        return zlib.decompressobj()
    elif compression == 'bzp2':
        try:
            import bz2
        except ImportError:
            raise ImportError(
                "Your Python does not have the bz2 library, "
                "therefore the compressed block in this ASDF file "
                "can not be decompressed.")
        return bz2.BZ2Decompressor()
    elif compression == 'lz4':
        try:
            import lz4.block
        except ImportError:
            raise ImportError(
                "The lz4 library is not installed in your Python environment, "
                "therefore the compressed block in this ASDF file "
                "can not be decompressed.")
        return Lz4Decompressor(lz4.block)
    else:
        raise ValueError("Unknown compression type: '{0}'".format(compression))
def extract(file):
    with open('sde.sqlite', 'wb') as newfile, open(file, 'rb') as file:
        decompressor = bz2.BZ2Decompressor()
        for data in iter(lambda: file.read(100 * 1024), b''):
            # noinspection PyArgumentList
            newfile.write(decompressor.decompress(data))
    return True
def _decompressor_stream(self):
    dwnld_file = None
    compression = self.parameters.get('compression', False)
    fname, _ = self._url_to_fname_suffix(self.path, compression)

    decompressor = None
    if compression:
        if compression == 'gz':
            decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
        elif compression == 'bz2':
            decompressor = bz2.BZ2Decompressor()
        elif compression == 'xz':
            decompressor = lzma.LZMADecompressor()  # pylint: disable=no-member
        self.logger.debug("Using %s decompression" % compression)
    else:
        self.logger.debug("No compression specified.")

    def write(buff):
        if decompressor:
            buff = decompressor.decompress(buff)
        dwnld_file.write(buff)

    try:
        dwnld_file = open(fname, 'wb')
        yield (write, fname)
    finally:
        if dwnld_file:
            dwnld_file.close()
def _bunzip2_stream(fileobj, bufsize=1024):
    """Decompress bzip2-compressed data on the fly.

    :param fileobj: object supporting ``read()``
    :param bufsize: number of bytes to read from *fileobj* at a time.

    This yields decompressed chunks; it does *not* split on lines. To get
    lines, wrap this in :py:func:`buffer_iterator_to_line_iterator`.

    This will replace :py:func:`bunzip2_stream` in v0.5.0 as part of an
    effort to be less line-based (see #715).
    """
    if bz2 is None:
        raise Exception(
            'bz2 module was not successfully imported (likely not installed).')

    d = bz2.BZ2Decompressor()
    while True:
        chunk = fileobj.read(bufsize)
        if not chunk:
            return
        # yield each decompressed chunk whole; iterating over the bytes
        # object would yield single bytes (ints on Python 3)
        part = d.decompress(chunk)
        if part:
            yield part
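# A minimal usage sketch for the streaming decompressor above (not from the
# original source): compress some bytes, wrap them in a BytesIO to stand in
# for a file object, and reassemble the payload from the yielded chunks.
import bz2
import io

payload = b'hello world\n' * 1000
stream = io.BytesIO(bz2.compress(payload))
assert b''.join(_bunzip2_stream(stream)) == payload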
def _decompressor_stream(url, imgdir, decompress):
    fd = None
    decompressor = None
    fname, suffix = _url_to_fname_suffix(url, imgdir)

    if suffix == 'gz' and decompress:
        decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    elif suffix == 'bz2' and decompress:
        decompressor = bz2.BZ2Decompressor()
    elif suffix == 'xz' and decompress:
        decompressor = lzma.LZMADecompressor()
    else:
        # don't remove the file's real suffix
        fname = '%s.%s' % (fname, suffix)

    def write(buff):
        if decompressor:
            buff = decompressor.decompress(buff)
        fd.write(buff)

    try:
        fd = open(fname, 'wb')
        yield (write, fname)
    finally:
        if fd:
            fd.close()
def _decomp(self):
    is_proc_running = True
    input_list = [self.task_queue._reader.fileno()]
    while is_proc_running:
        inready, outread, errready = select.select(input_list, [], [])
        if self.task_queue._reader.fileno() in inready:
            input_task = self.task_queue.get()
            if input_task == Const.QUEUE_SUCCESS_MESSAGE:
                is_proc_running = False
                break
            (comp_type, comp_data) = input_task
            start = time.time()
            comp_string = "Unknown Compression Algorithm!"
            if comp_type == Const.COMPRESSION_LZMA:
                comp_string = "lzma"
                decompressor = lzma.LZMADecompressor()
                decomp_data = decompressor.decompress(comp_data)
                decomp_data += decompressor.flush()
            elif comp_type == Const.COMPRESSION_BZIP2:
                comp_string = "bzip2"
                decompressor = bz2.BZ2Decompressor()
                decomp_data = decompressor.decompress(comp_data)
            elif comp_type == Const.COMPRESSION_GZIP:
                comp_string = "gzip"
                decomp_data = zlib.decompress(comp_data, zlib.MAX_WBITS | 16)
            else:
                raise CompressionError("Not a valid compression option")
            self.analysis_queue.put("B,D(%s),%5.3f" % (comp_string, time.time() - start))
            self.output_queue.put(decomp_data)
    self.command_queue.put("Compressed processed everything")
def from_header(cls, header: bytes):
    if header == b'LZ00' and support['lzma']:
        return lzma.LZMADecompressor()
    elif header == b'BZ00' and support['bz2']:
        return bz2.BZ2Decompressor()
    else:
        return zlib.decompressobj()
def get(cls, algo: str):
    if algo == 'lzma' and support['lzma']:
        return lzma.LZMADecompressor()
    elif algo == 'bz2' and support['bz2']:
        return bz2.BZ2Decompressor()
    else:
        return zlib.decompressobj()
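# A standalone sketch (not from the original source) of how such a factory
# is typically used: all three decompressor objects expose a compatible
# decompress(data) method, so the caller never needs to know which
# algorithm was selected. The ``support`` mapping is an assumption modeled
# on the two factories above.
import bz2
import lzma
import zlib

support = {'lzma': True, 'bz2': True}

def decompress_with(algo: str, blob: bytes) -> bytes:
    if algo == 'lzma' and support['lzma']:
        decompressor = lzma.LZMADecompressor()
    elif algo == 'bz2' and support['bz2']:
        decompressor = bz2.BZ2Decompressor()
    else:
        decompressor = zlib.decompressobj()
    return decompressor.decompress(blob)

assert decompress_with('bz2', bz2.compress(b'data')) == b'data'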
def extract(self, member, path=".", fileobj=None):
    """Extract and decompress `member` into `path` which defaults to
    the current directory."""
    self.fileobj.seek(member._offset)
    decomp = bz2.BZ2Decompressor()

    if fileobj:
        data = self.fileobj.read(member.size)
        fileobj.write(decomp.decompress(data))
        return

    dstpath = safe_join(path, member.name)
    dirname = os.path.dirname(dstpath)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    output = open(dstpath, "wb")
    toread = member.size
    while True:
        thisblock = min(128 * 1024, toread)
        block = self.fileobj.read(thisblock)
        if not block:
            break
        toread -= len(block)
        output.write(decomp.decompress(block))
    output.close()
    os.chmod(dstpath, member.flags)
    return dstpath
class UrlZipReader:
    def __init__(self, _url):
        self._url = _url
        try:
            #self.fp = urllib.request.urlopen(self._url)
            req = urllib.request.Request(self._url)
            req.add_header(
                "User-Agent",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.37 Safari/537.36"
            )
            #req.add_header("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.7.8) Gecko/20050609 Firefox/1.0.4")
            req.add_header("Connection", "Keep-Alive")
            print(req.headers)
            self.fp = urllib.request.urlopen(req)
        except urllib.error.HTTPError as e:
            print(e.code)
            print(e.msg)
            return
        print(self.fp.info())
        print(self.fp.getcode())

        if self._url.endswith(".gz"):
            self.zip = zlib.decompressobj(31)
        elif self._url.endswith(".bz2"):
            self.zip = bz2.BZ2Decompressor()
            #self.zip = bz2.BZ2File(self.fp)
        elif self._url.endswith(".zip"):
            self.zip = zlib.decompressobj(15)
        else:
            self.zip = None
def bunzip2_stream(fileobj, bufsize=1024):
    """Decompress bzip2-compressed data on the fly.

    :param fileobj: object supporting ``read()``
    :param bufsize: number of bytes to read from *fileobj* at a time.

    .. warning::

        This yields decompressed chunks; it does *not* split on lines.
        To get lines, wrap this in :py:func:`to_lines`.
    """
    if bz2 is None:
        raise Exception(
            'bz2 module was not successfully imported (likely not installed).')

    d = bz2.BZ2Decompressor()
    while True:
        chunk = fileobj.read(bufsize)
        if not chunk:
            return
        part = d.decompress(chunk)
        if part:
            yield part
def decomp_overlayzip(overlay_path, outfilename):
    overlay_package = VMOverlayPackage(overlay_path)
    meta_raw = overlay_package.read_meta()
    meta_info = msgpack.unpackb(meta_raw)
    comp_overlay_files = meta_info[Const.META_OVERLAY_FILES]

    out_fd = open(outfilename, "w+b")
    for blob_info in comp_overlay_files:
        comp_filename = blob_info[Const.META_OVERLAY_FILE_NAME]
        comp_type = blob_info.get(Const.META_OVERLAY_FILE_COMPRESSION, Const.COMPRESSION_LZMA)
        if comp_type == Const.COMPRESSION_LZMA:
            comp_data = overlay_package.read_blob(comp_filename)
            decompressor = lzma.LZMADecompressor()
            decomp_data = decompressor.decompress(comp_data)
            decomp_data += decompressor.flush()
            out_fd.write(decomp_data)
        elif comp_type == Const.COMPRESSION_BZIP2:
            comp_data = overlay_package.read_blob(comp_filename)
            decompressor = bz2.BZ2Decompressor()
            decomp_data = decompressor.decompress(comp_data)
            out_fd.write(decomp_data)
        elif comp_type == Const.COMPRESSION_GZIP:
            comp_data = overlay_package.read_blob(comp_filename)
            decomp_data = zlib.decompress(comp_data, zlib.MAX_WBITS | 16)
            out_fd.write(decomp_data)
        else:
            raise CompressionError("Not a valid compression option")
    out_fd.close()
    return meta_info
def _decomp(self):
    is_proc_running = True
    input_list = [self.task_queue._reader.fileno()]
    while is_proc_running:
        inready, outread, errready = select.select(input_list, [], [])
        if self.task_queue._reader.fileno() in inready:
            input_task = self.task_queue.get()
            if input_task == Const.QUEUE_SUCCESS_MESSAGE:
                is_proc_running = False
                break
            (comp_type, comp_data) = input_task
            if comp_type == Const.COMPRESSION_LZMA:
                decompressor = lzma.LZMADecompressor()
                decomp_data = decompressor.decompress(comp_data)
                decomp_data += decompressor.flush()
            elif comp_type == Const.COMPRESSION_BZIP2:
                decompressor = bz2.BZ2Decompressor()
                decomp_data = decompressor.decompress(comp_data)
            elif comp_type == Const.COMPRESSION_GZIP:
                decomp_data = zlib.decompress(comp_data, zlib.MAX_WBITS | 16)
            else:
                raise CompressionError("Not a valid compression option")
            LOG.debug("%f\tdecompress one blob" % (time.time()))
            self.output_queue.put(decomp_data)
    self.command_queue.put("Compressed processed everything")
def _decompressor_stream(self):  # pylint: disable=too-many-branches
    dwnld_file = None
    compression = False
    if 'images' in self.parameters and self.key in self.parameters['images']:
        compression = self.parameters['images'][self.key].get('compression', False)
    else:
        if self.key == 'ramdisk':
            self.logger.debug("Not decompressing ramdisk as it can be used compressed.")
        else:
            compression = self.parameters[self.key].get('compression', False)

    fname, _ = self._url_to_fname_suffix(self.path, compression)
    if os.path.isdir(fname):
        raise JobError("Download '%s' is a directory, not a file" % fname)
    if os.path.exists(fname):
        os.remove(fname)

    decompressor = None
    if compression:
        if compression == 'gz':
            decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
        elif compression == 'bz2':
            decompressor = bz2.BZ2Decompressor()
        elif compression == 'xz':
            decompressor = lzma.LZMADecompressor()  # pylint: disable=no-member
        self.logger.debug("Using %s decompression" % compression)
    else:
        self.logger.debug("No compression specified.")

    def write(buff):
        if decompressor:
            try:
                buff = decompressor.decompress(buff)
            except EOFError as eof_exc:
                # EOFError can be raised when decompressing a bz2 archive
                # generated by pbzip2. If there is something in unused_data,
                # try to continue decompression.
                if compression == 'bz2' and decompressor.unused_data:
                    buff = decompressor.unused_data
                else:
                    error_message = str(eof_exc)
                    self.logger.exception(error_message)
                    raise JobError(error_message)
            except (IOError, lzma.error, zlib.error) as exc:  # pylint: disable=no-member
                error_message = str(exc)
                self.logger.exception(error_message)
                raise JobError(error_message)
        dwnld_file.write(buff)

    try:
        with open(fname, 'wb') as dwnld_file:
            yield (write, fname)
    except (IOError, OSError) as exc:
        msg = "Unable to open %s: %s" % (fname, exc.strerror)
        self.logger.error(msg)
        raise InfrastructureError(msg)
def storage_write_bz2(self, data, session=None):
    self.mtda.debug(3, "main.storage_write_bz2()")

    self._check_expired(session)
    if self.sdmux_controller is None:
        result = -1
    else:
        # Create a bz2 decompressor when called for the first time
        if self.bz2dec is None:
            self.bz2dec = bz2.BZ2Decompressor()

        cont = True
        start = time.monotonic()
        result = -1
        while cont is True:
            # Decompress and write newly received data
            try:
                # Uncompress and write data
                result = self._storage_write_bz2(data)
                if result != 0:
                    # Either got an error or need more data; escape from
                    # this loop to provide feedback
                    cont = False
                else:
                    # Check if this loop has been running for quite some
                    # time, in which case we would want to give our client
                    # an update
                    now = time.monotonic()
                    if (now - start) >= self.fbintvl:
                        cont = False
                    # If we should continue and do not need more data at
                    # this time, use an empty buffer for the next iteration
                    elif result == 0:
                        data = b''
            except EOFError:
                # Handle multi-streams: create a new decompressor and we
                # will start with data unused from the previous decompressor
                data = self.bz2dec.unused_data
                self.bz2dec = bz2.BZ2Decompressor()
                cont = (len(data) > 0)  # loop only if we have unused data
                result = 0  # we do not need more input data

    self.mtda.debug(3, "main.storage_write_bz2(): %s" % str(result))
    return result
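# A standalone sketch (not from the original source) of the multi-stream
# handling that storage_write_bz2 performs incrementally: tools such as
# pbzip2 concatenate several bz2 streams, and a single BZ2Decompressor
# stops at the first end-of-stream marker, leaving the remaining bytes in
# unused_data (feeding more data past that point raises the EOFError
# caught above).
import bz2

def decompress_multistream(blob: bytes) -> bytes:
    parts = []
    data = blob
    while data:
        dec = bz2.BZ2Decompressor()
        parts.append(dec.decompress(data))
        data = dec.unused_data  # bytes belonging to the next stream, if any
    return b''.join(parts)

# Two concatenated streams decompress back to the combined payload
blob = bz2.compress(b'first,') + bz2.compress(b'second')
assert decompress_multistream(blob) == b'first,second'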
def data_for_op(op, out_file, old_file):
    args.payloadfile.seek(data_offset + op.data_offset)
    data = args.payloadfile.read(op.data_length)

    # assert hashlib.sha256(data).digest() == op.data_sha256_hash, 'operation data hash mismatch'

    if op.type == op.REPLACE_XZ:
        dec = lzma.LZMADecompressor()
        data = dec.decompress(data)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        out_file.write(data)
    elif op.type == op.REPLACE_BZ:
        dec = bz2.BZ2Decompressor()
        data = dec.decompress(data)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        out_file.write(data)
    elif op.type == op.REPLACE:
        out_file.seek(op.dst_extents[0].start_block * block_size)
        out_file.write(data)
    elif op.type == op.SOURCE_COPY:
        if not args.diff:
            print("SOURCE_COPY supported only for differential OTA")
            sys.exit(-2)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        for ext in op.src_extents:
            old_file.seek(ext.start_block * block_size)
            data = old_file.read(ext.num_blocks * block_size)
            out_file.write(data)
    elif op.type == op.SOURCE_BSDIFF:
        if not args.diff:
            print("SOURCE_BSDIFF supported only for differential OTA")
            sys.exit(-3)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        tmp_buff = io.BytesIO()
        for ext in op.src_extents:
            old_file.seek(ext.start_block * block_size)
            old_data = old_file.read(ext.num_blocks * block_size)
            tmp_buff.write(old_data)
        tmp_buff.seek(0)
        old_data = tmp_buff.read()
        tmp_buff.seek(0)
        tmp_buff.write(bsdiff4.patch(old_data, data))
        n = 0
        tmp_buff.seek(0)
        for ext in op.dst_extents:
            tmp_buff.seek(n * block_size)
            n += ext.num_blocks
            data = tmp_buff.read(ext.num_blocks * block_size)
            out_file.seek(ext.start_block * block_size)
            out_file.write(data)
    elif op.type == op.ZERO:
        for ext in op.dst_extents:
            out_file.seek(ext.start_block * block_size)
            # write bytes, not str, to the binary output file
            out_file.write(b'\0' * ext.num_blocks * block_size)
    else:
        print("Unsupported type = %d" % op.type)
        sys.exit(-1)

    return data
def Parse(self, snbFile, metaOnly=False):
    # Read header
    vmbr = snbFile.read(44)
    (self.magic, self.rev80, self.revA3, self.revZ1,
     self.fileCount, self.vfatSize, self.vfatCompressed,
     self.binStreamSize, self.plainStreamSizeUncompressed,
     self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr)

    # Read FAT
    self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed))
    self.ParseFile(self.vfat, self.fileCount)

    # Read tail
    snbFile.seek(-16, os.SEEK_END)
    #plainStreamEnd = snbFile.tell()
    tailblock = snbFile.read(16)
    (self.tailSize, self.tailOffset, self.tailMagic) = struct.unpack('>ii8s', tailblock)
    snbFile.seek(self.tailOffset)
    self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize))
    self.tailSizeUncompressed = len(self.vTailUncompressed)
    self.ParseTail(self.vTailUncompressed, self.fileCount)

    # Uncompress file data
    # Read files
    binPos = 0
    plainPos = 0
    uncompressedData = None
    for f in self.files:
        if f.attr & 0x41000000 == 0x41000000:
            # Compressed Files
            if uncompressedData is None:
                uncompressedData = b""
                for i in range(self.plainBlock):
                    bzdc = bz2.BZ2Decompressor()
                    if i < self.plainBlock - 1:
                        bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset
                    else:
                        bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset
                    snbFile.seek(self.blocks[self.binBlock + i].Offset)
                    try:
                        data = snbFile.read(bSize)
                        if len(data) < 32768:
                            uncompressedData += bzdc.decompress(data)
                        else:
                            uncompressedData += data
                    except Exception as e:
                        print(e)
            if len(uncompressedData) != self.plainStreamSizeUncompressed:
                raise Exception()
            f.fileBody = uncompressedData[plainPos:plainPos + f.fileSize]
            plainPos += f.fileSize
        elif f.attr & 0x01000000 == 0x01000000:
            # Binary Files
            snbFile.seek(44 + self.vfatCompressed + binPos)
            f.fileBody = snbFile.read(f.fileSize)
            binPos += f.fileSize
        else:
            print(f.attr, f.fileName)
            raise Exception("Invalid file")
def decompress(self, data):
    """
    Create a bz2 decompressor and decompress some data.

    :return: data as a bytes object.
    """
    decompressor = bz2.BZ2Decompressor()
    data = decompressor.decompress(data)
    return data
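# A minimal round-trip sketch (not from the original source): for a payload
# that fits in memory, a fresh BZ2Decompressor fed the whole stream at once
# behaves like bz2.decompress() for a single-stream archive.
import bz2

original = b'example payload'
compressed = bz2.compress(original)
assert bz2.BZ2Decompressor().decompress(compressed) == original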
def iter_decode(fileobj):
    """Iterate through decoded fragments of the file"""
    decompressor = bz2.BZ2Decompressor()
    for line in fileobj:
        try:
            yield decompressor.decompress(line)
        except EOFError:
            return
def __init__(self, fileobj):
    self._fileobj = fileobj
    self._bz2 = bz2.BZ2Decompressor()
    self._line_buffer = collections.deque([""])
    self._current_line = ""
    self._current_offset = 0
def _initialize_decompressor(self):
    if self._compression_type == CompressionTypes.BZIP2:
        self._decompressor = bz2.BZ2Decompressor()
    elif self._compression_type == CompressionTypes.DEFLATE:
        self._decompressor = zlib.decompressobj()
    else:
        assert self._compression_type == CompressionTypes.GZIP
        self._decompressor = zlib.decompressobj(self._gzip_mask)
def get_content(self) -> Generator[bytes, None, None]:
    """Yields processed pieces of content"""
    # https://docs.python.org/3.6/library/bz2.html#bz2.BZ2Decompressor
    decompressor = bz2.BZ2Decompressor()
    with self.fetch() as content:
        for chunk in content:
            yield decompressor.decompress(chunk)
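# A sketch (not from the original source) of a bounded-output variant of
# the same loop: BZ2Decompressor.decompress accepts a max_length argument
# (Python 3.5+), and the needs_input attribute signals when the internal
# buffer is drained and more compressed input is required. The function
# name and chunk size are illustrative assumptions.
import bz2

def decompress_bounded(chunks, max_length=64 * 1024):
    decompressor = bz2.BZ2Decompressor()
    for chunk in chunks:
        if decompressor.eof:
            break  # single-stream archive fully decoded
        data = decompressor.decompress(chunk, max_length)
        if data:
            yield data
        # Drain buffered output before feeding the next compressed chunk
        while not decompressor.needs_input and not decompressor.eof:
            data = decompressor.decompress(b'', max_length)
            if data:
                yield data

payload = b'x' * 1000000
blob = bz2.compress(payload)
chunks = [blob[i:i + 4096] for i in range(0, len(blob), 4096)]
assert b''.join(decompress_bounded(chunks)) == payload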
def bz2_stream(compressed, chunksize=100000):
    """ Stream lines from a chunk of compressed bz2 data """
    decompressor = bz2.BZ2Decompressor()
    for i in range(0, len(compressed), chunksize):
        chunk = compressed[i:i + chunksize]
        decompressed = decompressor.decompress(chunk).decode()
        for line in decompressed.split('\n'):
            yield line + '\n'
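# A brief usage sketch (not from the original source). Note that the
# generator splits each decompressed chunk independently, so a line that
# straddles a chunk boundary is yielded in two pieces; callers that need
# exact lines should buffer accordingly.
import bz2

blob = bz2.compress(b'alpha\nbeta\ngamma')
for line in bz2_stream(blob):
    print(line, end='')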
def _decompress_arxiv(arxiv):
    inc_decompressor = bz2.BZ2Decompressor()
    logger.debug('Extracting archive {}'.format(arxiv))
    output_arxiv_filepath = arxiv.rsplit('.bz2')[0]
    with open(arxiv, 'rb') as arxiv_byte_stream:
        with open(output_arxiv_filepath, 'wb') as out_stream:
            for data in iter(lambda: arxiv_byte_stream.read(100 * 1024), b''):
                out_stream.write(inc_decompressor.decompress(data))
def decompress(self, dat):
    dat = encode_something_to_bytes(dat)
    if dat.startswith(b'BZ'):
        try:
            return bz2.BZ2Decompressor().decompress(dat)
        except IOError:
            pass
    return dat
def _read_bzip(self, coder, input, level, num_coders):
    dec = bz2.BZ2Decompressor()
    return self._read_from_decompressor(coder, dec, input, level, num_coders, can_partial_decompress=False)