def startDecompressMessage(self):
    if self._isServer:
        if self._decompressor is None or self.c2s_no_context_takeover:
            self._decompressor = zlib.decompressobj(-self.c2s_max_window_bits)
    else:
        if self._decompressor is None or self.s2c_no_context_takeover:
            self._decompressor = zlib.decompressobj(-self.s2c_max_window_bits)
def parse_blob(self):
    """Unzip and parse the blob.

    Everything we get is big endian.
    Each block contains 16*16*16 nodes; a node is the in-game block size.
    """
    dec_o = zlib.decompressobj()
    (self.param0, self.param1, self.param2) = struct.unpack(
        "8192s4096s4096s", dec_o.decompress(self.blob[4:]))
    self.param0 = array.array("H", self.param0)
    self.param0.byteswap()
    #import pdb;pdb.set_trace()
    tail = dec_o.unused_data

    dec_o = zlib.decompressobj()  # Must make a new obj or .unused_data will get messed up.
    blah = dec_o.decompress(tail)  # throw away metadata

    (static_version, static_count,) = struct.unpack(">BH", dec_o.unused_data[0:3])
    ptr = 3
    if static_count:
        for i in range(static_count):
            (object_type, pos_x_nodes, pos_y_nodes, pos_z_nodes,
             data_size) = struct.unpack(">BiiiH", dec_o.unused_data[ptr:ptr+15])
            ptr = ptr + 15 + data_size

    (self.timestamp,) = struct.unpack(">I", dec_o.unused_data[ptr:ptr+4])
    if self.timestamp == 0xffffffff:
        # This is defined as an unknown timestamp.
        self.timestamp = None
    ptr = ptr + 4

    (name_id_mapping_version, num_name_id_mappings) = struct.unpack(
        ">BH", dec_o.unused_data[ptr:ptr+3])
    ptr = ptr + 3

    start = ptr
    self.id_to_name = {}
    for i in range(0, num_name_id_mappings):
        (node_id, name_len) = struct.unpack(">HH", dec_o.unused_data[start:start+4])
        (name,) = struct.unpack(">{}s".format(name_len),
                                dec_o.unused_data[start+4:start+4+name_len])
        self.id_to_name[node_id] = name.decode('utf8')
        start = start + 4 + name_len
def format_body(message, body_fp):
    """ return (is_compressed, body) """
    t_enc = message.get('Transfer-Encoding', '').strip().lower()
    c_enc = message.get('Content-Encoding', '').strip().lower()
    c_type = message.get('Content-Type', '').strip().lower()
    charset = 'latin1'
    m = RE_CHARSET.search(c_type)
    if m:
        charset = m.group(1)

    body = read_body(body_fp, t_enc == 'chunked')

    if c_enc in ('gzip', 'x-gzip', 'deflate'):
        try:
            if c_enc != 'deflate':
                buf = StringIO(body)
                read_gzip_header(buf)
                body = buf.read()
                do = zlib.decompressobj(-zlib.MAX_WBITS)
            else:
                do = zlib.decompressobj()
            decompressed = do.decompress(body)
            #print "<gzipped>\n" + decompressed
            return (True, decompressed)
        except:
            import traceback
            traceback.print_exc()
    else:
        return (False, body)
def decode_deflate(chunks, z=None):
    if z is None:
        z = zlib.decompressobj()
        retry = True
    else:
        retry = False
    for chunk in chunks:
        if hasattr(z, 'unconsumed_tail'):  # zlib
            compressed = (z.unconsumed_tail + chunk)
        else:  # brotli
            compressed = chunk
        try:
            decompressed = z.decompress(compressed)
        except zlib.error:
            if not retry:
                raise
            z = zlib.decompressobj(-zlib.MAX_WBITS)
            retry = False
            decompressed = z.decompress(compressed)
        if decompressed:
            yield decompressed
    yield z.flush()
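# Usage sketch (an assumption, not from the original source): feed zlib-wrapped
# data to decode_deflate() in chunks and reassemble the output. With z=None the
# helper starts with a standard zlib decompressor and retries with raw deflate
# (-MAX_WBITS) if the first chunk lacks a zlib header.
import zlib

payload = b"hello world " * 1000
blob = zlib.compress(payload)
chunks = [blob[i:i + 256] for i in range(0, len(blob), 256)]
assert b"".join(decode_deflate(chunks)) == payload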
def _parse_headers(self, data):
    idx = data.find(b("\r\n\r\n"))
    if idx < 0:  # we don't have all headers
        return False

    # Split lines on \r\n keeping the \r\n on each line
    lines = [bytes_to_str(line) + "\r\n"
             for line in data[:idx].split(b("\r\n"))]

    # Parse headers into key/value pairs paying attention
    # to continuation lines.
    while len(lines):
        # Parse initial header name : value pair.
        curr = lines.pop(0)
        if curr.find(":") < 0:
            raise InvalidHeader("invalid line %s" % curr.strip())

        name, value = curr.split(":", 1)
        name = name.rstrip(" \t").upper()
        if HEADER_RE.search(name):
            raise InvalidHeader("invalid header name %s" % name)

        name, value = name.strip(), [value.lstrip()]

        # Consume value continuation lines
        while len(lines) and lines[0].startswith((" ", "\t")):
            value.append(lines.pop(0))
        value = ''.join(value).rstrip()

        # store new header value
        self._headers.add_header(name, value)

        # update WSGI environ
        key = 'HTTP_%s' % name.upper().replace('-', '_')
        self._environ[key] = value

    # detect now if body is sent by chunks.
    clen = self._headers.get('content-length')
    te = self._headers.get('transfer-encoding', '').lower()

    if clen is not None:
        try:
            self._clen_rest = self._clen = int(clen)
        except ValueError:
            pass
    else:
        self._chunked = (te == 'chunked')
        if not self._chunked:
            self._clen_rest = MAXSIZE

    # detect encoding and set decompress object
    encoding = self._headers.get('content-encoding')
    if self.decompress:
        if encoding == "gzip":
            self.__decompress_obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        elif encoding == "deflate":
            self.__decompress_obj = zlib.decompressobj()

    rest = data[idx+4:]
    self._buf = [rest]
    self.__on_headers_complete = True
    return len(rest)
def recoverFile(filename, output_file):
    output = open(output_file, "wb")
    decompressor = zlib.decompressobj()
    unused = ""
    for response in readFile(filename):
        if not response:
            break
        to_decompress = decompressor.unconsumed_tail + unused + response
        unused = ""
        while to_decompress:
            try:
                decompressed = decompressor.decompress(to_decompress)
            except:
                print "%s couldn't be decompressed" % filename
                return
            if decompressed:
                output.write(decompressed)
                to_decompress = decompressor.unconsumed_tail
                if decompressor.unused_data:
                    unused = decompressor.unused_data
                    remainder = decompressor.flush()
                    output.write(remainder)
                    decompressor = zlib.decompressobj()
            else:
                to_decompress = None
    remainder = decompressor.flush()
    if remainder:
        output.write(remainder)
def compute(self, split):
    f = open(self.path, 'rb', 4096 * 1024)
    last_line = ''
    if split.index == 0:
        zf = gzip.GzipFile(fileobj=f)
        zf._read_gzip_header()
        start = f.tell()
    else:
        start = self.find_block(f, split.index * self.splitSize)
        if start >= split.index * self.splitSize + self.splitSize:
            return
        for i in xrange(1, 100):
            if start - i * self.BLOCK_SIZE <= 4:
                break
            last_block = self.find_block(f, start - i * self.BLOCK_SIZE)
            if last_block < start:
                f.seek(last_block)
                d = f.read(start - last_block)
                dz = zlib.decompressobj(-zlib.MAX_WBITS)
                last_line = dz.decompress(d).split('\n')[-1]
                break

    end = self.find_block(f, split.index * self.splitSize + self.splitSize)
    f.seek(start)
    d = f.read(end - start)
    f.close()
    if not d:
        return

    dz = zlib.decompressobj(-zlib.MAX_WBITS)
    io = cStringIO.StringIO(dz.decompress(d))
    yield last_line + io.readline()
    for line in io:
        if line.endswith('\n'):  # drop last line
            yield line
def __next__(self):
    chunk = self.read()
    if not chunk:
        if self._decoder:
            chunk = self._decoder.flush()
            self._decoder = None
            return chunk
        else:
            raise StopIteration
    else:
        ce = self._content_encoding
        if ce in ('gzip', 'deflate'):
            if not self._decoder:
                import zlib
                if ce == 'gzip':
                    self._decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
                else:
                    self._decoder = zlib.decompressobj()
                    try:
                        return self._decoder.decompress(chunk)
                    except zlib.error:
                        self._decoder = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self._decoder.decompress(chunk)
            except (IOError, zlib.error) as e:
                raise ContentDecodingError(e)
        if ce:
            raise ContentDecodingError('Unknown encoding: %s' % ce)
        return chunk
def start_decompress_message(self):
    if self._is_server:
        if self._decompressor is None or self.client_no_context_takeover:
            self._decompressor = zlib.decompressobj(-self.client_max_window_bits)
    else:
        if self._decompressor is None or self.server_no_context_takeover:
            self._decompressor = zlib.decompressobj(-self.server_max_window_bits)
def _decode(self, body, encoding, max_length=0):
    if encoding == 'gzip' or encoding == 'x-gzip':
        body = gunzip(body, max_length)
    elif encoding == 'deflate':
        try:
            if max_length:
                dobj = zlib.decompressobj()
                body = dobj.decompress(body, max_length)
                if dobj.unconsumed_tail:
                    raise DecompressSizeError(
                        'Response exceeded %s bytes' % max_length)
            else:
                body = zlib.decompress(body)
        except zlib.error:
            # ugly hack to work with raw deflate content that may
            # be sent by microsoft servers. For more information, see:
            # http://carsten.codimi.de/gzip.yaws/
            # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
            # http://www.gzip.org/zlib/zlib_faq.html#faq38
            if max_length:
                dobj = zlib.decompressobj(-15)
                body = dobj.decompress(body, max_length)
                if dobj.unconsumed_tail:
                    raise DecompressSizeError(
                        'Response exceeded %s bytes' % max_length)
            else:
                body = zlib.decompress(body, -15)
    return body
def _initialize_decompressor(self):
    if self._compression_type == CompressionTypes.BZIP2:
        self._decompressor = bz2.BZ2Decompressor()
    elif self._compression_type == CompressionTypes.DEFLATE:
        self._decompressor = zlib.decompressobj()
    else:
        assert self._compression_type == CompressionTypes.GZIP
        self._decompressor = zlib.decompressobj(self._gzip_mask)
def test_header_auto_detect(self):
    """autodetect zlib and gzip header"""
    do = zlib.decompressobj(zlib.MAX_WBITS | 32)
    self.assertEqual(do.decompress(self.gzip_data), self.text)

    do = zlib.decompressobj(zlib.MAX_WBITS | 32)
    self.assertEqual(do.decompress(self.zlib_data), self.text)

    self.assertEqual(
        zlib.decompress(self.gzip_data, zlib.MAX_WBITS | 32), self.text)
    self.assertEqual(
        zlib.decompress(self.zlib_data, zlib.MAX_WBITS | 32), self.text)
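# The self.text / self.zlib_data / self.gzip_data fixtures are not shown above;
# a plausible setUp (an assumption, not taken from the original test) is:
import gzip
import zlib

def setUp(self):
    self.text = b"autodetect zlib and gzip headers"
    self.zlib_data = zlib.compress(self.text)   # zlib-wrapped stream
    self.gzip_data = gzip.compress(self.text)   # gzip-wrapped stream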
def _read_headers(self, data):
    """
    Read the headers of an HTTP response from the socket, and the
    response body as well, into a new HTTPResponse instance. Then call
    the request handler.
    """
    do_close = False

    try:
        initial_line, data = data.split(CRLF, 1)
        try:
            try:
                http_version, status, status_text = initial_line.split(' ', 2)
                status = int(status)
            except ValueError:
                http_version, status = initial_line.split(' ')
                status = int(status)
                status_text = HTTP.get(status, '')
        except ValueError:
            raise BadRequest('Invalid HTTP status line %r.' % initial_line)

        # Parse the headers.
        headers = read_headers(data)

        # Construct an HTTPResponse object.
        self.current_response = response = HTTPResponse(self,
            self._requests[0], http_version, status, status_text, headers)

        # Do we have a Content-Encoding header?
        if 'Content-Encoding' in headers:
            encoding = headers['Content-Encoding']
            if encoding == 'gzip':
                response._decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
            elif encoding == 'deflate':
                response._decompressor = zlib.decompressobj(-zlib.MAX_WBITS)

        # Do we have a Content-Length header?
        if 'Content-Length' in headers:
            self._stream.on_read = self._read_body
            self._stream.read_delimiter = int(headers['Content-Length'])

        elif 'Transfer-Encoding' in headers:
            if headers['Transfer-Encoding'] == 'chunked':
                self._stream.on_read = self._read_chunk_head
                self._stream.read_delimiter = CRLF
            else:
                raise BadRequest("Unsupported Transfer-Encoding: %s"
                                 % headers['Transfer-Encoding'])

        # Is this a HEAD request? If so, then handle the request NOW.
        if response.method == 'HEAD':
            self._on_response()

    except BadRequest, e:
        log.info('Bad response from %r: %s', self._server, e)
        do_close = True
def decompress(self, value):
    if not self.decompressobj:
        try:
            self.decompressobj = zlib.decompressobj()
            return self.decompressobj.decompress(value)
        except zlib.error:
            self.decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
            return self.decompressobj.decompress(value)
    return self.decompressobj.decompress(value)
def zlib_gzin(self, compress=False, data=None):
    """Return the compressed or decompressed object with Zlib, string or file data"""
    if not compress:
        try:
            if data:
                return zlib.decompressobj().decompress('x\x9c' + data)
            else:
                return zlib.decompressobj().decompress('x\x9c' + self.data)
        except Exception, e:
            return '[!] Error Zlib inflate decompress: %s.' % e
def decrypt_file(self, file_content, filename):
    # each log file is built from a header section and a content section, divided by a |==| mark
    file_split_content = file_content.split("|==|\n")
    # get the header section content
    file_header_content = file_split_content[0]
    # get the log section content
    file_log_content = file_split_content[1]
    # if the file is not encrypted - the "key" value in the file header is '-1'
    file_encryption_key = file_header_content.find("key:")
    if file_encryption_key == -1:
        # uncompress the log content
        uncompressed_and_decrypted_file_content = zlib.decompressobj().decompress(file_log_content)
    # if the file is encrypted
    else:
        content_encrypted_sym_key = file_header_content.split("key:")[1].splitlines()[0]
        # we expect to have a 'keys' folder that will have the stored private keys
        if not os.path.exists(os.path.join(self.config_path, "keys")):
            self.logger.error("No encryption keys directory was found and file %s is encrypted", filename)
            raise Exception("No encryption keys directory was found")
        # get the public key id from the log file header
        public_key_id = file_header_content.split("publicKeyId:")[1].splitlines()[0]
        # get the public key directory in the filesystem - each time we upload a new key this id is incremented
        public_key_directory = os.path.join(os.path.join(self.config_path, "keys"), public_key_id)
        # if the key directory does not exist
        if not os.path.exists(public_key_directory):
            self.logger.error(
                "Failed to find a proper certificate for : %s who has the publicKeyId of %s",
                filename,
                public_key_id,
            )
            raise Exception("Failed to find a proper certificate")
        # get the checksum
        checksum = file_header_content.split("checksum:")[1].splitlines()[0]
        # get the private key
        private_key = open(os.path.join(public_key_directory, "Private.key"), "r").read()
        try:
            rsa_private_key = M2Crypto.RSA.load_key_string(private_key)
            content_decrypted_sym_key = rsa_private_key.private_decrypt(
                base64.b64decode(bytearray(content_encrypted_sym_key)), M2Crypto.RSA.pkcs1_padding
            )
            uncompressed_and_decrypted_file_content = zlib.decompressobj().decompress(
                AES.new(base64.b64decode(bytearray(content_decrypted_sym_key)), AES.MODE_CBC, 16 * "\x00").decrypt(
                    file_log_content
                )
            )
            # we check the content validity by checking the checksum
            content_is_valid = self.validate_checksum(checksum, uncompressed_and_decrypted_file_content)
            if not content_is_valid:
                self.logger.error("Checksum verification failed for file %s", filename)
                raise Exception("Checksum verification failed")
        except Exception, e:
            self.logger.error(
                "Error while trying to decrypt the file %s", filename, e.message, traceback.format_exc()
            )
            raise Exception("Error while trying to decrypt the file" + filename)
def _fetch_to_internal_buffer(self, num_bytes):
    """Fetch up to num_bytes into the internal buffer."""
    if (not self._read_eof and self._read_position > 0 and
            (self._read_buffer.tell() - self._read_position) < num_bytes):
        # There aren't enough number of bytes to accommodate a read, so we
        # prepare for a possibly large read by clearing up all internal buffers
        # but without dropping any previous held data.
        self._read_buffer.seek(self._read_position)
        data = self._read_buffer.read()
        self._clear_read_buffer()
        self._read_buffer.write(data)

    while not self._read_eof and (self._read_buffer.tell() -
                                  self._read_position) < num_bytes:
        # Continue reading from the underlying file object until enough bytes are
        # available, or EOF is reached.
        buf = self._file.read(self._read_size)
        if buf:
            decompressed = self._decompressor.decompress(buf)
            del buf  # Free up some possibly large and no-longer-needed memory.
            self._read_buffer.write(decompressed)
        else:
            # EOF of current stream reached.
            #
            # Any uncompressed data at the end of the stream of a gzip or bzip2
            # file that is not corrupted points to a concatenated compressed
            # file. We read concatenated files by recursively creating decompressor
            # objects for the unused compressed data.
            if (self._compression_type == CompressionTypes.BZIP2 or
                    self._compression_type == CompressionTypes.DEFLATE or
                    self._compression_type == CompressionTypes.GZIP):
                if self._decompressor.unused_data != b'':
                    buf = self._decompressor.unused_data

                    if self._compression_type == CompressionTypes.BZIP2:
                        self._decompressor = bz2.BZ2Decompressor()
                    elif self._compression_type == CompressionTypes.DEFLATE:
                        self._decompressor = zlib.decompressobj()
                    else:
                        self._decompressor = zlib.decompressobj(self._gzip_mask)

                    decompressed = self._decompressor.decompress(buf)
                    self._read_buffer.write(decompressed)
                    continue
            else:
                # Deflate, Gzip and bzip2 formats do not require flushing
                # remaining data in the decompressor into the read buffer when
                # fully decompressing files.
                self._read_buffer.write(self._decompressor.flush())

            # Record that we have hit the end of file, so we won't unnecessarily
            # repeat the completeness verification step above.
            self._read_eof = True
def compute(self, split):
    f = self.open_file()
    last_line = ''
    if split.index == 0:
        zf = gzip.GzipFile(fileobj=f)
        zf._read_gzip_header()
        start = f.tell()
    else:
        start = self.find_block(f, split.index * self.splitSize)
        if start >= split.index * self.splitSize + self.splitSize:
            return
        for i in xrange(1, 100):
            if start - i * self.BLOCK_SIZE <= 4:
                break
            last_block = self.find_block(f, start - i * self.BLOCK_SIZE)
            if last_block < start:
                f.seek(last_block)
                d = f.read(start - last_block)
                dz = zlib.decompressobj(-zlib.MAX_WBITS)
                last_line = dz.decompress(d).split('\n')[-1]
                if last_line.endswith('\n'):
                    last_line = ''
                break

    end = self.find_block(f, split.index * self.splitSize + self.splitSize)  # TODO: speed up
    f.seek(start)
    if self.fileinfo:
        f.length = end

    dz = zlib.decompressobj(-zlib.MAX_WBITS)
    while start < end:
        d = f.read(min(64 << 10, end - start))
        start += len(d)
        if not d:
            break

        io = cStringIO.StringIO(dz.decompress(d))

        last_line += io.readline()
        yield last_line
        last_line = ''

        ll = list(io)
        if not ll:
            continue

        last_line = ll.pop()
        for line in ll:
            yield line

        if last_line.endswith('\n'):
            yield last_line
            last_line = ''

    f.close()
def deflate_decoder(wbits=None):
    if wbits is None:
        obj = zlib.decompressobj()
    else:
        obj = zlib.decompressobj(wbits)

    def enc(data, final):
        ret = obj.decompress(data)
        if final:
            ret += obj.flush()
        return ret

    return enc
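# Usage sketch (an assumption, not from the original source): decode a
# zlib-wrapped payload in two calls, passing final=True on the last chunk so
# the decompressor gets flushed.
import zlib

dec = deflate_decoder()             # default wbits: expects a zlib header
blob = zlib.compress(b"x" * 500)
out = dec(blob[:100], False) + dec(blob[100:], True)
assert out == b"x" * 500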
def get_title_html(url, type, stream=None, **kwds):
    if stream is None:
        request = urllib2.Request(url)
        for header in default_headers:
            request.add_header(*header)
        stream = get_opener().open(request, timeout=TIMEOUT_S)

    with closing(stream):
        charset = stream.info().getparam('charset')
        content_enc = stream.info().dict.get('content-encoding', 'identity')
        if content_enc == 'identity':
            data = stream.read(READ_BYTES_MAX)
        elif content_enc == 'gzip':
            raw_data = stream.read(READ_BYTES_MAX)
            data = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(raw_data)
        elif content_enc == 'deflate':
            raw_data = stream.read(READ_BYTES_MAX)
            try:
                data = zlib.decompressobj().decompress(raw_data)
            except zlib.error:
                data = zlib.decompressobj(-zlib.MAX_WBITS).decompress(raw_data)
        else:
            raise PageURLError(
                'Unsupported content-encoding: "%s"' % content_enc)

    soup = BeautifulSoup(data, BS4_PARSER, from_encoding=charset)

    # The page title according to the <title> tag.
    title = soup.find('title')
    if title:
        title = ''.join(re.sub(r'\s+', ' ', s) for s in title.strings).strip()

    # The page title according to the <meta> tags.
    title_meta = soup.find('meta', attrs={'name': 'title'}) or \
                 soup.find('meta', attrs={'name': 'og:title'})
    if title_meta:
        title_meta = title_meta.attrs.get('content')

    if not title and not title_meta:
        return
    elif title and (not title_meta or title_meta in title):
        title_str = 'Title: %s' % format_title(title)
    elif title_meta and (not title or title in title_meta):
        title_str = 'Title: %s' % format_title(title_meta)
    else:
        title_str = 'Title (meta): %s -- Title (primary): %s' % (
            format_title(title_meta), format_title(title))

    return {'title': title_str}
def uncompress_chunks(compressed_chunks, use_gzip):
    """Uncompress a list of data compressed with gzip or deflate.

    Args:
      compressed_chunks: a list of compressed data
      use_gzip: if True, uncompress with gzip. Otherwise, use deflate.

    Returns:
      [uncompressed_chunk_1, uncompressed_chunk_2, ...]
    """
    if use_gzip:
        decompress = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress
    else:
        decompress = zlib.decompressobj(-zlib.MAX_WBITS).decompress
    return [decompress(c) for c in compressed_chunks]
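# Usage sketch (an assumption, not from the original source): the deflate
# branch expects raw deflate chunks, i.e. data produced with a matching
# -zlib.MAX_WBITS compressor.
import zlib

co = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
chunks = [co.compress(b"abc" * 100), co.flush()]
assert b"".join(uncompress_chunks(chunks, use_gzip=False)) == b"abc" * 100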
def _eq_zstream(expected, produced):
    """Compares the zstreams.

    Their decompressed bytes are compared. The compressed bytes differ,
    because of the different flushing used in the Python zlib and dictzip.
    """
    import zlib
    deobj = zlib.decompressobj(-zlib.MAX_WBITS)
    expected_data = deobj.decompress(expected.read())
    expected.seek(-len(deobj.unused_data), os.SEEK_CUR)

    deobj = zlib.decompressobj(-zlib.MAX_WBITS)
    got = deobj.decompress(produced.read())
    produced.seek(-len(deobj.unused_data), os.SEEK_CUR)
    asserting.eq_bytes(expected_data, got)
def fetch_url(url, *, user_agent=USER_AGENT_PLAYER, cookie=None, fakeip=None):
    '''Fetch HTTP URL

    Arguments: url, user_agent, cookie
    Return value: (response_object, response_data) -> (http.client.HTTPResponse, bytes)
    '''
    logging.debug('Fetch: %s' % url)
    req_headers = {'User-Agent': user_agent, 'Accept-Encoding': 'gzip, deflate'}
    if cookie:
        req_headers['Cookie'] = cookie
    if fakeip:
        req_headers['X-Forwarded-For'] = fakeip
        req_headers['Client-IP'] = fakeip
    req = urllib.request.Request(url=url, headers=req_headers)
    response = urllib.request.urlopen(req, timeout=120)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = gzip.GzipFile(fileobj=response).read()
    elif content_encoding == 'deflate':
        decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
        data = decompressobj.decompress(response.read()) + decompressobj.flush()
    else:
        data = response.read()
    return response, data
def post_init(self):
    import zlib
    import re
    load_dir = self.image_dir
    self.train_tar = os.path.join(load_dir, 'ILSVRC2012_img_train.tar')
    self.val_tar = os.path.join(load_dir, 'ILSVRC2012_img_val.tar')
    self.devkit = os.path.join(load_dir, 'ILSVRC2012_devkit_t12.tar.gz')
    for infile in (self.train_tar, self.val_tar, self.devkit):
        if not os.path.exists(infile):
            raise IOError(infile + " not found. Please ensure you have ImageNet downloaded."
                          "More info here: http://www.image-net.org/download-imageurls")

    with tarfile.open(self.devkit, "r:gz") as tf:
        synsetfile = 'ILSVRC2012_devkit_t12/data/meta.mat'
        valfile = 'ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'

        # get the synset mapping by hacking around matlab's terrible compressed format
        meta_buff = tf.extractfile(synsetfile).read()
        decomp = zlib.decompressobj()
        self.synsets = re.findall(re.compile('n\d+'), decomp.decompress(meta_buff[136:]))
        self.train_labels = {s: i for i, s in enumerate(self.synsets)}

        # get the ground truth validation labels and offset to zero
        self.val_labels = {"%08d" % (i + 1): int(x) - 1 for i, x in
                           enumerate(tf.extractfile(valfile))}

    self.validation_pct = None

    self.train_nrec = 1281167
    self.train_start = 0

    self.val_nrec = 50000
    self.val_start = -(-self.train_nrec // self.macro_size)

    self.pixel_mean = [104.41227722, 119.21331787, 126.80609131]
def expand_content(namespace, source):
    """Yields expanded data from source."""
    # TODO(maruel): Add bzip2.
    # TODO(maruel): Remove '-gzip' since it's a misnomer.
    if namespace.endswith(('-deflate', '-gzip')):
        zlib_state = zlib.decompressobj()
        for i in source:
            data = zlib_state.decompress(i, gcs.CHUNK_SIZE)
            yield data
            del data
            while zlib_state.unconsumed_tail:
                data = zlib_state.decompress(
                    zlib_state.unconsumed_tail, gcs.CHUNK_SIZE)
                yield data
                del data
            del i
        data = zlib_state.flush()
        yield data
        del data
        # Forcibly delete the state.
        del zlib_state
    else:
        # Returns the source as-is.
        for i in source:
            yield i
            del i
def __init__(self, conn):
    asyncore.dispatcher_with_send.__init__(self, conn)
    self.ssled = False
    self.secure_connection(certfile="server.passless.crt",
                           keyfile="server.passless.key", server_side=True)
    self.consumed_ace = False
    self.data = ""
    self.binary_mode = False
    self.decompressor = zlib.decompressobj()
    self.compressor = zlib.compressobj()
    self.unzipped_input = ""
    self.unzipped_output_buffer = ""
    self.output_buffer = ""
    self.speech = dict()
    self.pong = 1
    self.ping = 0
    self.httpClient = AsyncOpenHttp(self.handle_google_data, self.handle_google_failure)
    self.gotGoogleAnswer = False
    self.googleData = None
    self.lastRequestId = None
    self.dictation = None
    self.dbConnection = db.getConnection()
    self.assistant = None
    self.sendLock = threading.Lock()
    self.current_running_plugin = None
    self.current_location = None
    self.plugin_lastAceId = None
    self.logger = logging.getLogger("logger")
def stream_decompress(iterator, mode='gzip'):
    """
    Stream decodes an iterator over compressed data

    :param iterator: An iterator over compressed data
    :param mode: 'gzip' or 'deflate'
    :return: An iterator over decompressed data
    """
    if mode not in ['gzip', 'deflate']:
        raise ValueError('stream_decompress mode must be gzip or deflate')

    zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS
    dec = zlib.decompressobj(zlib_mode)
    try:
        for chunk in iterator:
            rv = dec.decompress(chunk)
            if rv:
                yield rv
    except zlib.error:
        # If there was an error decompressing, just return the raw chunk
        yield chunk

        # Continue to return the rest of the raw data
        for chunk in iterator:
            yield chunk
    else:
        # Make sure everything has been returned from the decompression object
        buf = dec.decompress(bytes())
        rv = buf + dec.flush()
        if rv:
            yield rv
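# Usage sketch (an assumption, not from the original source): stream-decompress
# a gzip body that arrives in small chunks.
import gzip

blob = gzip.compress(b"streamed payload " * 64)
chunks = (blob[i:i + 128] for i in range(0, len(blob), 128))
assert b"".join(stream_decompress(chunks, mode='gzip')) == b"streamed payload " * 64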
def uncompress(self):
    # type: () -> IO[bytes]
    import zlib
    decomp = zlib.decompressobj(-13)  # RFC 2440, pg 61.
    # This doubles the memory usage.
    stream = BytesIO(decomp.decompress(self.data))
    return stream
def send_request(self, request):
    data = (request.format(),)
    payload = zlib.compress(rencode.dumps(data))
    self.conn.sendall(payload)

    buf = b""
    while True:
        data = self.conn.recv(1024)
        if not data:
            self.connected = False
            break
        buf += data
        dobj = zlib.decompressobj()
        try:
            message = rencode.loads(dobj.decompress(buf))
        except (ValueError, zlib.error, struct.error):
            # Probably incomplete data, read more
            continue
        else:
            buf = dobj.unused_data
        yield message
def loadcompressed(self):
    if self.data[0:4] != 'cmpr':
        raise Exception("can't happen")
    self.compression = True
    self.infocollector.compression = True
    [compressedsize] = struct.unpack('<I', self.data[4:8])
    [uncompressedsize] = struct.unpack('<I', self.data[8:12])
    [blocksizessize] = struct.unpack('<I', self.data[12:16])
    assert(self.data[20:24] == 'CPng')
    assert(struct.unpack('<H', self.data[24:26])[0] == 1)
    assert(struct.unpack('<H', self.data[26:28])[0] == 4)

    if (20 + compressedsize + blocksizessize + 1) & ~1 != self.rawsize:
        raise Exception('mismatched blocksizessize value (20 + %u + %u != %u)'
                        % (compressedsize, blocksizessize, self.rawsize))

    decomp = zlib.decompressobj()
    self.uncompresseddata = decomp.decompress(self.data[28:])
    if len(decomp.unconsumed_tail):
        raise Exception('unconsumed tail in compressed data (%u bytes)'
                        % len(decomp.unconsumed_tail))
    if len(decomp.unused_data) != blocksizessize:
        raise Exception('mismatch in unused data after compressed data (%u != %u)'
                        % (len(decomp.unused_data), blocksizessize))
    if len(self.uncompresseddata) != uncompressedsize:
        raise Exception('mismatched compressed data size: expected %u got %u'
                        % (uncompressedsize, len(self.uncompresseddata)))

    chunk = RiffChunk(infocollector=self.infocollector)
    blocksizesdata = zlib.decompress(self.data[28+compressedsize:])
    blocksizes = []
    for i in range(0, len(blocksizesdata), 4):
        blocksizes.append(struct.unpack('<I', blocksizesdata[i:i+4])[0])

    offset = 0
    self.contents = []
    while offset < len(self.uncompresseddata):
        chunk = RiffChunk(infocollector=self.infocollector)
        chunk.parent = self
        chunk.load(self.uncompresseddata, offset, blocksizes)
        self.contents.append(chunk)
        offset += 8 + chunk.rawsize
def test_empty_flush(self):
    import zlib
    co = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
    assert co.flush()  # Returns a zlib header
    dco = zlib.decompressobj()
    assert dco.flush() == b""
def uncompress_dcx_content(content):
    """Decompress the file content from a .dcx file.

    Returns the uncompressed content.
    Raising ValueError if the header does not match the required format.
    """
    master_offset = 0
    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'X', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)

    (req_1, ) = struct.unpack_from("<I", content, offset=master_offset)
    master_offset += struct.calcsize("<I")
    (req_2, req_3, req_4, req_5) = struct.unpack_from(">IIII", content, offset=master_offset)
    master_offset += struct.calcsize(">IIII")

    if req_1 != 0x100:
        raise ValueError("Expected DCX header int 0x100, but received " + hex(req_1))
    if req_2 != 0x18:
        raise ValueError("Expected DCX header int 0x18, but received " + hex(req_2))
    if req_3 != 0x24:
        raise ValueError("Expected DCX header int 0x24, but received " + hex(req_3))
    if req_4 != 0x24 and req_4 != 0x44:
        raise ValueError("Expected DCX header int 0x24|0x44, but received " + hex(req_4))
    if req_5 != 0x2c and req_5 != 0x4c:
        raise ValueError("Expected DCX header int 0x24|0x4c, but received " + hex(req_5))

    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'S', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)

    (uncomp_size, comp_size) = struct.unpack_from(">II", content, offset=master_offset)
    master_offset += struct.calcsize(">II")

    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'P', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)
    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'F', 1)
    master_offset = consume_byte(content, master_offset, b'L', 1)
    master_offset = consume_byte(content, master_offset, b'T', 1)

    # Skip the portion of the header whose meaning is unknown.
    master_offset += 0x18

    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'A', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)
    (comp_header_length, ) = struct.unpack_from(">I", content, offset=master_offset)
    master_offset += struct.calcsize(">I")

    master_offset = consume_byte(content, master_offset, b'0x78', 1)
    master_offset = consume_byte(content, master_offset, b'0xDA', 1)
    comp_size -= 2  # The previous two bytes are included in the compressed data, for some reason.

    decomp_obj = zlib.decompressobj(-15)
    return decomp_obj.decompress(
        content[master_offset:master_offset + comp_size], uncomp_size)
def _decompress(string):
    dcomp = zlib.decompressobj()
    dcomped = dcomp.decompress(string)
    dcomped += dcomp.flush()
    return dcomped
def _cacheChunk(self):
    newText = ''
    newLen = 0
    procDataLen = 1

    # While I've downloaded additional data and haven't added anything to
    # my cached text, keep trying
    while newLen == 0 and procDataLen > 0:
        data = None
        # If this is true, we likely have a partial block
        if self._dc and self._dc.unused_data:
            data = self._dc.unused_data
        else:
            data = self._filePtr.read(self._chunkSize)
        datalen = len(data)
        if data:
            self._dc = zlib.decompressobj(
                zlib.MAX_WBITS | 32)  # autodetect gzip or zlib header
        if len(self._foq) == 0:
            # Append the file offset to the file offset queue
            self._foq.append(self._filePtr.tell() - len(data))

        # If we're here and we have no data, we've likely hit the end of
        # the compressed file
        if data:
            newText = self._dc.decompress(data)
            dlMore = True
            while len(self._dc.unused_data) == 0 and dlMore:
                newDat = self._filePtr.read(self._chunkSize)
                dlMore = len(newDat) > 0
                datalen += len(newDat)
                newText += self._dc.decompress(newDat)

            procDataLen = datalen - len(self._dc.unused_data)
            newLen = len(newText)
            self._boq.append(newLen)
            self._foq.append(self._foq[-1] + procDataLen)
            self._text += newText
            data = None
            #print "decompressing!, added", newLen, "bytes from", procDataLen, "data"
        elif self._dc:
            newText = self._dc.flush()
            newLen = len(newText)
            procDataLen = datalen
            self._boq.append(newLen)
            self._foq.append(self._foq[-1] + procDataLen)
            self._text += newText
            self._dc = None
            #print "No data to be had, flushed and got", newLen, "bytes from", procDataLen, "data"
        else:
            # break out with no data read!
            # also kill the _foq added previously
            self._foq.popleft()
            newText = ""
            newLen = 0
            procDataLen = 0
    # end while loop

    #print >> sys.stderr, "Cached chunk"
    #print >> sys.stderr, "FOQ:", self._foq
    #print >> sys.stderr, "BOQ:", self._boq

    # return the # of bytes decompressed
    return newLen
def inflate(data):
    decompress = zlib.decompressobj(
        -zlib.MAX_WBITS  # see above
    )
    inflated = decompress.decompress(data)
    inflated += decompress.flush()
    return inflated
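# Round-trip sketch (an assumption, not from the original source): inflate()
# expects raw, header-less deflate data, so the compress side must also use
# negative wbits.
import zlib

co = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)
raw = co.compress(b"raw deflate round trip") + co.flush()
assert inflate(raw) == b"raw deflate round trip"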
def __init__(self):
    self._first_try = True
    self._data = b''
    self._obj = zlib.decompressobj()
def __init__(self):
    self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
    self._state = GzipDecoderState.FIRST_MEMBER
def pako_inflate_raw(data):
    decompress = zlib.decompressobj(-15)
    decompressed_data = decompress.decompress(data)
    decompressed_data += decompress.flush()
    return decompressed_data
def decompress(value):
    dco = zlib.decompressobj()
    return dco.decompress(value) + dco.flush()
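# Usage sketch (an assumption, not from the original source): for complete
# zlib-wrapped input this is equivalent to a one-shot zlib.decompress() call.
import zlib

assert decompress(zlib.compress(b"payload")) == b"payload"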