def startDecompressMessage(self):
     if self._isServer:
         if self._decompressor is None or self.c2s_no_context_takeover:
             self._decompressor = zlib.decompressobj(-self.c2s_max_window_bits)
     else:
         if self._decompressor is None or self.s2c_no_context_takeover:
             self._decompressor = zlib.decompressobj(-self.s2c_max_window_bits)
 def parse_blob(self):
     """Unzip and parse the blob. Everything we get is big endian. Each block contains 16*16*16 nodes, a node is the ingame block size. """
     dec_o = zlib.decompressobj()
     (self.param0, self.param1, self.param2) = struct.unpack("8192s4096s4096s", dec_o.decompress(self.blob[4:]))
     self.param0 = array.array("H", self.param0)
     self.param0.byteswap()
     #import pdb;pdb.set_trace()
     tail = dec_o.unused_data
     dec_o = zlib.decompressobj() #Must make new obj or .unused_data will get messed up.
     blah = dec_o.decompress(tail) #throw away metadata
      
     (static_version, static_count,) = struct.unpack(">BH", dec_o.unused_data[0:3])
     ptr=3
     if static_count:
         for i in range(static_count):
             (object_type, pos_x_nodes, pos_y_nodes, pos_z_nodes, data_size) = struct.unpack(">BiiiH", dec_o.unused_data[ptr:ptr+15])
             ptr = ptr+15+data_size
     
     (self.timestamp,) = struct.unpack(">I", dec_o.unused_data[ptr:ptr+4])
     if self.timestamp == 0xffffffff: #This is defined as an unknown timestamp
         self.timestamp = None
     ptr=ptr+4
     (name_id_mapping_version, num_name_id_mappings) = struct.unpack(">BH", dec_o.unused_data[ptr:ptr+3])
     ptr=ptr+3
     start=ptr
     self.id_to_name = {}
     for i in range(0, num_name_id_mappings):
         (node_id, name_len) = struct.unpack(">HH", dec_o.unused_data[start:start+4])
         (name,) = struct.unpack(">{}s".format(name_len), dec_o.unused_data[start+4:start+4+name_len])
         self.id_to_name[node_id] = name.decode('utf8')
         start=start+4+name_len
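The parser above leans on unused_data: once one zlib stream ends, whatever follows it (here the metadata, static objects, timestamp and name-id mappings) shows up there, and a fresh decompressobj is needed for the next stream. A minimal standalone sketch of that pattern, using made-up payloads rather than real map-block data:

import zlib

blob = zlib.compress(b"node data") + zlib.compress(b"metadata") + b"plain tail"

d1 = zlib.decompressobj()
nodes = d1.decompress(blob)      # b"node data"
rest = d1.unused_data            # bytes left over after the first zlib stream

d2 = zlib.decompressobj()        # a new object is required for the next stream
meta = d2.decompress(rest)       # b"metadata"
tail = d2.unused_data            # b"plain tail"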
Example #3
def format_body(message, body_fp):
    """ return (is_compressed, body) """

    t_enc = message.get('Transfer-Encoding', '').strip().lower()
    c_enc = message.get('Content-Encoding', '').strip().lower()
    c_type = message.get('Content-Type', '').strip().lower()
    charset = 'latin1'
    m = RE_CHARSET.search(c_type)
    if m:
        charset = m.group(1)

    body = read_body(body_fp, t_enc == 'chunked')
    if c_enc in ('gzip', 'x-gzip', 'deflate'):
        try:
            if c_enc != 'deflate':
                buf = StringIO(body)
                read_gzip_header(buf)
                body = buf.read()
                do = zlib.decompressobj(-zlib.MAX_WBITS)
            else:
                do = zlib.decompressobj()
            decompressed = do.decompress(body)
            #print "<gzipped>\n" + decompressed
            return (True, decompressed)
        except Exception:
            import traceback
            traceback.print_exc()
            return (False, body)
    else:
        return (False, body)
def decode_deflate(chunks, z=None):

    if z is None:
        z = zlib.decompressobj()
        retry = True
    else:
        retry = False

    for chunk in chunks:
        if hasattr(z, 'unconsumed_tail'): # zlib
            compressed = (z.unconsumed_tail + chunk)
        else: # brotli
            compressed = chunk
        try:
            decompressed = z.decompress(compressed)
        except zlib.error:
            if not retry:
                raise
            z = zlib.decompressobj(-zlib.MAX_WBITS)
            retry = False
            decompressed = z.decompress(compressed)

        if decompressed:
            yield decompressed

    yield z.flush()
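A quick way to exercise both paths of decode_deflate above: feed it a zlib-wrapped stream, then a raw-deflate stream. The raw stream is built here with compressobj(-MAX_WBITS) as a stand-in for what a misbehaving server might send, so the retry branch kicks in:

import zlib

wrapped = [zlib.compress(b"hello world")]                  # RFC 1950 framing
raw_co = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)
raw = [raw_co.compress(b"hello world") + raw_co.flush()]   # bare RFC 1951 data

assert b"".join(decode_deflate(wrapped)) == b"hello world"
assert b"".join(decode_deflate(raw)) == b"hello world"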
Example #5
    def _parse_headers(self, data):
        idx = data.find(b("\r\n\r\n"))
        if idx < 0:  # we don't have all headers
            return False

        # Split lines on \r\n keeping the \r\n on each line
        lines = [bytes_to_str(line) + "\r\n"
                 for line in data[:idx].split(b("\r\n"))]

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
        while len(lines):
            # Parse initial header name : value pair.
            curr = lines.pop(0)
            if curr.find(":") < 0:
                raise InvalidHeader("invalid line %s" % curr.strip())
            name, value = curr.split(":", 1)
            name = name.rstrip(" \t").upper()
            if HEADER_RE.search(name):
                raise InvalidHeader("invalid header name %s" % name)
            name, value = name.strip(), [value.lstrip()]

            # Consume value continuation lines
            while len(lines) and lines[0].startswith((" ", "\t")):
                value.append(lines.pop(0))
            value = ''.join(value).rstrip()

            # store new header value
            self._headers.add_header(name, value)

            # update WSGI environ
            key = 'HTTP_%s' % name.upper().replace('-', '_')
            self._environ[key] = value

        # detect now if body is sent by chunks.
        clen = self._headers.get('content-length')
        te = self._headers.get('transfer-encoding', '').lower()

        if clen is not None:
            try:
                self._clen_rest = self._clen = int(clen)
            except ValueError:
                pass
        else:
            self._chunked = (te == 'chunked')
            if not self._chunked:
                self._clen_rest = MAXSIZE

        # detect encoding and set decompress object
        encoding = self._headers.get('content-encoding')
        if self.decompress:
            if encoding == "gzip":
                self.__decompress_obj = zlib.decompressobj(16+zlib.MAX_WBITS)
            elif encoding == "deflate":
                self.__decompress_obj = zlib.decompressobj()

        rest = data[idx+4:]
        self._buf = [rest]
        self.__on_headers_complete = True
        return len(rest)
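For reference, the wbits values chosen here (and in most of the snippets below) select the framing the decompressor expects:

import zlib

zlib.decompressobj()                       # zlib-wrapped deflate (RFC 1950)
zlib.decompressobj(-zlib.MAX_WBITS)        # raw deflate, no header/trailer (RFC 1951)
zlib.decompressobj(16 + zlib.MAX_WBITS)    # gzip framing (RFC 1952)
zlib.decompressobj(32 + zlib.MAX_WBITS)    # auto-detect zlib or gzip header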
def recoverFile(filename, output_file):
    output = open(output_file, "wb")
    decompressor = zlib.decompressobj()
    unused = ""
    for response in readFile(filename):
        if not response:
            break
        to_decompress = decompressor.unconsumed_tail + unused + response
        unused = ""
        while to_decompress:
            try:
                decompressed = decompressor.decompress(to_decompress)
            except zlib.error:
                print "%s couldn't be decompressed" % filename
                return
            if decompressed:
                output.write(decompressed)
                to_decompress = decompressor.unconsumed_tail
                if decompressor.unused_data:
                    unused = decompressor.unused_data
                    remainder = decompressor.flush()
                    output.write(remainder)
                    decompressor = zlib.decompressobj()
            else:
                to_decompress = None
    remainder = decompressor.flush()
    if remainder:
        output.write(remainder)
Example #7
File: rdd.py Project: cute/dpark
    def compute(self, split):
        f = open(self.path, 'rb', 4096 * 1024)
        last_line = ''
        if split.index == 0:
            zf = gzip.GzipFile(fileobj=f)
            zf._read_gzip_header()
            start = f.tell()
        else:
            start = self.find_block(f, split.index * self.splitSize)
            if start >= split.index * self.splitSize + self.splitSize:
                return
            for i in xrange(1, 100):
                if start - i * self.BLOCK_SIZE <= 4:
                    break
                last_block = self.find_block(f, start - i * self.BLOCK_SIZE)
                if last_block < start:
                    f.seek(last_block)
                    d = f.read(start - last_block)
                    dz = zlib.decompressobj(-zlib.MAX_WBITS)
                    last_line = dz.decompress(d).split('\n')[-1]
                    break

        end = self.find_block(f, split.index * self.splitSize + self.splitSize)
        f.seek(start)
        d = f.read(end - start)
        f.close()
        if not d: return

        dz = zlib.decompressobj(-zlib.MAX_WBITS)
        io = cStringIO.StringIO(dz.decompress(d))
        yield last_line + io.readline()
        for line in io:
            if line.endswith('\n'): # only yield complete lines; a trailing partial line is dropped
                yield line
Example #8
    def __next__(self):
        chunk = self.read()
        if not chunk:
            if self._decoder:
                chunk = self._decoder.flush()
                self._decoder = None
                return chunk
            else:
                raise StopIteration
        else:
            ce = self._content_encoding
            if ce in ('gzip', 'deflate'):
                if not self._decoder:
                    import zlib
                    if ce == 'gzip':
                        self._decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
                    else:
                        self._decoder = zlib.decompressobj()
                        try:
                            return self._decoder.decompress(chunk)
                        except zlib.error:
                            self._decoder = zlib.decompressobj(-zlib.MAX_WBITS)
                try:
                    return self._decoder.decompress(chunk)
                except (IOError, zlib.error) as e:
                    raise ContentDecodingError(e)

            if ce:
                raise ContentDecodingError('Unknown encoding: %s' % ce)
            return chunk
 def start_decompress_message(self):
     if self._is_server:
         if self._decompressor is None or self.client_no_context_takeover:
             self._decompressor = zlib.decompressobj(-self.client_max_window_bits)
     else:
         if self._decompressor is None or self.server_no_context_takeover:
             self._decompressor = zlib.decompressobj(-self.server_max_window_bits)
Example #10
 def _decode(self, body, encoding, max_length=0):
     if encoding == 'gzip' or encoding == 'x-gzip':
         body = gunzip(body, max_length)
     elif encoding == 'deflate':
         try:
             if max_length:
                 dobj = zlib.decompressobj()
                 body = dobj.decompress(body, max_length)
                 if dobj.unconsumed_tail:
                     raise DecompressSizeError(
                         'Response exceeded %s bytes' % max_length)
             else:
                 body = zlib.decompress(body)
         except zlib.error:
             # ugly hack to work with raw deflate content that may
             # be sent by microsoft servers. For more information, see:
             # http://carsten.codimi.de/gzip.yaws/
             # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
             # http://www.gzip.org/zlib/zlib_faq.html#faq38
             if max_length:
                 dobj = zlib.decompressobj(-15)
                 body = dobj.decompress(body, max_length)
                 if dobj.unconsumed_tail:
                     raise DecompressSizeError(
                         'Response exceeded %s bytes' % max_length)
             else:
                 body = zlib.decompress(body, -15)
     return body
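The max_length/unconsumed_tail pairing used above is the standard way to cap decompression size; a minimal standalone sketch (bounded_inflate and the plain ValueError are illustrative names, not part of the original code):

import zlib

def bounded_inflate(body, max_length, wbits=zlib.MAX_WBITS):
    # Decompress at most max_length bytes; any compressed input left in
    # unconsumed_tail means the limit was exceeded.
    dobj = zlib.decompressobj(wbits)
    out = dobj.decompress(body, max_length)
    if dobj.unconsumed_tail:
        raise ValueError('decompressed payload exceeds %d bytes' % max_length)
    return out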
Example #11
 def _initialize_decompressor(self):
   if self._compression_type == CompressionTypes.BZIP2:
     self._decompressor = bz2.BZ2Decompressor()
   elif self._compression_type == CompressionTypes.DEFLATE:
     self._decompressor = zlib.decompressobj()
   else:
     assert self._compression_type == CompressionTypes.GZIP
     self._decompressor = zlib.decompressobj(self._gzip_mask)
Example #12
 def test_header_auto_detect(self):
     """autodetect zlib and gzip header"""
     do = zlib.decompressobj(zlib.MAX_WBITS | 32)
     self.assertEqual(do.decompress(self.gzip_data), self.text)
     do = zlib.decompressobj(zlib.MAX_WBITS | 32)
     self.assertEqual(do.decompress(self.zlib_data), self.text)
     self.assertEqual(zlib.decompress(self.gzip_data, zlib.MAX_WBITS | 32), self.text)
     self.assertEqual(zlib.decompress(self.zlib_data, zlib.MAX_WBITS | 32), self.text)
Example #13
    def _read_headers(self, data):
        """
        Read the headers of an HTTP response from the socket, and the response
        body as well, into a new HTTPResponse instance. Then call the request
        handler.
        """
        do_close = False

        try:
            initial_line, data = data.split(CRLF, 1)
            try:
                try:
                    http_version, status, status_text = initial_line.split(' ', 2)
                    status = int(status)
                except ValueError:
                    http_version, status = initial_line.split(' ')
                    status = int(status)
                    status_text = HTTP.get(status, '')
            except ValueError:
                raise BadRequest('Invalid HTTP status line %r.' % initial_line)

            # Parse the headers.
            headers = read_headers(data)

            # Construct an HTTPResponse object.
            self.current_response = response = HTTPResponse(self,
                self._requests[0], http_version, status, status_text, headers)

            # Do we have a Content-Encoding header?
            if 'Content-Encoding' in headers:
                encoding = headers['Content-Encoding']
                if encoding == 'gzip':
                    response._decompressor = zlib.decompressobj(16+zlib.MAX_WBITS)
                elif encoding == 'deflate':
                    response._decompressor = zlib.decompressobj(-zlib.MAX_WBITS)

            # Do we have a Content-Length header?
            if 'Content-Length' in headers:
                self._stream.on_read = self._read_body
                self._stream.read_delimiter = int(headers['Content-Length'])

            elif 'Transfer-Encoding' in headers:
                if headers['Transfer-Encoding'] == 'chunked':
                    self._stream.on_read = self._read_chunk_head
                    self._stream.read_delimiter = CRLF
                else:
                    raise BadRequest("Unsupported Transfer-Encoding: %s" % headers['Transfer-Encoding'])

            # Is this a HEAD request? If so, then handle the request NOW.
            if response.method == 'HEAD':
                self._on_response()

        except BadRequest, e:
            log.info('Bad response from %r: %s',
                self._server, e)
            do_close = True
Example #14
    def decompress(self, value):
        if not self.decompressobj:
            try:
                self.decompressobj = zlib.decompressobj()
                return self.decompressobj.decompress(value)
            except zlib.error:
                self.decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
                return self.decompressobj.decompress(value)

        return self.decompressobj.decompress(value)
Example #15
 def zlib_gzin(self, compress=False, data=None):
     """Return the zlib-compressed or -decompressed object, from the given string or the instance's file data"""
     if not compress:
         try:
             if data:
                 return zlib.decompressobj().decompress('x\x9c' + data)
             else:
                 return zlib.decompressobj().decompress('x\x9c' + self.data)
         except Exception, e:
             return '[!] Error Zlib inflate decompress: %s.' % e
 def decrypt_file(self, file_content, filename):
     # each log file is built from a header section and a content section, the two are divided by a |==| mark
     file_split_content = file_content.split("|==|\n")
     # get the header section content
     file_header_content = file_split_content[0]
     # get the log section content
     file_log_content = file_split_content[1]
     # if the file is not encrypted - the "key" value in the file header is '-1'
     file_encryption_key = file_header_content.find("key:")
     if file_encryption_key == -1:
         # uncompress the log content
         uncompressed_and_decrypted_file_content = zlib.decompressobj().decompress(file_log_content)
     # if the file is encrypted
     else:
         content_encrypted_sym_key = file_header_content.split("key:")[1].splitlines()[0]
         # we expect to have a 'keys' folder that will have the stored private keys
         if not os.path.exists(os.path.join(self.config_path, "keys")):
             self.logger.error("No encryption keys directory was found and file %s is encrypted", filename)
             raise Exception("No encryption keys directory was found")
         # get the public key id from the log file header
         public_key_id = file_header_content.split("publicKeyId:")[1].splitlines()[0]
         # get the public key directory in the filesystem - each time we upload a new key this id is incremented
         public_key_directory = os.path.join(os.path.join(self.config_path, "keys"), public_key_id)
         # if the key directory does not exists
         if not os.path.exists(public_key_directory):
             self.logger.error(
                 "Failed to find a proper certificate for : %s who has the publicKeyId of %s",
                 filename,
                 public_key_id,
             )
             raise Exception("Failed to find a proper certificate")
         # get the checksum
         checksum = file_header_content.split("checksum:")[1].splitlines()[0]
         # get the private key
         private_key = open(os.path.join(public_key_directory, "Private.key"), "r").read()
         try:
             rsa_private_key = M2Crypto.RSA.load_key_string(private_key)
             content_decrypted_sym_key = rsa_private_key.private_decrypt(
                 base64.b64decode(bytearray(content_encrypted_sym_key)), M2Crypto.RSA.pkcs1_padding
             )
             uncompressed_and_decrypted_file_content = zlib.decompressobj().decompress(
                 AES.new(base64.b64decode(bytearray(content_decrypted_sym_key)), AES.MODE_CBC, 16 * "\x00").decrypt(
                     file_log_content
                 )
             )
             # we check the content validity by checking the checksum
             content_is_valid = self.validate_checksum(checksum, uncompressed_and_decrypted_file_content)
             if not content_is_valid:
                 self.logger.error("Checksum verification failed for file %s", filename)
                 raise Exception("Checksum verification failed")
         except Exception, e:
             self.logger.error(
                 "Error while trying to decrypt the file %s", filename, e.message, traceback.format_exc()
             )
             raise Exception("Error while trying to decrypt the file" + filename)
Example #17
  def _fetch_to_internal_buffer(self, num_bytes):
    """Fetch up to num_bytes into the internal buffer."""
    if (not self._read_eof and self._read_position > 0 and
        (self._read_buffer.tell() - self._read_position) < num_bytes):
      # There aren't enough bytes to accommodate a read, so we
      # prepare for a possibly large read by clearing up all internal buffers
      # but without dropping any previous held data.
      self._read_buffer.seek(self._read_position)
      data = self._read_buffer.read()
      self._clear_read_buffer()
      self._read_buffer.write(data)

    while not self._read_eof and (self._read_buffer.tell() - self._read_position
                                 ) < num_bytes:
      # Continue reading from the underlying file object until enough bytes are
      # available, or EOF is reached.
      buf = self._file.read(self._read_size)
      if buf:
        decompressed = self._decompressor.decompress(buf)
        del buf  # Free up some possibly large and no-longer-needed memory.
        self._read_buffer.write(decompressed)
      else:
        # EOF of current stream reached.
        #
        # Any uncompressed data at the end of the stream of a gzip or bzip2
        # file that is not corrupted points to a concatenated compressed
        # file. We read concatenated files by recursively creating decompressor
        # objects for the unused compressed data.
        if (self._compression_type == CompressionTypes.BZIP2 or
            self._compression_type == CompressionTypes.DEFLATE or
            self._compression_type == CompressionTypes.GZIP):
          if self._decompressor.unused_data != b'':
            buf = self._decompressor.unused_data

            if self._compression_type == CompressionTypes.BZIP2:
              self._decompressor = bz2.BZ2Decompressor()
            elif self._compression_type == CompressionTypes.DEFLATE:
              self._decompressor = zlib.decompressobj()
            else:
              self._decompressor = zlib.decompressobj(self._gzip_mask)

            decompressed = self._decompressor.decompress(buf)
            self._read_buffer.write(decompressed)
            continue
        else:
          # Deflate, Gzip and bzip2 formats do not require flushing
          # remaining data in the decompressor into the read buffer when
          # fully decompressing files.
          self._read_buffer.write(self._decompressor.flush())

        # Record that we have hit the end of file, so we won't unnecessarily
        # repeat the completeness verification step above.
        self._read_eof = True
Example #18
    def compute(self, split):
        f = self.open_file()
        last_line = ''
        if split.index == 0:
            zf = gzip.GzipFile(fileobj=f)
            zf._read_gzip_header()
            start = f.tell()
        else:
            start = self.find_block(f, split.index * self.splitSize)
            if start >= split.index * self.splitSize + self.splitSize:
                return
            for i in xrange(1, 100):
                if start - i * self.BLOCK_SIZE <= 4:
                    break
                last_block = self.find_block(f, start - i * self.BLOCK_SIZE)
                if last_block < start:
                    f.seek(last_block)
                    d = f.read(start - last_block)
                    dz = zlib.decompressobj(-zlib.MAX_WBITS)
                    last_line = dz.decompress(d).split('\n')[-1]
                    if last_line.endswith('\n'):
                        last_line = ''
                    break

        end = self.find_block(f, split.index * self.splitSize + self.splitSize)
        # TODO: speed up
        f.seek(start)
        if self.fileinfo:
            f.length = end
        dz = zlib.decompressobj(-zlib.MAX_WBITS)
        while start < end:
            d = f.read(min(64<<10, end-start))
            start += len(d)
            if not d: break

            io = cStringIO.StringIO(dz.decompress(d))
            
            last_line += io.readline()
            yield last_line
            last_line = ''

            ll = list(io)
            if not ll: continue

            last_line = ll.pop()
            for line in ll:
                yield line
            if last_line.endswith('\n'):
                yield last_line
                last_line = ''

        f.close()
Example #19
def deflate_decoder(wbits=None):
    if wbits is None:
        obj = zlib.decompressobj()
    else:
        obj = zlib.decompressobj(wbits)

    def enc(data, final):
        ret = obj.decompress(data)
        if final:
            ret += obj.flush()
        return ret

    return enc
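Minimal usage of deflate_decoder above, assuming the compressed body arrives split at an arbitrary boundary:

import zlib

payload = zlib.compress(b"incremental example")
dec = deflate_decoder()            # default wbits: zlib-wrapped deflate
out = dec(payload[:10], False)     # partial chunk, may return b''
out += dec(payload[10:], True)     # final chunk also flushes the decompressor
assert out == b"incremental example"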
Example #20
def get_title_html(url, type, stream=None, **kwds):
    if stream is None:
        request = urllib2.Request(url)
        for header in default_headers:
            request.add_header(*header)
        stream = get_opener().open(
            request, timeout=TIMEOUT_S)

    with closing(stream):
        charset = stream.info().getparam('charset')
        content_enc = stream.info().dict.get('content-encoding', 'identity')
        if content_enc == 'identity':
            data = stream.read(READ_BYTES_MAX)
        elif content_enc == 'gzip':
            raw_data = stream.read(READ_BYTES_MAX)
            data = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(raw_data)
        elif content_enc == 'deflate':
            raw_data = stream.read(READ_BYTES_MAX)
            try:
                data = zlib.decompressobj().decompress(raw_data)
            except zlib.error:
                data = zlib.decompressobj(-zlib.MAX_WBITS).decompress(raw_data)
        else:
            raise PageURLError(
                'Unsupported content-encoding: "%s"' % content_enc)

    soup = BeautifulSoup(data, BS4_PARSER, from_encoding=charset)

    # The page title according to the <title> tag.
    title = soup.find('title')
    if title:
        title = ''.join(re.sub(r'\s+', ' ', s) for s in title.strings).strip()

    # The page title according to the <meta> tags.
    title_meta = soup.find('meta', attrs={'name': 'title'}) or \
                 soup.find('meta', attrs={'name': 'og:title'})
    if title_meta:
        title_meta = title_meta.attrs.get('content')

    if not title and not title_meta:
        return
    elif title and (not title_meta or title_meta in title):
        title_str = 'Title: %s' % format_title(title)
    elif title_meta and (not title or title in title_meta):
        title_str = 'Title: %s' % format_title(title_meta)
    else:
        title_str = 'Title (meta): %s -- Title (primary): %s' % (
            format_title(title_meta), format_title(title))
    return { 'title': title_str }
Example #21
def uncompress_chunks(compressed_chunks, use_gzip):
  """Uncompress a list of data compressed with gzip or deflate.

  Args:
    compressed_chunks: a list of compressed data
    use_gzip: if True, uncompress with gzip. Otherwise, use deflate.

  Returns:
    [uncompressed_chunk_1, uncompressed_chunk_2, ...]
  """
  if use_gzip:
    decompress = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress
  else:
    decompress = zlib.decompressobj(-zlib.MAX_WBITS).decompress
  return [decompress(c) for c in compressed_chunks]
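A round-trip check for uncompress_chunks above, assuming the chunks all belong to one gzip stream split at arbitrary points:

import zlib

co = zlib.compressobj(9, zlib.DEFLATED, 16 + zlib.MAX_WBITS)   # gzip framing
chunks = [co.compress(b"hello "), co.compress(b"world"), co.flush()]
assert b"".join(uncompress_chunks(chunks, use_gzip=True)) == b"hello world"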
Example #22
def _eq_zstream(expected, produced):
    """Compares the zstreams.
    Their decompressed bytes are compared.
    The compressed bytes differ, because of the different
    flushing used in the Python zlib and dictzip.
    """
    import zlib
    deobj = zlib.decompressobj(-zlib.MAX_WBITS)
    expected_data = deobj.decompress(expected.read())
    expected.seek(-len(deobj.unused_data), os.SEEK_CUR)

    deobj = zlib.decompressobj(-zlib.MAX_WBITS)
    got = deobj.decompress(produced.read())
    produced.seek(-len(deobj.unused_data), os.SEEK_CUR)
    asserting.eq_bytes(expected_data, got)
Example #23
def fetch_url(url, *, user_agent=USER_AGENT_PLAYER, cookie=None, fakeip=None):
    '''Fetch HTTP URL

    Arguments: url, user_agent, cookie

    Return value: (response_object, response_data) -> (http.client.HTTPResponse, bytes)
    '''
    logging.debug('Fetch: %s' % url)
    req_headers = {'User-Agent': user_agent, 'Accept-Encoding': 'gzip, deflate'}
    if cookie:
        req_headers['Cookie'] = cookie
    if fakeip:
        req_headers['X-Forwarded-For'] = fakeip
        req_headers['Client-IP'] = fakeip
    req = urllib.request.Request(url=url, headers=req_headers)
    response = urllib.request.urlopen(req, timeout=120)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = gzip.GzipFile(fileobj=response).read()
    elif content_encoding == 'deflate':
        decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
        data = decompressobj.decompress(response.read())+decompressobj.flush()
    else:
        data = response.read()
    return response, data
Example #24
    def post_init(self):
        import zlib
        import re

        load_dir = self.image_dir
        self.train_tar = os.path.join(load_dir, 'ILSVRC2012_img_train.tar')
        self.val_tar = os.path.join(load_dir, 'ILSVRC2012_img_val.tar')
        self.devkit = os.path.join(load_dir, 'ILSVRC2012_devkit_t12.tar.gz')

        for infile in (self.train_tar, self.val_tar, self.devkit):
            if not os.path.exists(infile):
                raise IOError(infile + " not found. Please ensure you have ImageNet downloaded."
                              "More info here: http://www.image-net.org/download-imageurls")

        with tarfile.open(self.devkit, "r:gz") as tf:
            synsetfile = 'ILSVRC2012_devkit_t12/data/meta.mat'
            valfile = 'ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'

            # get the synset mapping by hacking around matlab's terrible compressed format
            meta_buff = tf.extractfile(synsetfile).read()
            decomp = zlib.decompressobj()
            self.synsets = re.findall(re.compile('n\d+'), decomp.decompress(meta_buff[136:]))
            self.train_labels = {s: i for i, s in enumerate(self.synsets)}

            # get the ground truth validation labels and offset to zero
            self.val_labels = {"%08d" % (i + 1): int(x) - 1 for i, x in
                               enumerate(tf.extractfile(valfile))}
        self.validation_pct = None

        self.train_nrec = 1281167
        self.train_start = 0

        self.val_nrec = 50000
        self.val_start = -(-self.train_nrec // self.macro_size)
        self.pixel_mean = [104.41227722, 119.21331787, 126.80609131]
Beispiel #25
0
def expand_content(namespace, source):
  """Yields expanded data from source."""
  # TODO(maruel): Add bzip2.
  # TODO(maruel): Remove '-gzip' since it's a misnomer.
  if namespace.endswith(('-deflate', '-gzip')):
    zlib_state = zlib.decompressobj()
    for i in source:
      data = zlib_state.decompress(i, gcs.CHUNK_SIZE)
      yield data
      del data
      while zlib_state.unconsumed_tail:
        data = zlib_state.decompress(
            zlib_state.unconsumed_tail, gcs.CHUNK_SIZE)
        yield data
        del data
      del i
    data = zlib_state.flush()
    yield data
    del data
    # Forcibly delete the state.
    del zlib_state
  else:
    # Returns the source as-is.
    for i in source:
      yield i
      del i
Example #26
    def __init__(self, conn):
        asyncore.dispatcher_with_send.__init__(self, conn)
        
        self.ssled = False
        self.secure_connection(certfile="server.passless.crt", keyfile="server.passless.key", server_side=True)               

        self.consumed_ace = False
        self.data = ""
        self.binary_mode = False
        self.decompressor = zlib.decompressobj()
        self.compressor = zlib.compressobj()
        self.unzipped_input = ""
        self.unzipped_output_buffer = ""
        self.output_buffer = ""
        self.speech = dict()
        self.pong = 1
        self.ping = 0
        self.httpClient = AsyncOpenHttp(self.handle_google_data, self.handle_google_failure)
        self.gotGoogleAnswer = False
        self.googleData = None
        self.lastRequestId = None
        self.dictation = None
        self.dbConnection = db.getConnection()
        self.assistant = None
        self.sendLock = threading.Lock()
        self.current_running_plugin = None
        self.current_location = None
        self.plugin_lastAceId = None
        self.logger = logging.getLogger("logger")
Example #27
def stream_decompress(iterator, mode='gzip'):
    """
    Stream decodes an iterator over compressed data

    :param iterator: An iterator over compressed data
    :param mode: 'gzip' or 'deflate'
    :return: An iterator over decompressed data
    """

    if mode not in ['gzip', 'deflate']:
        raise ValueError('stream_decompress mode must be gzip or deflate')

    zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS
    dec = zlib.decompressobj(zlib_mode)
    try:
        for chunk in iterator:
            rv = dec.decompress(chunk)
            if rv:
                yield rv
    except zlib.error:
        # If there was an error decompressing, just return the raw chunk
        yield chunk
        # Continue to return the rest of the raw data
        for chunk in iterator:
            yield chunk
    else:
        # Make sure everything has been returned from the decompression object
        buf = dec.decompress(bytes())
        rv = buf + dec.flush()
        if rv:
            yield rv
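And a small check for stream_decompress above, assuming a single gzip body delivered in two arbitrary pieces:

import zlib

co = zlib.compressobj(9, zlib.DEFLATED, 16 + zlib.MAX_WBITS)    # gzip framing
body = co.compress(b"streamed body") + co.flush()
pieces = [body[:8], body[8:]]
assert b"".join(stream_decompress(iter(pieces), mode='gzip')) == b"streamed body"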
Example #28
 def uncompress(self):
     # type: () -> IO[bytes]
     import zlib
     decomp = zlib.decompressobj(-13)    # RFC 2440, pg 61.
     # This doubles the memory usage.
     stream = BytesIO(decomp.decompress(self.data))
     return stream
Example #29
    def send_request(self, request):
        data = (request.format(),)
        payload = zlib.compress(rencode.dumps(data))
        self.conn.sendall(payload)

        buf = b""

        while True:
            data = self.conn.recv(1024)

            if not data:
                self.connected = False
                break

            buf += data
            dobj = zlib.decompressobj()

            try:
                message = rencode.loads(dobj.decompress(buf))
            except (ValueError, zlib.error, struct.error):
                # Probably incomplete data, read more
                continue
            else:
                buf = dobj.unused_data

            yield message
Example #30
	def loadcompressed(self):
		if self.data[0:4] != 'cmpr':
			raise Exception("can't happen")
		self.compression=True
		self.infocollector.compression=True
		[compressedsize] = struct.unpack('<I', self.data[4:8])
		[uncompressedsize] = struct.unpack('<I', self.data[8:12])
		[blocksizessize] = struct.unpack('<I', self.data[12:16])
		assert(self.data[20:24] == 'CPng')
		assert(struct.unpack('<H', self.data[24:26])[0] == 1)
		assert(struct.unpack('<H', self.data[26:28])[0] == 4)
		if (20 + compressedsize + blocksizessize + 1) & ~1 != self.rawsize:
			raise Exception('mismatched blocksizessize value (20 + %u + %u != %u)' % (compressedsize, blocksizessize, self.rawsize))
		decomp = zlib.decompressobj()
		self.uncompresseddata = decomp.decompress(self.data[28:])
		if len(decomp.unconsumed_tail):
			raise Exception('unconsumed tail in compressed data (%u bytes)' % len(decomp.unconsumed_tail))
		if len(decomp.unused_data) != blocksizessize:
			raise Exception('mismatch in unused data after compressed data (%u != %u)' % (len(decomp.unused_data), blocksizessize))
		if len(self.uncompresseddata) != uncompressedsize:
			raise Exception('mismatched compressed data size: expected %u got %u' % (uncompressedsize, len(self.uncompresseddata)))
		chunk = RiffChunk(infocollector=self.infocollector)
		blocksizesdata = zlib.decompress(self.data[28+compressedsize:])
		blocksizes = []
		for i in range(0, len(blocksizesdata), 4):
			blocksizes.append(struct.unpack('<I', blocksizesdata[i:i+4])[0])
		offset = 0
		self.contents = []
		while offset < len(self.uncompresseddata):
			chunk = RiffChunk(infocollector=self.infocollector)
			chunk.parent = self
			chunk.load(self.uncompresseddata, offset, blocksizes)
			self.contents.append(chunk)
			offset += 8 + chunk.rawsize
Example #31
 def test_empty_flush(self):
     import zlib
     co = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
     assert co.flush()  # Returns a zlib header
     dco = zlib.decompressobj()
     assert dco.flush() == b""
def uncompress_dcx_content(content):
    """Decompress the file content from a .dcx file. Returns the uncompressed
    content. Raising ValueError if the header does not match the required format.
    """
    master_offset = 0
    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'X', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)

    (req_1, ) = struct.unpack_from("<I", content, offset=master_offset)
    master_offset += struct.calcsize("<I")
    (req_2, req_3, req_4, req_5) = struct.unpack_from(">IIII",
                                                      content,
                                                      offset=master_offset)
    master_offset += struct.calcsize(">IIII")
    if req_1 != 0x100:
        raise ValueError("Expected DCX header int 0x100, but received " +
                         hex(req_1))
    if req_2 != 0x18:
        raise ValueError("Expected DCX header int 0x18, but received " +
                         hex(req_2))
    if req_3 != 0x24:
        raise ValueError("Expected DCX header int 0x24, but received " +
                         hex(req_3))
    if req_4 != 0x24 and req_4 != 0x44:
        raise ValueError("Expected DCX header int 0x24|0x44, but received " +
                         hex(req_4))
    if req_5 != 0x2c and req_5 != 0x4c:
        raise ValueError("Expected DCX header int 0x24|0x4c, but received " +
                         hex(req_5))

    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'S', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)

    (uncomp_size, comp_size) = struct.unpack_from(">II",
                                                  content,
                                                  offset=master_offset)
    master_offset += struct.calcsize(">II")

    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'P', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)
    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'F', 1)
    master_offset = consume_byte(content, master_offset, b'L', 1)
    master_offset = consume_byte(content, master_offset, b'T', 1)

    # Skip the portion of the header whose meaning is unknown.
    master_offset += 0x18
    master_offset = consume_byte(content, master_offset, b'D', 1)
    master_offset = consume_byte(content, master_offset, b'C', 1)
    master_offset = consume_byte(content, master_offset, b'A', 1)
    master_offset = consume_byte(content, master_offset, b'\x00', 1)
    (comp_header_length, ) = struct.unpack_from(">I",
                                                content,
                                                offset=master_offset)
    master_offset += struct.calcsize(">I")

    master_offset = consume_byte(content, master_offset, b'\x78', 1)  # 0x78 0xDA: zlib stream header
    master_offset = consume_byte(content, master_offset, b'\xDA', 1)
    comp_size -= 2  # The previous two bytes are included in the compressed data, for some reason.

    decomp_obj = zlib.decompressobj(-15)
    return decomp_obj.decompress(
        content[master_offset:master_offset + comp_size], uncomp_size)
Example #33
def _decompress(string):
    dcomp = zlib.decompressobj()
    dcomped = dcomp.decompress(string)
    dcomped += dcomp.flush()
    return dcomped
Example #34
    def _cacheChunk(self):

        newText = ''
        newLen = 0
        procDataLen = 1

        # While I've downloaded additional data and haven't added anything to
        # my cached text, keep trying
        while newLen == 0 and procDataLen > 0:

            data = None

            # If this is true, we likely have a partial block
            if self._dc and self._dc.unused_data:
                data = self._dc.unused_data
            else:
                data = self._filePtr.read(self._chunkSize)

            datalen = len(data)
            if data:
                self._dc = zlib.decompressobj(
                    zlib.MAX_WBITS | 32)  # autodetect gzip or zlib header

            if len(self._foq) == 0:
                # Append the file offset to the file offset queue
                self._foq.append(self._filePtr.tell() - len(data))

            # If we're here and we have no data, we've likely hit the end of
            # the compressed file
            if data:
                newText = self._dc.decompress(data)

                dlMore = True
                while len(self._dc.unused_data) == 0 and dlMore:
                    newDat = self._filePtr.read(self._chunkSize)
                    dlMore = len(newDat) > 0
                    datalen += len(newDat)
                    newText += self._dc.decompress(newDat)

                procDataLen = datalen - len(self._dc.unused_data)
                newLen = len(newText)
                self._boq.append(newLen)
                self._foq.append(self._foq[-1] + procDataLen)
                self._text += newText
                data = None
                #print "decompressing!, added", newLen, "bytes from", procDataLen, "data"
            elif self._dc:
                newText = self._dc.flush()
                newLen = len(newText)
                procDataLen = datalen
                self._boq.append(newLen)
                self._foq.append(self._foq[-1] + procDataLen)
                self._text += newText
                self._dc = None
                #print "No data to be had, flushed and got", newLen, "bytes from", procDataLen, "data"
            else:
                # break out with no data read!
                # also kill the _foq added previously
                self._foq.popleft()
                newText = ""
                newLen = 0
                procDataLen = 0

        # end while loop

        #print >> sys.stderr, "Cached chunk"
        #print >> sys.stderr, "FOQ:", self._foq
        #print >> sys.stderr, "BOQ:", self._boq

        # return the # of bytes decompressed
        return newLen
def inflate(data):
    decompress = zlib.decompressobj(-zlib.MAX_WBITS)  # raw deflate, see above
    inflated = decompress.decompress(data)
    inflated += decompress.flush()
    return inflated
Example #36
 def __init__(self):
     self._first_try = True
     self._data = b''
     self._obj = zlib.decompressobj()
Example #37
 def __init__(self):
     self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
     self._state = GzipDecoderState.FIRST_MEMBER
Example #38
def pako_inflate_raw(data):
    decompress = zlib.decompressobj(-15)
    decompressed_data = decompress.decompress(data)
    decompressed_data += decompress.flush()
    return decompressed_data
Example #39
 def decompress(value):
     dco = zlib.decompressobj()
     return dco.decompress(value) + dco.flush()