Example #1
File: codec.py Project: ciena/afkak
def snappy_decode(payload):
    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if payload.startswith(_XERIAL_HEADER):
        # TODO ? Should become a fileobj ?
        view = memoryview(payload)
        out = []
        length = len(payload)

        cursor = 16
        while cursor < length:
            block_size = struct.unpack_from('!i', view, cursor)[0]
            # Skip the block size
            cursor += 4
            end = cursor + block_size
            # XXX snappy requires a bytes-like object but doesn't accept
            # a memoryview, so we must copy.
            out.append(snappy.decompress(view[cursor:end].tobytes()))
            cursor = end

        # See https://atleastfornow.net/blog/not-all-bytes/
        return b''.join(out)
    else:
        return snappy.decompress(payload)
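The helpers this function relies on are defined elsewhere in codec.py. A rough sketch of what they presumably look like (the exact constant is an assumption; the 16-byte header the loop skips is this 8-byte magic followed by two 32-bit version fields):

import struct

_XERIAL_HEADER = b"\x82SNAPPY\x00"  # assumed 8-byte xerial magic

def has_snappy():
    # Report whether the optional python-snappy package is importable.
    try:
        import snappy  # noqa: F401
        return True
    except ImportError:
        return False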
Example #2
def decode_snappy(buff):
    """Decode a buffer using Snappy

    If xerial is found to be in use, the buffer is decoded in a fashion
    compatible with the xerial snappy library.

    Adapted from kafka-python
    https://github.com/mumrah/kafka-python/pull/127/files
    """
    if snappy is None:
        raise ImportError("Please install python-snappy")
    if _detect_xerial_stream(buff):
        out = StringIO()
        body = buffer(buff[16:])
        length = len(body)
        cursor = 0
        while cursor < length:
            block_size = struct.unpack_from('!i', body[cursor:])[0]
            cursor += 4
            end = cursor + block_size
            out.write(snappy.decompress(body[cursor:end]))
            cursor = end
        out.seek(0)
        return out.read()
    else:
        return snappy.decompress(buff)
Example #3
def snappy_unpack_blob(blob, sep=SEP):
    if len(blob) == 0: return None
    if blob[0] == 'S':
        return np.array(snappy.decompress(blob[1:]).split(sep))
    dt = lookup[blob[0]]
    arr = np.frombuffer(snappy.decompress(blob[1:]), dtype=dt)
    # hack since arrays aren't writable from a buffer and we need this for
    # comp_het phasing.
    if blob[0] == '?':
        arr.setflags(write=True)
    return arr
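For context, a hedged sketch of the pack direction this blob format implies; the tagging scheme is inferred from the decoder above, and snappy_pack_blob and SEP are assumptions rather than the project's actual code (Python 2 style, to match the str indexing above):

import snappy

SEP = '|'  # assumed; must match the separator the decoder splits on

def snappy_pack_blob(arr, sep=SEP):
    # Hypothetical inverse of snappy_unpack_blob: a 1-byte dtype tag
    # ('S' for string arrays, dtype.char otherwise), then the
    # snappy-compressed body.
    if arr is None:
        return ''
    if arr.dtype.kind == 'S':
        return 'S' + snappy.compress(sep.join(arr))
    return arr.dtype.char + snappy.compress(arr.tobytes())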
Example #4
    def get_subvolume(self, box_zyx, scale=0):
        """
        Fetch a subvolume from the remote BrainMaps volume.
        
        Args:
            box_zyx: (start, stop) tuple, in ZYX order.
            scale: Which scale to fetch the subvolume from.
        
        Returns:
            volume (ndarray), where volume.shape = (stop - start)
        """
        box_zyx = np.asarray(box_zyx)
        corner_zyx = box_zyx[0]
        shape_zyx = box_zyx[1] - box_zyx[0]

        corner_xyz = corner_zyx[::-1]
        shape_xyz = shape_zyx[::-1]

        snappy_data = fetch_subvol_data(self.http, self.project, self.dataset,
                                        self.volume_id, corner_xyz, shape_xyz,
                                        scale, self.change_stack_id,
                                        self.use_gzip)

        volume_buffer = snappy.decompress(snappy_data)
        volume = np.frombuffer(volume_buffer,
                               dtype=self.dtype).reshape(shape_zyx)
        return volume
Example #5
def _read_page(fo, page_header, column_metadata):
    """Internal function to read the data page from the given file-object
    and convert it to raw, uncompressed bytes (if necessary)."""
    bytes_from_file = fo.read(page_header.compressed_page_size)
    codec = column_metadata.codec
    if codec is not None and codec != CompressionCodec.UNCOMPRESSED:
        if column_metadata.codec == CompressionCodec.SNAPPY:
            raw_bytes = snappy.decompress(bytes_from_file)
        elif column_metadata.codec == CompressionCodec.GZIP:
            io_obj = StringIO.StringIO(bytes_from_file)
            with gzip.GzipFile(fileobj=io_obj, mode='rb') as f:
                raw_bytes = f.read()
        else:
            raise ParquetFormatException(
                "Unsupported Codec: {0}".format(codec))
    else:
        raw_bytes = bytes_from_file
    logger.debug(
        "Read page with compression type {0}. Bytes {1} -> {2}".format(
        _get_name(CompressionCodec, codec),
        page_header.compressed_page_size,
        page_header.uncompressed_page_size))
    assert len(raw_bytes) == page_header.uncompressed_page_size, \
        "found {0} raw bytes (expected {1})".format(
            len(raw_bytes),
            page_header.uncompressed_page_size)
    return raw_bytes
Example #6
def load_experience_pairs(filename):
    with open(INTEGRATE_DIR + filename, 'r') as file_ref:
        json_str = snappy.decompress(file_ref.read())
    print 'finished decompressing ' + filename
    experiences = json.loads(json_str)
    # compress_screen_hex(experiences)
    return pair_experiences(experiences, filename)
Example #7
    def _ReadChunkFromBevy(self, chunk_id, bevy, bevy_index, index_size):
        chunk_id_in_bevy = chunk_id % self.chunks_per_segment

        if index_size == 0:
            LOGGER.error("Index empty in %s: %s", self.urn, chunk_id)
            raise IOError("Index empty in %s: %s" % (self.urn, chunk_id))
        # The segment is not completely full.
        if chunk_id_in_bevy >= index_size:
            LOGGER.error("Bevy index too short in %s: %s", self.urn, chunk_id)
            raise IOError("Bevy index too short in %s: %s" %
                          (self.urn, chunk_id))

        (bevvy_offset, compressed_chunk_size) = bevy_index[chunk_id_in_bevy]

        bevy.Seek(bevvy_offset, 0)
        cbuffer = bevy.Read(compressed_chunk_size)

        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED or compressed_chunk_size == self.chunk_size:
            return cbuffer

        elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB:
            return zlib.decompress(cbuffer)

        elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY:
            return snappy.decompress(cbuffer)

        else:
            raise RuntimeError("Unable to process compression %s" %
                               self.compression)
Example #8
 def _read_block_header(self):
     self.block_count = self.raw_decoder.read_long()
     if self.codec == "null":
         # Skip a long; we don't need to use the length.
         self.raw_decoder.skip_long()
         self._datum_decoder = self._raw_decoder
     elif self.codec == 'deflate':
         # Compressed data is stored as (length, data), which
         # corresponds to how the "bytes" type is encoded.
         data = self.raw_decoder.read_bytes()
         # -15 is the log of the window size; negative indicates
         # "raw" (no zlib headers) decompression.  See zlib.h.
         uncompressed = zlib.decompress(data, -15)
         self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
     elif self.codec == 'snappy':
         # Compressed data includes a 4-byte CRC32 checksum
         length = self.raw_decoder.read_long()
         data = self.raw_decoder.read(length - 4)
         uncompressed = snappy.decompress(data)
         self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
         self.raw_decoder.check_crc32(uncompressed)
     elif self.codec == 'zstandard':
         length = self.raw_decoder.read_long()
         data = self.raw_decoder.read(length)
         uncompressed = bytearray()
         dctx = zstd.ZstdDecompressor()
         with dctx.stream_reader(StringIO(data)) as reader:
             while True:
                 chunk = reader.read(16384)
                 if not chunk:
                     break
                 uncompressed.extend(chunk)
         self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
     else:
         raise DataFileException("Unknown codec: %r" % self.codec)
Example #9
    def get_subvolume(self, box_zyx, scale=0):
        """
        Fetch a subvolume from the remote BrainMaps volume.
        
        Args:
            box_zyx: (start, stop) tuple, in ZYX order.
            scale: Which scale to fetch the subvolume from.
        
        Returns:
            volume (ndarray), where volume.shape = (stop - start)
        """
        box_zyx = np.asarray(box_zyx)
        corner_zyx = box_zyx[0]
        shape_zyx = box_zyx[1] - box_zyx[0]
        
        corner_xyz = corner_zyx[::-1]
        shape_xyz = shape_zyx[::-1]
        
        snappy_data = fetch_subvol_data( self.http,
                                         self.project,
                                         self.dataset,
                                         self.volume_id,
                                         corner_xyz,
                                         shape_xyz,
                                         scale,
                                         self.change_stack_id,
                                         self.use_gzip )

        volume_buffer = snappy.decompress(snappy_data)
        volume = np.frombuffer(volume_buffer, dtype=self.dtype).reshape(shape_zyx)
        return volume
Example #10
def demo_get(service, params, compress=False):
    url = build_url(service, params, compress)

    print("REQ:")
    print(url)

    ua = UAResponse()
    msg = MSG()

    print()

    print("RESP:")

    if compress:
        data = snappy.decompress(urlopen(url).read())
    else:
        data = urlopen(url).read()

    ua.ParseFromString(data)
    if ua.Err == 0:
        msg.ParseFromString(ua.Data)

        # TODO: handle msg here
        print(msg)
    else:
        print(ua)
Example #11
def UnpackState(packed_state):
  """Convert a packed State binary string into a StateStruct object. If the
  input doesn't have the STATE_MARK_ZIP prefix, it is assumed to be an old-style
  compressed state object, and is directly decompressed.

  Args:
    packed_state - Binary string of the type produced by PackState.

  Returns:
    Populated StateStruct object.
  """
  if not packed_state:
    return None

  if ord(packed_state[0]) == STATE_MARK_ZIP:
    # Extract the meta-data Struct from the packed data.
    meta = StateMeta()
    meta.Deserialize(packed_state)

    # Extract the compressed State from the packed data.
    compressed_state = packed_state[meta.Size():]

    # Compute the checksum and make sure it matches the metadata.
    cksum = zlib.adler32(compressed_state)
    if cksum != meta.checksum:
      raise ValueError('Compressed State Checksum Error')

    # Return the decompressed State.
    return pickle.loads(zlib.decompress(compressed_state))

  elif ord(packed_state[0]) == STATE_MARK_SNAPPY:
    # Extract the meta-data Struct from the packed data.
    meta = StateMeta()
    meta.Deserialize(packed_state)

    # Extract the compressed State from the packed data.
    compressed_state = packed_state[meta.Size():]

    # Compute the checksum and make sure it matches the metadata.
    cksum = zlib.adler32(compressed_state)
    if cksum != meta.checksum:
      raise ValueError('Compressed State Checksum Error')

    # Return the decompressed State.
    return pickle.loads(snappy.decompress(compressed_state))

  elif ord(packed_state[0]) == STATE_MARK_LIGHT:
    # Extract the meta-data Struct from the packed data.
    meta = StateMeta()
    meta.Deserialize(packed_state)

    # Extract the State buffer from the packed data.
    state_buffer = packed_state[meta.Size():]

    # Return the decompressed State.
    return pickle.loads(state_buffer)

  else:
    # Unsupported format.
    raise ValueError('Unrecognized State serialization format')
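For orientation, a heavily hedged sketch of the snappy branch of the PackState counterpart implied by this decoder; StateMeta's constructor and Serialize method are assumptions, and only the metadata/adler32/compressed-body layout is taken from the code above:

import pickle
import zlib
import snappy

def pack_state_snappy(state):
  # Hypothetical inverse of the STATE_MARK_SNAPPY branch: pickle,
  # compress, checksum, then prepend the serialized metadata.
  compressed_state = snappy.compress(pickle.dumps(state))
  meta = StateMeta()  # assumed: carries the marker byte and checksum
  meta.checksum = zlib.adler32(compressed_state)
  return meta.Serialize() + compressed_state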
Example #12
def u_slug(username, slug):
    user = current_user
    post = user.posts.filter_by(slug=slug).first()
    if post:
        _prev = user.posts.filter(Post.created_timestamp < post.created_timestamp).slice(0, 4)
        _next = Post.query.filter(User.username == user.username, Post.created_timestamp > post.created_timestamp).order_by(Post.created_timestamp).slice(0, 4)
        _prev_count = _prev.count()
        _next_count = _next.count()

        if _prev_count < 2:
            _next = _next.slice(0, 4 - _prev_count)
        elif _next_count < 2:
            _prev = _prev.slice(0, 4 - _next_count)
        else:
            _prev = _prev.slice(0, 2)
            _next = _next.slice(0, 2)

        if post.content:
            # Decrypt
            half_key = session[generate_hash(user.user_key_salt)]
            key = xor_keys(half_key, app.config['MASTER_KEY'])
            content = AES_decrypt(key, post.content)
            content = snappy.decompress(content)
            return render_template("post.html", content=content, user=user, post=post, next=_next, prev=_prev)
        return render_template("post.html", content='', user=user, post=post, next=_next, prev=_prev)
    abort(404)
Example #13
    def _get_data(cls, stream, encoding='utf-8', compress_option=None):
        if isinstance(stream, six.text_type):
            data = stream.encode(encoding)
        elif isinstance(stream, six.binary_type):
            data = stream
        else:
            # due to the restriction of the protobuf api, read all the data at once
            data = stream.read()
            stream.close()  # directly close the stream
            if isinstance(data, six.text_type):
                data = data.encode(encoding)

        if compress_option is None or \
                compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_RAW:
            return data
        elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_ZLIB:
            return data  # requests already decompresses zlib content automatically
        elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_SNAPPY:
            try:
                import snappy
            except ImportError:
                raise errors.DependencyNotInstalledError(
                    'python-snappy library is required for snappy support')
            data = snappy.decompress(data)
            return data
        else:
            raise IOError('invalid compression option.')
Example #14
def _read_page(file_obj, page_header, column_metadata):
    """Read the data page from the given file-object and convert it to raw, uncompressed bytes (if necessary)."""
    bytes_from_file = file_obj.read(page_header.compressed_page_size)
    codec = column_metadata.codec
    if codec is not None and codec != parquet_thrift.CompressionCodec.UNCOMPRESSED:
        if column_metadata.codec == parquet_thrift.CompressionCodec.SNAPPY:
            raw_bytes = snappy.decompress(bytes_from_file)
        elif column_metadata.codec == parquet_thrift.CompressionCodec.GZIP:
            io_obj = io.BytesIO(bytes_from_file)
            with gzip.GzipFile(fileobj=io_obj, mode='rb') as file_data:
                raw_bytes = file_data.read()
        else:
            raise ParquetFormatException(
                "Unsupported Codec: {0}".format(codec))
    else:
        raw_bytes = bytes_from_file

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            "Read page with compression type %s. Bytes %d -> %d",
            _get_name(parquet_thrift.CompressionCodec, codec),
            page_header.compressed_page_size,
            page_header.uncompressed_page_size)
    assert len(raw_bytes) == page_header.uncompressed_page_size, \
        "found {0} raw bytes (expected {1})".format(
            len(raw_bytes),
            page_header.uncompressed_page_size)
    return raw_bytes
Example #15
    def _decompress_event(self, data: Union[BaseEvent, bytes]) -> BaseEvent:
        if isinstance(data, BaseEvent):
            return data
        else:
            import snappy

            return cast(BaseEvent, pickle.loads(snappy.decompress(data)))
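The compress side is presumably just the mirror image; a minimal sketch (the method name is hypothetical):

    def _compress_event(self, event: BaseEvent) -> bytes:
        # Hypothetical counterpart to _decompress_event: pickle the
        # event, then snappy-compress the pickle bytes.
        import pickle
        import snappy

        return snappy.compress(pickle.dumps(event))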
Example #16
    def _ReadChunkFromBevy(self, chunk_id, bevy, bevy_index, index_size):
        chunk_id_in_bevy = chunk_id % self.chunks_per_segment

        if index_size == 0:
            LOGGER.error("Index empty in %s: %s", self.urn, chunk_id)
            raise IOError("Index empty in %s: %s" % (self.urn, chunk_id))
        # The segment is not completely full.
        if chunk_id_in_bevy >= index_size:
            LOGGER.error("Bevy index too short in %s: %s", self.urn, chunk_id)
            raise IOError("Bevy index too short in %s: %s" %
                          (self.urn, chunk_id))

        # For the last chunk in the bevy, consume to the end of the bevy
        # segment.
        if chunk_id_in_bevy == index_size - 1:
            compressed_chunk_size = bevy.Size() - bevy.Tell()
        else:
            compressed_chunk_size = (bevy_index[chunk_id_in_bevy + 1] -
                                     bevy_index[chunk_id_in_bevy])

        bevy.Seek(bevy_index[chunk_id_in_bevy], 0)
        cbuffer = bevy.Read(compressed_chunk_size)
        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB:
            return zlib.decompress(cbuffer)

        if snappy and self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY:
            return snappy.decompress(cbuffer)

        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED:
            return cbuffer

        raise RuntimeError("Unable to process compression %s" %
                           self.compression)
Example #17
    def _get_data(cls, stream, encoding='utf-8', compress_option=None):
        if isinstance(stream, six.text_type):
            data = stream.encode(encoding)
        elif isinstance(stream, six.binary_type):
            data = stream
        else:
            data = stream.read()  # due to the restriction of the protobuf api, read all the data at once
            stream.close()  # directly close the stream
            if isinstance(data, six.text_type):
                data = data.encode(encoding)

        if compress_option is None or \
                compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_RAW:
            return data
        elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_ZLIB:
            return data  # requests already decompresses zlib content automatically
        elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_SNAPPY:
            try:
                import snappy
            except ImportError:
                raise errors.DependencyNotInstalledError(
                    'python-snappy library is required for snappy support')
            data = snappy.decompress(data)
            return data
        else:
            raise IOError('invalid compression option.')
Example #18
 def decode(cls, raw_msg: RawHeaderBody,
            deserializer: AbstractDeserializer) -> RPCMessage:
     header = munpackb(raw_msg.header)
     msgtype = RPCMessageTypes(header['type'])
     compressed = header['zip']
     raw_data = raw_msg.body
     if compressed:
         if not has_snappy:
             raise ConfigurationError('python-snappy is not installed')
         raw_data = snappy.decompress(raw_data)
     data = munpackb(raw_data)
     metadata = metadata_types[msgtype].decode(data['meta'])
     if msgtype in (RPCMessageTypes.FUNCTION, RPCMessageTypes.RESULT):
         body = deserializer(data['body'])
     else:
         body = data['body']
     return cls(
         raw_msg.peer_id,
         msgtype,
         header['meth'],
         header['okey'],
         header['seq'],
         metadata,
         body,
     )
Example #19
    def _ReadChunkFromBevy(self, chunk_id, bevy, bevy_index, index_size):
        chunk_id_in_bevy = chunk_id % self.chunks_per_segment

        if index_size == 0:
            LOGGER.error("Index empty in %s: %s", self.urn, chunk_id)
            raise IOError("Index empty in %s: %s" % (self.urn, chunk_id))
        # The segment is not completely full.
        if chunk_id_in_bevy >= index_size:
            LOGGER.error("Bevy index too short in %s: %s",
                         self.urn, chunk_id)
            raise IOError("Bevy index too short in %s: %s" % (
                self.urn, chunk_id))

        # For the last chunk in the bevy, consume to the end of the bevy
        # segment.
        if chunk_id_in_bevy == index_size - 1:
            compressed_chunk_size = bevy.Size() - bevy.Tell()
        else:
            compressed_chunk_size = (bevy_index[chunk_id_in_bevy + 1] -
                                     bevy_index[chunk_id_in_bevy])

        bevy.Seek(bevy_index[chunk_id_in_bevy], 0)
        cbuffer = bevy.Read(compressed_chunk_size)
        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB:
            return zlib.decompress(cbuffer)

        if snappy and self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY:
            return snappy.decompress(cbuffer)

        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED:
            return cbuffer

        raise RuntimeError(
            "Unable to process compression %s" % self.compression)
Example #20
def _read_page(fo, page_header, column_metadata):
    """Internal function to read the data page from the given file-object
    and convert it to raw, uncompressed bytes (if necessary)."""
    bytes_from_file = fo.read(page_header.compressed_page_size)
    codec = column_metadata.codec
    if codec is not None and codec != CompressionCodec.UNCOMPRESSED:
        if column_metadata.codec == CompressionCodec.SNAPPY:
            raw_bytes = snappy.decompress(bytes_from_file)
        elif column_metadata.codec == CompressionCodec.GZIP:
            io_obj = StringIO.StringIO(bytes_from_file)
            with gzip.GzipFile(fileobj=io_obj, mode='rb') as f:
                raw_bytes = f.read()
        else:
            raise ParquetFormatException(
                "Unsupported Codec: {0}".format(codec))
    else:
        raw_bytes = bytes_from_file
    logger.debug(
        "Read page with compression type {0}. Bytes {1} -> {2}".format(
            _get_name(CompressionCodec, codec),
            page_header.compressed_page_size,
            page_header.uncompressed_page_size))
    assert len(raw_bytes) == page_header.uncompressed_page_size, \
        "found {0} raw bytes (expected {1})".format(
            len(raw_bytes),
            page_header.uncompressed_page_size)
    return raw_bytes
Example #21
    def _decompress_bytes(data, codec):
        if codec == b'null':
            return data
        elif codec == b'deflate':
            # zlib.MAX_WBITS is the window size. '-' sign indicates that this is
            # raw data (without headers). See zlib and Avro documentations for more
            # details.
            return zlib.decompress(data, -zlib.MAX_WBITS)
        elif codec == b'snappy':
            # Snappy is an optional avro codec.
            # See Snappy and Avro documentation for more details.
            try:
                import snappy
            except ImportError:
                raise ValueError(
                    'python-snappy does not seem to be installed.')

            # Compressed data includes a 4-byte CRC32 checksum which we verify.
            # We take care to avoid extra copies of data while slicing large objects
            # by use of a memoryview.
            result = snappy.decompress(memoryview(data)[:-4])
            avroio.BinaryDecoder(io.BytesIO(data[-4:])).check_crc32(result)
            return result
        else:
            raise ValueError('Unknown codec: %r' % codec)
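The same verification can be written without the Avro decoder; per the Avro spec the block ends with the 4-byte, big-endian CRC32 of the uncompressed data, so an equivalent standalone check would be:

import struct
import zlib

def check_snappy_block_crc(block, result):
    # block: compressed bytes + 4-byte big-endian CRC32 trailer;
    # result: the already-decompressed bytes to verify.
    (expected,) = struct.unpack('>I', block[-4:])
    if zlib.crc32(result) & 0xffffffff != expected:
        raise ValueError('snappy block CRC32 mismatch')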
Example #22
def _read_page(file_obj, page_header, column_metadata):
    """Read the data page from the given file-object and convert it to raw, uncompressed bytes (if necessary)."""
    bytes_from_file = file_obj.read(page_header.compressed_page_size)
    codec = column_metadata.codec
    if codec is not None and codec != parquet_thrift.CompressionCodec.UNCOMPRESSED:
        if column_metadata.codec == parquet_thrift.CompressionCodec.SNAPPY:
            raw_bytes = snappy.decompress(bytes_from_file)
        elif column_metadata.codec == parquet_thrift.CompressionCodec.GZIP:
            io_obj = io.BytesIO(bytes_from_file)
            with gzip.GzipFile(fileobj=io_obj, mode='rb') as file_data:
                raw_bytes = file_data.read()
        else:
            raise ParquetFormatException(
                "Unsupported Codec: {0}".format(codec))
    else:
        raw_bytes = bytes_from_file

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Read page with compression type %s. Bytes %d -> %d",
                     _get_name(parquet_thrift.CompressionCodec,
                               codec), page_header.compressed_page_size,
                     page_header.uncompressed_page_size)
    assert len(raw_bytes) == page_header.uncompressed_page_size, \
        "found {0} raw bytes (expected {1})".format(
            len(raw_bytes),
            page_header.uncompressed_page_size)
    return raw_bytes
Example #23
 def _read_block_header(self):
     self._block_count = self.raw_decoder.read_long()
     if self.codec == "null":
         # Skip a long; we don't need to use the length.
         self.raw_decoder.skip_long()
         self._datum_decoder = self._raw_decoder
     elif self.codec == 'deflate':
         # Compressed data is stored as (length, data), which
         # corresponds to how the "bytes" type is encoded.
         data = self.raw_decoder.read_bytes()
         # -15 is the log of the window size; negative indicates
         # "raw" (no zlib headers) decompression.  See zlib.h.
         uncompressed = zlib.decompress(data, -15)
         self._datum_decoder = avro_io.BinaryDecoder(
             io.BytesIO(uncompressed))
     elif self.codec == 'snappy':
         # Compressed data includes a 4-byte CRC32 checksum
         length = self.raw_decoder.read_long()
         data = self.raw_decoder.read(length - 4)
         uncompressed = snappy.decompress(data)
         self._datum_decoder = avro_io.BinaryDecoder(
             io.BytesIO(uncompressed))
         self.raw_decoder.check_crc32(uncompressed)
     else:
         raise DataFileException("Unknown codec: %r" % self.codec)
Example #24
def process_sqlite(path):
    try:
        import snappy
    except ImportError:
        print("Please install python-snappy module.\n", file=sys.stderr)
        sys.exit(-1)
    try:
        import sqlite3
    except ImportError:
        print("Please install sqlite3 module.\n", file=sys.stderr)
        sys.exit(-1)

    conn = sqlite3.connect(path)
    cur = conn.cursor()
    data = cur.execute('SELECT * FROM object_data')
    fetched = data.fetchall()

    # Uses an undocumented, nonstandard data format; this may break in
    # the future.
    dataValue = snappy.decompress(fetched[0][4])

    key_hash = dataValue.split(b"keyHash")[1][9:53].decode()
    email = dataValue.split(b"email")[1][11:].split(b'\x00')[0].decode()
    iterations = int.from_bytes(dataValue.split(b"kdfIterations")[1][3:7],
                                byteorder="little")

    return [(email, key_hash, iterations)]
Example #25
def decompress(x):
    # Luckily \x78\x9c is an invalid preamble for Snappy:
    # If the block was 120 bytes, the preamble would be \x78\x00.
    # The first byte cannot be \x78 in any other case.
    if x[0] == '\x78' and x[1] in ('\x9c', '\xda', '\x01'):
        return zlib.decompress(x)
    else:
        return snappy.decompress(x)
Example #26
def decompress(x):
    # Luckily \x78\x9c is an invalid preamble for Snappy:
    # If the block was 120 bytes, the preamble would be \x78\x00.
    # The first byte cannot be \x78 in any other case.
    if x[0] == '\x78' and x[1] in ('\x9c', '\xda', '\x01'):
        return zlib.decompress(x)
    else:
        return snappy.decompress(x)
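Both copies above index Python 2 str; on Python 3, where indexing bytes yields ints, the same sniffing logic would look like this (a sketch):

import zlib
import snappy

def decompress_py3(x):
    # zlib streams start with 0x78 followed by one of a few flag bytes;
    # anything else is treated as raw snappy.
    if x[0] == 0x78 and x[1] in (0x9c, 0xda, 0x01):
        return zlib.decompress(x)
    return snappy.decompress(x)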
Example #27
 def decompress(self, readers_decoder):
     # Compressed data includes a 4-byte CRC32 checksum
     length = readers_decoder.read_long()
     data = readers_decoder.read(length - 4)
     uncompressed = snappy.decompress(data)
     checksum = readers_decoder.read(4)
     self.check_crc32(uncompressed, checksum)
     return avro.io.BinaryDecoder(io.BytesIO(uncompressed))
Example #28
 def untransform(self, buf):
     for trans_id in self.__read_transforms:
         if trans_id == TRANSFORM.ZLIB:
             buf = zlib.decompress(buf)
         elif trans_id == TRANSFORM.SNAPPY:
             buf = snappy.decompress(buf)
         if trans_id not in self.__write_transforms:
             self.__write_transforms.append(trans_id)
     return buf
Example #29
 def decode_ins_ops(self, event):
     """Parses the data field of a MicroEventLog event and returns
     a sequence of instruction ops (micro ops, grouped by instruction)."""
     assert event.HasField('micro_event_log')
     io_class = self._io_for_arch()
     fp = StringIO.StringIO(snappy.decompress(event.micro_event_log.data))
     with contextlib.closing(fp):
         for ins_op in InstructionOpsDecoder(io_class(fp)).decode_stream():
             yield ins_op
Example #30
 def untransform(self, buf):
     for trans_id in self.__read_transforms:
         if trans_id == self.ZLIB_TRANSFORM:
             buf = zlib.decompress(buf)
         elif trans_id == self.SNAPPY_TRANSFORM:
             buf = snappy.decompress(buf)
         if trans_id not in self.__write_transforms:
             self.__write_transforms.append(trans_id)
     return buf
Example #31
    def get_decompressed(self):
        if self._decompressed is not None:
            return self._decompressed

        if self._compressed is not None:
            self._decompressed = snappy.decompress(self._compressed)
            return self._decompressed

        return None
Example #32
    def decompress(self, compressed: bytes) -> bytes:
        """only one type for now"""

        raw = compressed

        if self._compression_type == CompressionType.SNAPPY:
            raw = decompress(compressed)

        return raw
Example #33
 def decode_micro_ops(self, event):
     """Parses the data field of a MicroEventLog event and returns
     a sequence of micro ops. """
     assert event.HasField('micro_event_log')
     io_class = self._io_for_arch()
     fp = StringIO.StringIO(snappy.decompress(event.micro_event_log.data))
     with contextlib.closing(fp):
         for op in MicroOpDecoder(io_class(fp)).decode_stream():
             yield op
Example #34
def memory_profile():

    import snappy

    data = bytearray(FILES[-1].read_bytes())
    out1 = bytes(cramjam.snappy.compress_raw(data))
    _out1 = bytes(cramjam.snappy.decompress_raw(out1))
    out2 = snappy.compress(data)
    _out2 = snappy.decompress(out2)
Example #35
def loads(classifier_ser):
    d = pickle.loads(snappy.decompress(classifier_ser))
    if d['classifier_name'] == 'plslinearsvmxval':
        def decision_function(x):
            for step_name, step in d['classifier'].steps[:-1]:
                x = step.transform(x)
            return d['classifier'].steps[-1][1].decision_function(x)
        d['classifier'].decision_function = decision_function
    return d['classifier']
Example #36
    def __init__(self, filename, writable=False, like=None):
        try:
            mode = os.stat(filename).st_mode
            if not stat.S_ISREG(mode):
                raise ValueError(
                    "filename %s doesn't refer to a regular file" % filename)
            exists = True

            if writable:
                self.fp = open(filename, 'r+b')
            else:
                self.fp = open(filename, 'rb')
        except FileNotFoundError as e:
            exists = False
            if not writable:
                raise e

        self.filename = filename

        if exists:
            if like is not None:
                raise ValueError(
                    "can't re-initialize existing pack like another")

            fp = self.fp

            magic = fp.read(4).decode('ascii')

            if magic != "P4cK":
                raise Exception("%s not a pack file" % filename)

            try:
                (self.frames_offset, hdr_len) = struct.unpack('II', fp.read(8))

                snappy_header = fp.read(hdr_len)
                header = snappy.decompress(snappy_header)

                (self.major, self.minor, part0_size, n_properties,
                 n_sections) = struct.unpack('5I', header[:20])

                if self.major != 1:
                    raise ValueError("Unsupported pack file version")

                for n in range(0, n_sections):
                    off = 20 + n * 64
                    section_name = header[off:off + 64].decode('ascii').split('\0')[0]
                    self.section_names.append(section_name)

                self.properties = Pack._unpack_properties(
                    header[20 + n_sections * 64:], max_properties=n_properties)
                self._is_empty = False
            except struct.error as e:
                raise Exception('corrupt, truncated pack file')
        else:
            self._is_empty = True
Example #37
    def _decode(self, data):
        """
        @return [val,owner="",schema="",expire=0,acl={}]
        """

        crcint = j.data.hash.crc32_string(data[:-4])
        crc = crcint.to_bytes(4, byteorder='big', signed=False)

        if crc != data[-4:]:
            raise j.exceptions.Input(
                message="Invalid checksum (CRC), is this a valid object ?:%s" %
                data)

        #
        # parsing header
        #
        header = data[0]

        counter = 1
        owner = j.data.hash.bin2hex(data[counter:counter + 16]).decode()

        counter += 16

        if header & 0b1000000:
            # schema defined
            schema = j.data.hash.bin2hex(data[counter:counter + 16])
            counter += 16
        else:
            # no schema
            schema = ""

        if header & 0b0100000:
            # expire is set
            expire = int.from_bytes(data[counter:counter + 4],
                                    byteorder='big',
                                    signed=False)
            counter += 4
        else:
            expire = 0

        nrsecrets = int.from_bytes(data[counter:counter + 1],
                                   byteorder='big',
                                   signed=False)
        aclbin = data[counter:counter + 17 * nrsecrets + 1]
        counter += 17 * nrsecrets + 1

        acl = j.servers.kvs._aclUnserialze(aclbin)

        val = data[counter:-4]

        val = snappy.decompress(val)

        if header & 0b0010000:
            val = j.data.serializer.msgpack.loads(val)

        return (val, owner, schema, expire, acl)
Example #38
 def decompress(self, data):
     if self._message_encoding == "gzip" or self._message_encoding == "deflate":
         import zlib
         return zlib.decompress(data)
     elif self._message_encoding == "snappy":
         import snappy
         return snappy.decompress(data)
     else:
         raise UnsupportedMessageEncodingError(
             "Unsupported compression: {}".format(self._message_encoding))
Example #39
def snappy_read_block(stream, buffer):
    """Read a block of data with the 'snappy' codec."""
    block_len = read_long(stream)
    data = stream.read(block_len)
    # Trim off last 4 bytes which hold the CRC32
    decompressed = snappy.decompress(data[:-4])
    buffer.truncate(0)
    buffer.seek(0)
    buffer.write(decompressed)
    buffer.seek(0)
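The write side implied by this reader appends the checksum after the compressed data; a hedged sketch (write_long is assumed to be the codec's varint writer, mirroring read_long above):

import struct
import zlib
import snappy

def snappy_write_block(stream, data, write_long):
    # Inverse of snappy_read_block: compress, append the 4-byte
    # big-endian CRC32 of the *uncompressed* data, then length-prefix.
    block = snappy.compress(data) + struct.pack(
        '>I', zlib.crc32(data) & 0xffffffff)
    write_long(stream, len(block))
    stream.write(block)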
Example #40
def _get(key, callback, args):
    r = cache_table.find_one({'_id': key})
    if not r:
        content = callback(*args)
        data = bson.binary.Binary(snappy.compress(content))
        cache_table.insert_one({'_id': key, 'data': data})
    else:
        data = r['data']
    content = snappy.decompress(data)
    return content
Example #41
def _get(key, callback, args):
    r = cache_table.find_one({'_id': key})
    if not r:
        content = callback(*args)
        data = bson.binary.Binary(snappy.compress(content))
        cache_table.insert_one({'_id': key, 'data': data})
    else:
        data = r['data']
    content = snappy.decompress(data)
    return content
Example #42
def snappy_read_block(stream, buffer):
    """Read a block of data with the 'snappy' codec."""
    block_len = read_long(stream)
    data = stream.read(block_len)
    # Trim off last 4 bytes which hold the CRC32
    decompressed = snappy.decompress(data[:-4])
    buffer.truncate(0)
    buffer.seek(0)
    buffer.write(decompressed)
    buffer.seek(0)
Example #43
def test_label(prob_dir, index, gt_label):
    datum = caffe_pb2.Datum()
    
    fn = '%010d' % index
    f = open(os.path.join(prob_dir, fn), 'rb')
    data = f.read()
    f.close()
    datum.ParseFromString(snappy.decompress(data))
    pred_lb = np.argmax(np.asarray(datum.float_data))
    return 1 if pred_lb == gt_label else 0
Example #44
 def untransform(self, buf):
     for trans_id in self.__read_transforms:
         if trans_id == TRANSFORM.ZLIB:
             buf = zlib.decompress(buf)
         elif trans_id == TRANSFORM.SNAPPY:
             buf = snappy.decompress(buf)
         elif trans_id == TRANSFORM.ZSTD:
             buf = zstd.ZstdDecompressor().decompress(buf)
         if trans_id not in self.__write_transforms:
             self.__write_transforms.append(trans_id)
     return buf
Example #45
 def read(self):
     header = safe_recv(self._sock, self.HEADER_LEN)
     if not header: return False
     length = struct.unpack(self.HEADER_STRUCT, header)[0]
     chunks = []
     while length:
         recv = safe_recv(self._sock, length)
         if not recv: return False
         chunks.append(recv)
         length -= len(recv)
     return snappy.decompress("".join(chunks))
Example #46
 def decompress_payload(self, raw_payload: bytes) -> bytes:
     # Do the Snappy Decompression only if Snappy Compression is supported by the protocol
     if self.snappy_support:
         try:
             return snappy.decompress(raw_payload)
         except Exception as err:
             # log this just in case it's a library error of some kind on valid messages.
             self.logger.debug("Snappy decompression error on payload: %s", raw_payload.hex())
             raise MalformedMessage from err
     else:
         return raw_payload
Example #47
def Decompress(Input):
	Output = Input + '.unsnappy'
	with open(Input, "rb") as file_in:
		c_data = file_in.read()

	with open(Output, "wb") as file_out:
		file_out.write(snappy.decompress(c_data))
Example #48
 def get_question_title_desc(self, post_id: int) -> dict:
     """dict including title and excerpt fo a question by PostId"""
     try:
         data = json.loads(
             snappy.decompress(
                 self.safe_get(self.question_details_key(post_id))))
     except Exception:
         # we might not have a record for that post_id:
         # - post_id can be erroneous (from a mistyped link)
         # - post_id can reference an excluded question (no answer)
         data = [None, None]
     return {"title": data[0], "excerpt": data[1]}
Example #49
def receive(sock):

    unpickler = pickle.Unpickler(sock)
    result = OrderedDict([])
    keylist = unpickler.load()

    for col in keylist:
        (length, ) = struct.unpack("!I", sock.read(4))
        data = snappy.decompress(sock.read(length)).decode('utf-8')
        result[col] = json.loads(data)

    return result
Example #50
 def fget(self, inst):
     if hasattr(inst, self.name + '_array'):
         return getattr(inst, self.name + '_array')

     nprow = getattr(inst, 'NumpyArrayTable__' + self.name)

     if nprow is None or nprow.shape is None or nprow.dtype is None:
         return None

     if nprow.shape == '':
         shape = ()
     else:
         shape = tuple(int(v) for v in nprow.shape.split(','))

     dt = np.dtype(nprow.dtype)

     if nprow.compress == 'blosc':
         buf = blosc.decompress(nprow.blob)
     elif nprow.compress == 'zlib':
         buf = zlib.decompress(nprow.blob)
     elif nprow.compress == 'lz4':
         buf = lz4.decompress(nprow.blob)
     elif nprow.compress == 'snappy':
         buf = snappy.decompress(nprow.blob)
     elif nprow.compress is None:
         buf = nprow.blob

     if np.prod(shape) == 0:
         if len(buf) != 0:
             arr = np.frombuffer(buf, dtype=dt)
         else:
             arr = np.empty(shape, dtype=dt)
     else:
         arr = np.frombuffer(buf, dtype=dt)
         arr.flags.writeable = True
         arr = arr.reshape(shape)

     if self.arraytype == pq.Quantity:
         arr = pq.Quantity(arr, units=nprow.units, copy=False)

     # next access will be direct; cache the array on the instance
     setattr(inst, self.name + '_array', arr)

     return arr
Example #51
 def recv(self):
     header = self.__fileobj.read(self.HEADER_LEN)
     if len(header) < self.HEADER_LEN:
         return None
     length = struct.unpack(self.HEADER_STRUCT, header)[0]
     chunks = []
     while length:
         recv = self.__fileobj.read(length)
         if not recv:
             return None
         chunks.append(recv)
         length -= len(recv)
     return SerLib.loads(ZipLib.decompress("".join(chunks)))
Example #52
def _get(url, callback, *args):
    key = get_sha1_key(url)
    r = cache_table.find_one({'_id': key})
    if not r:
        throttle.run()
        r = requests.get(url)
        content = callback(r, *args)
        data = bson.binary.Binary(snappy.compress(content))
        cache_table.insert_one({'_id': key, 'data': data})
    else:
        data = r['data']
    content = snappy.decompress(data)
    return content
Example #53
    def decode_micro_events(self, msg):
        """Parses the data field of a MicroEventLog event and returns a sequence
        of MicroEvent messages."""
        if self.arch == ZTrace_pb2.ARCH_X86:
            read_ptr = read_uint32
        elif self.arch == ZTrace_pb2.ARCH_X86_64:
            read_ptr = read_uint64
        else:
            read_ptr = None  # TBD, die?

        if msg.HasField('micro_event_log'):
            fp = cStringIO.StringIO(snappy.decompress(msg.micro_event_log.data))
            curr_pc = 0
            ent = None
            while True:
                rec_type = fp.read(1)
                if not rec_type:
                    break
                else:
                    rec_type = ord(rec_type)

                if rec_type == OP_NEWPC:
                    pc = read_ptr(fp)
                    ent = MicroEvent(type=OP_NEWPC, pc=pc)
                    curr_pc = pc
                elif rec_type == OP_MEMREAD:
                    ea = read_ptr(fp)
                    size = read_uint32(fp)
                    ent = MicroEvent(type=OP_MEMREAD, pc=curr_pc, ea=ea, size=size)
                elif rec_type == OP_MEMWRITE:
                    ea = read_ptr(fp)
                    size = read_uint32(fp)
                    wdata = fp.read(size)
                    ent = MicroEvent(type=OP_MEMWRITE, pc=curr_pc, ea=ea, data=wdata, size=size)
                elif rec_type == OP_REGWRITE:
                    assert 0 # TBD
                elif rec_type == OP_XMMWRITE:
                    assert 0 # TBD
                elif rec_type == OP_CALL:
                    target = read_ptr(fp)
                    sp = read_ptr(fp)
                    is_direct_call = read_bool(fp)
                    ent = MicroEvent(type=OP_CALL, pc=curr_pc, target=target, sp=sp, is_direct_call=is_direct_call)
                elif rec_type == OP_RET:
                    sp = read_ptr(fp)
                    ent = MicroEvent(type=OP_RET, pc=curr_pc, sp=sp)
                else:
                    print "%x" % (ord(rec_type))
                    assert 0
        
                yield ent
Example #54
def snappy_decode(payload):
    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if _detect_xerial_stream(payload):
        # TODO ? Should become a fileobj ?
        out = BytesIO()
        byt = payload[16:]
        length = len(byt)
        cursor = 0

        while cursor < length:
            block_size = struct.unpack_from('!i', byt[cursor:])[0]
            # Skip the block size
            cursor += 4
            end = cursor + block_size
            out.write(snappy.decompress(byt[cursor:end]))
            cursor = end

        out.seek(0)
        return out.read()
    else:
        return snappy.decompress(payload)
Example #55
def view(filename, skip=1):
    cv2.namedWindow('rgb')
    cv2.namedWindow('depth')
    cv2.moveWindow('rgb',640,0)
    cv2.moveWindow('depth',0,0)
    files = glob.glob('%s/*.snappy' % (filename,)) + glob.glob('%s/*.jpg' % (filename,))
    files = sorted(files, key=lambda f: os.path.basename(f)[2:])[::skip]
    for f in files:
        if f.endswith('.jpg'):
            cv2.imshow('rgb', cv2.imread(f))
            cv2.waitKey(25)
        if f.endswith('.snappy'):
            depth = np.fromstring(snappy.decompress(open(f).read()), dtype='uint16').reshape((480,640))
            cv2.imshow('depth', 1024./depth)
            cv2.waitKey(25)
Example #56
    def read_message(self, message_type, compressed=False, read_size=True):
        """
        Read a protobuf message
        """
        if read_size:
            size = self.read_vint32()
            b = self.read(size)
        else:
            b = self.read()

        if compressed:
            b = snappy.decompress(b)

        m = message_type()
        m.ParseFromString(b)
        return m
Example #57
    def __decompress(self, event):

        original_event = event
        try:
            event['data'] = snappy.decompress(event['data'])
            self.logging.debug("Incoming data decompressed.")
            event['header']['snappy'] = False
        except Exception as err:
            self.logging.warn("Decompressing failed. Reason: %s" % err)
            if self.purge == True:
                return
        try:
            self.queuepool.outbox.put(event)
        except QueueLocked:
            self.queuepool.inbox.rescue(original_event)
            self.queuepool.outbox.waitUntilPutAllowed()
Example #58
def iter(rgb=True, depth=True, skip=1):
    # Load the image
    fns = []
    fns_ = glob.glob(current_path+'/host-*/*/*.snappy')+glob.glob(current_path+'/host-*/*/*.jpg')
    print len(fns_)
    for fn in fns_:
        host,_,fnbase = fn.split('/')[-3:]
        fngroups = list(FN_RE.search(fnbase).groups())
        cam = int(host[-1])-1
        fns.append((fn, cam, float(fngroups[0])))
    fns.sort(key=lambda x: x[2])
    fns = fns[::skip]
    for fn, cam, ts in fns:
        if (fn.endswith('.ppm') or fn.endswith('.jpg')) and rgb:
            yield ((cam, ts, cv2.imread(fn)),), ()
        elif fn.endswith('.snappy') and depth:
            d = np.fromstring(snappy.decompress(open(fn).read()), dtype=np.uint16).reshape((480, 640))
            yield (), ((cam, ts, d),)
Example #59
    def _backend_get(self, keys):
        keys, is_single = tup(keys, ret_is_single=True)
        rows = self.cf.multiget(keys, columns=['value', 'compressed', 'format'])

        ret = {}

        for key, columns in rows.iteritems():
            value = columns['value']

            compressed = columns.get('compressed')

            if compressed == 'zlib':
                with g.stats.get_timer('permacache.deserialize.decompress_zlib'):
                    value = zlib.decompress(value)
            elif compressed == 'snappy':
                with g.stats.get_timer('permacache.deserialize.decompress_snappy'):
                    value = snappy.decompress(value)
            elif compressed:
                raise Exception("Unknown compression format %r(%r)"
                                % (compressed, format))

            format = columns.get('format') or 'pickle'

            if format == 'pickle':
                with g.stats.get_timer('permacache.deserialize.pickle'):
                    value = pickle.loads(value)
            elif format == 'json':
                with g.stats.get_timer('permacache.deserialize.json'):
                    value = json.loads(value)
            else:
                # we don't know how to deal with any other formats
                raise Exception("Unknown permacache serialization format %r"
                                % (format,))

            ret[key] = value

        if is_single:
            if ret:
                return ret.values()[0]
            else:
                return None
        else:
            return ret