def snappy_decode(payload):
    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if payload.startswith(_XERIAL_HEADER):
        # TODO ? Should become a fileobj ?
        view = memoryview(payload)
        out = []
        length = len(payload)
        cursor = 16
        while cursor < length:
            block_size = struct.unpack_from('!i', view, cursor)[0]
            # Skip the block size
            cursor += 4
            end = cursor + block_size
            # XXX snappy requires a bytes-like object but doesn't accept
            # a memoryview, so we must copy.
            out.append(snappy.decompress(view[cursor:end].tobytes()))
            cursor = end
        # See https://atleastfornow.net/blog/not-all-bytes/
        return b''.join(out)
    else:
        return snappy.decompress(payload)
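# Hedged companion sketch (not part of the example above): build a payload in
# the same xerial-style framing the decoder walks — a 16-byte header followed
# by repeated (4-byte big-endian block size, snappy block) pairs. The exact
# magic bytes below are an assumption about the xerial header; only the
# framing layout matters for the cursor loop above. Requires python-snappy.
import struct
import snappy

XERIAL_MAGIC = b"\x82SNAPPY\x00" + struct.pack("!ii", 1, 1)  # assumed 16-byte header

def xerial_encode(chunks):
    parts = [XERIAL_MAGIC]
    for chunk in chunks:
        block = snappy.compress(chunk)
        parts.append(struct.pack("!i", len(block)))
        parts.append(block)
    return b"".join(parts)

payload = xerial_encode([b"hello ", b"snappy"])
# Feeding this payload to a decoder like the one above (cursor starting at 16)
# should yield b"hello snappy".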
def decode_snappy(buff):
    """Decode a buffer using Snappy

    If xerial is found to be in use, the buffer is decoded in a fashion
    compatible with the xerial snappy library.

    Adapted from kafka-python
    https://github.com/mumrah/kafka-python/pull/127/files
    """
    if snappy is None:
        raise ImportError("Please install python-snappy")
    if _detect_xerial_stream(buff):
        out = StringIO()
        body = buffer(buff[16:])
        length = len(body)
        cursor = 0
        while cursor < length:
            block_size = struct.unpack_from('!i', body[cursor:])[0]
            cursor += 4
            end = cursor + block_size
            out.write(snappy.decompress(body[cursor:end]))
            cursor = end
        out.seek(0)
        return out.read()
    else:
        return snappy.decompress(buff)
def snappy_unpack_blob(blob, sep=SEP):
    if len(blob) == 0:
        return None
    if blob[0] == 'S':
        return np.array(snappy.decompress(blob[1:]).split(sep))
    dt = lookup[blob[0]]
    arr = np.frombuffer(snappy.decompress(blob[1:]), dtype=dt)
    # hack since arrays arent writable from buffer and we need this for
    # comp_het phasing.
    if blob[0] == '?':
        arr.setflags(write=True)
    return arr
def get_subvolume(self, box_zyx, scale=0):
    """
    Fetch a subvolume from the remote BrainMaps volume.

    Args:
        box: (start, stop) tuple, in ZYX order.
        scale: Which scale to fetch the subvolume from.

    Returns:
        volume (ndarray), where volume.shape = (stop - start)
    """
    box_zyx = np.asarray(box_zyx)
    corner_zyx = box_zyx[0]
    shape_zyx = box_zyx[1] - box_zyx[0]

    corner_xyz = corner_zyx[::-1]
    shape_xyz = shape_zyx[::-1]

    snappy_data = fetch_subvol_data(self.http,
                                    self.project,
                                    self.dataset,
                                    self.volume_id,
                                    corner_xyz,
                                    shape_xyz,
                                    scale,
                                    self.change_stack_id,
                                    self.use_gzip)

    volume_buffer = snappy.decompress(snappy_data)
    volume = np.frombuffer(volume_buffer, dtype=self.dtype).reshape(shape_zyx)
    return volume
def _read_page(fo, page_header, column_metadata):
    """Internal function to read the data page from the given file-object
    and convert it to raw, uncompressed bytes (if necessary)."""
    bytes_from_file = fo.read(page_header.compressed_page_size)
    codec = column_metadata.codec
    if codec is not None and codec != CompressionCodec.UNCOMPRESSED:
        if column_metadata.codec == CompressionCodec.SNAPPY:
            raw_bytes = snappy.decompress(bytes_from_file)
        elif column_metadata.codec == CompressionCodec.GZIP:
            io_obj = StringIO.StringIO(bytes_from_file)
            with gzip.GzipFile(fileobj=io_obj, mode='rb') as f:
                raw_bytes = f.read()
        else:
            raise ParquetFormatException(
                "Unsupported Codec: {0}".format(codec))
    else:
        raw_bytes = bytes_from_file
    logger.debug(
        "Read page with compression type {0}. Bytes {1} -> {2}".format(
            _get_name(CompressionCodec, codec),
            page_header.compressed_page_size,
            page_header.uncompressed_page_size))
    assert len(raw_bytes) == page_header.uncompressed_page_size, \
        "found {0} raw bytes (expected {1})".format(
            len(raw_bytes),
            page_header.uncompressed_page_size)
    return raw_bytes
def load_experience_pairs(filename):
    with open(INTEGRATE_DIR + filename, 'r') as file_ref:
        json_str = snappy.decompress(file_ref.read())
        print 'finished decompressing ' + filename
        experiences = json.loads(json_str)
        # compress_screen_hex(experiences)
        return pair_experiences(experiences, filename)
def _ReadChunkFromBevy(self, chunk_id, bevy, bevy_index, index_size): chunk_id_in_bevy = chunk_id % self.chunks_per_segment if index_size == 0: LOGGER.error("Index empty in %s: %s", self.urn, chunk_id) raise IOError("Index empty in %s: %s" % (self.urn, chunk_id)) # The segment is not completely full. if chunk_id_in_bevy >= index_size: LOGGER.error("Bevy index too short in %s: %s", self.urn, chunk_id) raise IOError("Bevy index too short in %s: %s" % (self.urn, chunk_id)) (bevvy_offset, compressed_chunk_size) = bevy_index[chunk_id_in_bevy] bevy.Seek(bevvy_offset, 0) cbuffer = bevy.Read(compressed_chunk_size) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED or compressed_chunk_size == self.chunk_size: return cbuffer elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB: return zlib.decompress(cbuffer) elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY: return snappy.decompress(cbuffer) else: raise RuntimeError("Unable to process compression %s" % self.compression)
def _read_block_header(self):
    self.block_count = self.raw_decoder.read_long()
    if self.codec == "null":
        # Skip a long; we don't need to use the length.
        self.raw_decoder.skip_long()
        self._datum_decoder = self._raw_decoder
    elif self.codec == 'deflate':
        # Compressed data is stored as (length, data), which
        # corresponds to how the "bytes" type is encoded.
        data = self.raw_decoder.read_bytes()
        # -15 is the log of the window size; negative indicates
        # "raw" (no zlib headers) decompression. See zlib.h.
        uncompressed = zlib.decompress(data, -15)
        self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
    elif self.codec == 'snappy':
        # Compressed data includes a 4-byte CRC32 checksum
        length = self.raw_decoder.read_long()
        data = self.raw_decoder.read(length - 4)
        uncompressed = snappy.decompress(data)
        self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
        self.raw_decoder.check_crc32(uncompressed)
    elif self.codec == 'zstandard':
        length = self.raw_decoder.read_long()
        data = self.raw_decoder.read(length)
        uncompressed = bytearray()
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(StringIO(data)) as reader:
            while True:
                chunk = reader.read(16384)
                if not chunk:
                    break
                uncompressed.extend(chunk)
        self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
    else:
        raise DataFileException("Unknown codec: %r" % self.codec)
def get_subvolume(self, box_zyx, scale=0):
    """
    Fetch a subvolume from the remote BrainMaps volume.

    Args:
        box: (start, stop) tuple, in ZYX order.
        scale: Which scale to fetch the subvolume from.

    Returns:
        volume (ndarray), where volume.shape = (stop - start)
    """
    box_zyx = np.asarray(box_zyx)
    corner_zyx = box_zyx[0]
    shape_zyx = box_zyx[1] - box_zyx[0]

    corner_xyz = corner_zyx[::-1]
    shape_xyz = shape_zyx[::-1]

    snappy_data = fetch_subvol_data(
        self.http, self.project, self.dataset, self.volume_id,
        corner_xyz, shape_xyz, scale, self.change_stack_id, self.use_gzip
    )

    volume_buffer = snappy.decompress(snappy_data)
    volume = np.frombuffer(volume_buffer, dtype=self.dtype).reshape(shape_zyx)
    return volume
def demo_get(service, params, compress=False):
    url = build_url(service, params, compress)
    print("REQ:")
    print(url)

    ua = UAResponse()
    msg = MSG()

    print()
    print("RESP:")
    if compress:
        data = snappy.decompress(urlopen(url).read())
    else:
        data = urlopen(url).read()
    ua.ParseFromString(data)
    if ua.Err == 0:
        msg.ParseFromString(ua.Data)
        # TODO: handle msg here
        print(msg)
    else:
        print(ua)
def UnpackState(packed_state):
    """Convert a packed State binary string into a StateStruct object.

    If the input doesn't have the STATE_MARK_ZIP prefix, it is assumed to be an
    old-style compressed state object, and is directly decompressed.

    Args:
        packed_state - Binary string of the type produced by PackState.

    Returns:
        Populated StateStruct object.
    """
    if not packed_state:
        return None

    if ord(packed_state[0]) == STATE_MARK_ZIP:
        # Extract the meta-data Struct from the packed data.
        meta = StateMeta()
        meta.Deserialize(packed_state)
        # Extract the compressed State from the packed data.
        compressed_state = packed_state[meta.Size():]
        # Compute the checksum and make sure it matches the metadata.
        cksum = zlib.adler32(compressed_state)
        if cksum != meta.checksum:
            raise ValueError('Compressed State Checksum Error')
        # Return the decompressed State.
        return pickle.loads(zlib.decompress(compressed_state))
    elif ord(packed_state[0]) == STATE_MARK_SNAPPY:
        # Extract the meta-data Struct from the packed data.
        meta = StateMeta()
        meta.Deserialize(packed_state)
        # Extract the compressed State from the packed data.
        compressed_state = packed_state[meta.Size():]
        # Compute the checksum and make sure it matches the metadata.
        cksum = zlib.adler32(compressed_state)
        if cksum != meta.checksum:
            raise ValueError('Compressed State Checksum Error')
        # Return the decompressed State.
        return pickle.loads(snappy.decompress(compressed_state))
    elif ord(packed_state[0]) == STATE_MARK_LIGHT:
        # Extract the meta-data Struct from the packed data.
        meta = StateMeta()
        meta.Deserialize(packed_state)
        # Extract the State buffer from the packed data.
        state_buffer = packed_state[meta.Size():]
        # Return the decompressed State.
        return pickle.load(state_buffer)
    else:
        # Unsupported format.
        raise ValueError('Unrecognized State serialization format')
def u_slug(username, slug):
    user = current_user
    post = user.posts.filter_by(slug=slug).first()
    if post:
        _prev = user.posts.filter(Post.created_timestamp < post.created_timestamp).slice(0, 4)
        _next = Post.query.filter(User.username == user.username,
                                  Post.created_timestamp > post.created_timestamp).order_by(Post.created_timestamp).slice(0, 4)
        _prev_count = _prev.count()
        _next_count = _next.count()
        if _prev_count < 2:
            _next = _next.slice(0, 4 - _prev_count)
        elif _next_count < 2:
            _prev = _prev.slice(0, 4 - _next_count)
        else:
            _prev = _prev.slice(0, 2)
            _next = _next.slice(0, 2)
        if post.content:
            # Decrypt
            half_key = session[generate_hash(user.user_key_salt)]
            key = xor_keys(half_key, app.config['MASTER_KEY'])
            content = AES_decrypt(key, post.content)
            content = snappy.decompress(content)
            return render_template("post.html", content=content, user=user,
                                   post=post, next=_next, prev=_prev)
        return render_template("post.html", content='', user=user,
                               post=post, next=_next, prev=_prev)
    abort(404)
def _get_data(cls, stream, encoding='utf-8', compress_option=None):
    if isinstance(stream, six.text_type):
        data = stream.encode(encoding)
    elif isinstance(stream, six.binary_type):
        data = stream
    else:
        data = stream.read()  # due to the restriction of protobuf api, just read the data all
        stream.close()  # directly close the stream

    if isinstance(data, six.text_type):
        data = data.encode(encoding)

    if compress_option is None or \
            compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_RAW:
        return data
    elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_ZLIB:
        return data  # because requests do the unzip automatically, thanks to them O.O
    elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_SNAPPY:
        try:
            import snappy
        except ImportError:
            raise errors.DependencyNotInstalledError(
                'python-snappy library is required for snappy support')
        data = snappy.decompress(data)
        return data
    else:
        raise IOError('invalid compression option.')
def _read_page(file_obj, page_header, column_metadata): """Read the data page from the given file-object and convert it to raw, uncompressed bytes (if necessary).""" bytes_from_file = file_obj.read(page_header.compressed_page_size) codec = column_metadata.codec if codec is not None and codec != parquet_thrift.CompressionCodec.UNCOMPRESSED: if column_metadata.codec == parquet_thrift.CompressionCodec.SNAPPY: raw_bytes = snappy.decompress(bytes_from_file) elif column_metadata.codec == parquet_thrift.CompressionCodec.GZIP: io_obj = io.BytesIO(bytes_from_file) with gzip.GzipFile(fileobj=io_obj, mode='rb') as file_data: raw_bytes = file_data.read() else: raise ParquetFormatException( "Unsupported Codec: {0}".format(codec)) else: raw_bytes = bytes_from_file if logger.isEnabledFor(logging.DEBUG): logger.debug( "Read page with compression type %s. Bytes %d -> %d", _get_name(parquet_thrift.CompressionCodec, codec), page_header.compressed_page_size, page_header.uncompressed_page_size) assert len(raw_bytes) == page_header.uncompressed_page_size, \ "found {0} raw bytes (expected {1})".format( len(raw_bytes), page_header.uncompressed_page_size) return raw_bytes
def _decompress_event(self, data: Union[BaseEvent, bytes]) -> BaseEvent:
    if isinstance(data, BaseEvent):
        return data
    else:
        import snappy
        return cast(BaseEvent, pickle.loads(snappy.decompress(data)))
def _ReadChunkFromBevy(self, chunk_id, bevy, bevy_index, index_size): chunk_id_in_bevy = chunk_id % self.chunks_per_segment if index_size == 0: LOGGER.error("Index empty in %s: %s", self.urn, chunk_id) raise IOError("Index empty in %s: %s" % (self.urn, chunk_id)) # The segment is not completely full. if chunk_id_in_bevy >= index_size: LOGGER.error("Bevy index too short in %s: %s", self.urn, chunk_id) raise IOError("Bevy index too short in %s: %s" % (self.urn, chunk_id)) # For the last chunk in the bevy, consume to the end of the bevy # segment. if chunk_id_in_bevy == index_size - 1: compressed_chunk_size = bevy.Size() - bevy.Tell() else: compressed_chunk_size = (bevy_index[chunk_id_in_bevy + 1] - bevy_index[chunk_id_in_bevy]) bevy.Seek(bevy_index[chunk_id_in_bevy], 0) cbuffer = bevy.Read(compressed_chunk_size) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB: return zlib.decompress(cbuffer) if snappy and self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY: return snappy.decompress(cbuffer) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED: return cbuffer raise RuntimeError("Unable to process compression %s" % self.compression)
def _get_data(cls, stream, encoding='utf-8', compress_option=None):
    if isinstance(stream, six.text_type):
        data = stream.encode(encoding)
    elif isinstance(stream, six.binary_type):
        data = stream
    else:
        data = stream.read()  # due to the restriction of protobuf api, just read the data all
        stream.close()  # directly close the stream

    if isinstance(data, six.text_type):
        data = data.encode(encoding)

    if compress_option is None or \
            compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_RAW:
        return data
    elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_ZLIB:
        return data  # because requests do the unzip automatically, thanks to them O.O
    elif compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_SNAPPY:
        try:
            import snappy
        except ImportError:
            raise errors.DependencyNotInstalledError(
                'python-snappy library is required for snappy support')
        data = snappy.decompress(data)
        return data
    else:
        raise IOError('invalid compression option.')
def decode(cls, raw_msg: RawHeaderBody,
           deserializer: AbstractDeserializer) -> RPCMessage:
    header = munpackb(raw_msg.header)
    msgtype = RPCMessageTypes(header['type'])
    compressed = header['zip']
    raw_data = raw_msg.body
    if compressed:
        if not has_snappy:
            raise ConfigurationError('python-snappy is not installed')
        raw_data = snappy.decompress(raw_data)
    data = munpackb(raw_data)
    metadata = metadata_types[msgtype].decode(data['meta'])
    if msgtype in (RPCMessageTypes.FUNCTION, RPCMessageTypes.RESULT):
        body = deserializer(data['body'])
    else:
        body = data['body']
    return cls(
        raw_msg.peer_id,
        msgtype,
        header['meth'],
        header['okey'],
        header['seq'],
        metadata,
        body,
    )
def _ReadChunkFromBevy(self, chunk_id, bevy, bevy_index, index_size): chunk_id_in_bevy = chunk_id % self.chunks_per_segment if index_size == 0: LOGGER.error("Index empty in %s: %s", self.urn, chunk_id) raise IOError("Index empty in %s: %s" % (self.urn, chunk_id)) # The segment is not completely full. if chunk_id_in_bevy >= index_size: LOGGER.error("Bevy index too short in %s: %s", self.urn, chunk_id) raise IOError("Bevy index too short in %s: %s" % ( self.urn, chunk_id)) # For the last chunk in the bevy, consume to the end of the bevy # segment. if chunk_id_in_bevy == index_size - 1: compressed_chunk_size = bevy.Size() - bevy.Tell() else: compressed_chunk_size = (bevy_index[chunk_id_in_bevy + 1] - bevy_index[chunk_id_in_bevy]) bevy.Seek(bevy_index[chunk_id_in_bevy], 0) cbuffer = bevy.Read(compressed_chunk_size) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB: return zlib.decompress(cbuffer) if snappy and self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY: return snappy.decompress(cbuffer) if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED: return cbuffer raise RuntimeError( "Unable to process compression %s" % self.compression)
def _decompress_bytes(data, codec):
    if codec == b'null':
        return data
    elif codec == b'deflate':
        # zlib.MAX_WBITS is the window size. '-' sign indicates that this is
        # raw data (without headers). See zlib and Avro documentations for
        # more details.
        return zlib.decompress(data, -zlib.MAX_WBITS)
    elif codec == b'snappy':
        # Snappy is an optional avro codec.
        # See Snappy and Avro documentation for more details.
        try:
            import snappy
        except ImportError:
            raise ValueError('python-snappy does not seem to be installed.')

        # Compressed data includes a 4-byte CRC32 checksum which we verify.
        # We take care to avoid extra copies of data while slicing large
        # objects by use of a memoryview.
        result = snappy.decompress(memoryview(data)[:-4])
        avroio.BinaryDecoder(io.BytesIO(data[-4:])).check_crc32(result)
        return result
    else:
        raise ValueError('Unknown codec: %r' % codec)
def _read_page(file_obj, page_header, column_metadata): """Read the data page from the given file-object and convert it to raw, uncompressed bytes (if necessary).""" bytes_from_file = file_obj.read(page_header.compressed_page_size) codec = column_metadata.codec if codec is not None and codec != parquet_thrift.CompressionCodec.UNCOMPRESSED: if column_metadata.codec == parquet_thrift.CompressionCodec.SNAPPY: raw_bytes = snappy.decompress(bytes_from_file) elif column_metadata.codec == parquet_thrift.CompressionCodec.GZIP: io_obj = io.BytesIO(bytes_from_file) with gzip.GzipFile(fileobj=io_obj, mode='rb') as file_data: raw_bytes = file_data.read() else: raise ParquetFormatException( "Unsupported Codec: {0}".format(codec)) else: raw_bytes = bytes_from_file if logger.isEnabledFor(logging.DEBUG): logger.debug("Read page with compression type %s. Bytes %d -> %d", _get_name(parquet_thrift.CompressionCodec, codec), page_header.compressed_page_size, page_header.uncompressed_page_size) assert len(raw_bytes) == page_header.uncompressed_page_size, \ "found {0} raw bytes (expected {1})".format( len(raw_bytes), page_header.uncompressed_page_size) return raw_bytes
def _read_block_header(self):
    self._block_count = self.raw_decoder.read_long()
    if self.codec == "null":
        # Skip a long; we don't need to use the length.
        self.raw_decoder.skip_long()
        self._datum_decoder = self._raw_decoder
    elif self.codec == 'deflate':
        # Compressed data is stored as (length, data), which
        # corresponds to how the "bytes" type is encoded.
        data = self.raw_decoder.read_bytes()
        # -15 is the log of the window size; negative indicates
        # "raw" (no zlib headers) decompression. See zlib.h.
        uncompressed = zlib.decompress(data, -15)
        self._datum_decoder = avro_io.BinaryDecoder(
            io.BytesIO(uncompressed))
    elif self.codec == 'snappy':
        # Compressed data includes a 4-byte CRC32 checksum
        length = self.raw_decoder.read_long()
        data = self.raw_decoder.read(length - 4)
        uncompressed = snappy.decompress(data)
        self._datum_decoder = avro_io.BinaryDecoder(
            io.BytesIO(uncompressed))
        self.raw_decoder.check_crc32(uncompressed)
    else:
        raise DataFileException("Unknown codec: %r" % self.codec)
def process_sqlite(path):
    try:
        import snappy
    except ImportError:
        print("Please install python-snappy module.\n", file=sys.stderr)
        sys.exit(-1)

    try:
        import sqlite3
    except ImportError:
        print("Please install sqlite3 module.\n", file=sys.stderr)
        sys.exit(-1)

    conn = sqlite3.connect(path)
    cur = conn.cursor()
    data = cur.execute('SELECT * FROM object_data')
    fetched = data.fetchall()

    # uses undocumented nonstandard data format
    # probably can break in the future
    dataValue = snappy.decompress(fetched[0][4])
    key_hash = dataValue.split(b"keyHash")[1][9:53].decode()
    email = dataValue.split(b"email")[1][11:].split(b'\x00')[0].decode()
    iterations = int.from_bytes(dataValue.split(b"kdfIterations")[1][3:7],
                                byteorder="little")
    return [(email, key_hash, iterations)]
def decompress(x):
    # Luckily \x78\x9c is an invalid preamble for Snappy:
    # If the block was 120 bytes, the preamble would be \x78\x00.
    # The first byte cannot be \x78 in any other case.
    if x[0] == '\x78' and x[1] in ('\x9c', '\xda', '\x01'):
        return zlib.decompress(x)
    else:
        return snappy.decompress(x)
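# A minimal, self-contained check of the preamble trick used above (assumes
# python-snappy is installed; the sniffing `decompress` function itself is the
# one defined in the previous example, which indexes bytes as str, i.e.
# Python 2 style).
import zlib
import snappy

zlib_blob = zlib.compress(b"hello world")      # default level -> starts with b'\x78\x9c'
snappy_blob = snappy.compress(b"hello world")  # raw snappy starts with the length varint, 0x0b here
print(zlib_blob[:2], snappy_blob[:1])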
def decompress(self, readers_decoder):
    # Compressed data includes a 4-byte CRC32 checksum
    length = readers_decoder.read_long()
    data = readers_decoder.read(length - 4)
    uncompressed = snappy.decompress(data)
    checksum = readers_decoder.read(4)
    self.check_crc32(uncompressed, checksum)
    return avro.io.BinaryDecoder(io.BytesIO(uncompressed))
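# Hedged illustration of the Avro snappy-block convention handled above:
# compressed bytes followed by a 4-byte big-endian CRC32 of the *uncompressed*
# data. All names here are local to this sketch; requires python-snappy.
import struct
import zlib
import snappy

raw = b"avro block bytes"
block = snappy.compress(raw) + struct.pack(">I", zlib.crc32(raw) & 0xFFFFFFFF)

data, crc = block[:-4], block[-4:]
out = snappy.decompress(data)
assert struct.unpack(">I", crc)[0] == (zlib.crc32(out) & 0xFFFFFFFF)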
def untransform(self, buf):
    for trans_id in self.__read_transforms:
        if trans_id == TRANSFORM.ZLIB:
            buf = zlib.decompress(buf)
        elif trans_id == TRANSFORM.SNAPPY:
            buf = snappy.decompress(buf)
        if trans_id not in self.__write_transforms:
            self.__write_transforms.append(trans_id)
    return buf
def decode_ins_ops(self, event):
    """Parses the data field of a MicroEventLog event and returns a
    sequence of instruction ops (micro ops, grouped by instruction)."""
    assert event.HasField('micro_event_log')
    io_class = self._io_for_arch()
    fp = StringIO.StringIO(snappy.decompress(event.micro_event_log.data))
    with contextlib.closing(fp):
        for ins_op in InstructionOpsDecoder(io_class(fp)).decode_stream():
            yield ins_op
def untransform(self, buf):
    for trans_id in self.__read_transforms:
        if trans_id == self.ZLIB_TRANSFORM:
            buf = zlib.decompress(buf)
        elif trans_id == self.SNAPPY_TRANSFORM:
            buf = snappy.decompress(buf)
        if not trans_id in self.__write_transforms:
            self.__write_transforms.append(trans_id)
    return buf
def get_decompressed(self):
    if self._decompressed != None:
        return self._decompressed
    if self._compressed != None:
        self._decompressed = snappy.decompress(self._compressed)
        return self._decompressed
    return None
def decompress(self, compressed: bytes) -> bytes:
    """only one type for now"""
    raw = compressed
    if self._compression_type == CompressionType.SNAPPY:
        raw = decompress(compressed)
    return raw
def decode_micro_ops(self, event):
    """Parses the data field of a MicroEventLog event and returns a
    sequence of micro ops.
    """
    assert event.HasField('micro_event_log')
    io_class = self._io_for_arch()
    fp = StringIO.StringIO(snappy.decompress(event.micro_event_log.data))
    with contextlib.closing(fp):
        for op in MicroOpDecoder(io_class(fp)).decode_stream():
            yield op
def memory_profile():
    import snappy

    data = bytearray(FILES[-1].read_bytes())

    out1 = bytes(cramjam.snappy.compress_raw(data))
    _out1 = bytes(cramjam.snappy.decompress_raw(out1))

    out2 = snappy.compress(data)
    _ou2 = snappy.decompress(out2)
def loads(classifier_ser):
    d = pickle.loads(snappy.decompress(classifier_ser))
    if d['classifier_name'] == 'plslinearsvmxval':
        def decision_function(x):
            for step_name, step in d['classifier'].steps[:-1]:
                x = step.transform(x)
            return d['classifier'].steps[-1][1].decision_function(x)
        d['classifier'].decision_function = decision_function
    return d['classifier']
def __init__(self, filename, writable=False, like=None):
    try:
        mode = os.stat(filename).st_mode
        if not stat.S_ISREG(mode):
            raise ValueError(
                "filename %s doesn't refer to a regular file" % filename)
        exists = True
        if writable:
            self.fp = open(filename, 'r+b')
        else:
            self.fp = open(filename, 'rb')
    except FileNotFoundError as e:
        exists = False
        if not writable:
            raise e

    self.filename = filename

    if exists:
        if like != None:
            raise ValueError(
                "can't re-intilize existing pack like another")

        fp = self.fp
        magic = fp.read(4).decode('ascii')
        if magic != "P4cK":
            raise Exception("%s not a pack file" % filename)

        try:
            (self.frames_offset, hdr_len) = struct.unpack('II', fp.read(8))
            snappy_header = fp.read(hdr_len)
            header = snappy.decompress(snappy_header)

            (self.major, self.minor, part0_size, n_properties,
             n_sections) = struct.unpack('5I', header[:20])

            if self.major != 1:
                raise ValueError("Unsupported pack file version")

            for n in range(0, n_sections):
                off = 20 + n * 64
                section_name = header[off:off + 64].decode('ascii').split('\0')[0]
                self.section_names.append(section_name)

            self.properties = Pack._unpack_properties(
                header[20 + n_sections * 64:],
                max_properties=n_properties)

            self._is_empty = False
        except struct.error as e:
            raise Exception('corrupt, truncated pack file')
    else:
        self._is_empty = True
def _decode(self, data):
    """
    @return [val,owner="",schema="",expire=0,acl={}]
    """
    crcint = j.data.hash.crc32_string(data[:-4])
    crc = crcint.to_bytes(4, byteorder='big', signed=False)
    if not crc == data[-4:]:
        raise j.exceptions.Input(
            message="Invalid checksum (CRC), is this a valid object ?:%s" % data)

    #
    # parsing header
    #
    header = data[0]
    counter = 1

    owner = j.data.hash.bin2hex(data[counter:counter + 16]).decode()
    counter += 16

    if header & 0b1000000:
        # schema defined
        schema = j.data.hash.bin2hex(data[counter:counter + 16])
        counter += 16
    else:
        # no schema
        schema = ""

    if header & 0b0100000:
        # expire is set
        expire = int.from_bytes(data[counter:counter + 4], byteorder='big', signed=False)
        counter += 4
    else:
        expire = 0

    nrsecrets = int.from_bytes(data[counter:counter + 1], byteorder='big', signed=False)
    aclbin = data[counter:counter + 17 * nrsecrets + 1]
    counter += 17 * nrsecrets + 1
    acl = j.servers.kvs._aclUnserialze(aclbin)

    val = data[counter:-4]
    val = snappy.decompress(val)
    if header & 0b0010000:
        val = j.data.serializer.msgpack.loads(val)

    return (val, owner, schema, expire, acl)
def decompress(self, data):
    if self._message_encoding == "gzip" or self._message_encoding == "deflate":
        import zlib
        return zlib.decompress(data)
    elif self._message_encoding == "snappy":
        import snappy
        return snappy.decompress(data)
    else:
        raise UnsupportedMessageEncodingError(
            "Unsupported compression: {}".format(self._message_encoding))
def snappy_read_block(stream, buffer):
    """Read a block of data with the 'snappy' codec."""
    block_len = read_long(stream)
    data = stream.read(block_len)
    # Trim off last 4 bytes which hold the CRC32
    decompressed = snappy.decompress(data[:-4])
    buffer.truncate(0)
    buffer.seek(0)
    buffer.write(decompressed)
    buffer.seek(0)
def _get(key, callback, args):
    r = cache_table.find_one({'_id': key})
    if not r:
        content = callback(*args)
        data = bson.binary.Binary(snappy.compress(content))
        cache_table.insert_one({'_id': key, 'data': data})
    else:
        data = r['data']
        content = snappy.decompress(data)
    return content
def test_label(prob_dir, index, gt_label):
    datum = caffe_pb2.Datum()
    fn = '%010d' % index
    f = open(os.path.join(prob_dir, fn), 'rb')
    data = f.read()
    f.close()
    datum.ParseFromString(snappy.decompress(data))
    pred_lb = np.argmax(np.asarray(datum.float_data))
    return 1 if pred_lb == gt_label else 0
def untransform(self, buf):
    for trans_id in self.__read_transforms:
        if trans_id == TRANSFORM.ZLIB:
            buf = zlib.decompress(buf)
        elif trans_id == TRANSFORM.SNAPPY:
            buf = snappy.decompress(buf)
        elif trans_id == TRANSFORM.ZSTD:
            buf = zstd.ZstdDecompressor().decompress(buf)
        if trans_id not in self.__write_transforms:
            self.__write_transforms.append(trans_id)
    return buf
def read(self):
    header = safe_recv(self._sock, self.HEADER_LEN)
    if not header:
        return False
    length = struct.unpack(self.HEADER_STRUCT, header)[0]
    chunks = []
    while length:
        recv = safe_recv(self._sock, length)
        if not recv:
            return False
        chunks.append(recv)
        length -= len(recv)
    return snappy.decompress("".join(chunks))
def decompress_payload(self, raw_payload: bytes) -> bytes:
    # Do the Snappy Decompression only if Snappy Compression is supported by
    # the protocol
    if self.snappy_support:
        try:
            return snappy.decompress(raw_payload)
        except Exception as err:
            # log this just in case it's a library error of some kind on
            # valid messages.
            self.logger.debug("Snappy decompression error on payload: %s",
                              raw_payload.hex())
            raise MalformedMessage from err
    else:
        return raw_payload
def Decompress(Input):
    Output = Input + '.unsnappy'
    file_in = file(Input, "rb")
    c_data = file_in.read()
    file_out = file(Output, "wb")
    data = snappy.decompress(c_data)
    file_out.write(data)
    file_out.close()
    file_in.close()
def get_question_title_desc(self, post_id: int) -> dict:
    """dict including title and excerpt of a question by PostId"""
    try:
        data = json.loads(
            snappy.decompress(
                self.safe_get(self.question_details_key(post_id))))
    except Exception:
        # we might not have a record for that post_id:
        # - post_id can be erroneous (from a mistyped link)
        # - post_id can reference an excluded question (no answer)
        data = [None, None]
    return {"title": data[0], "excerpt": data[1]}
def receive(sock):
    unpickler = pickle.Unpickler(sock)
    result = OrderedDict([])
    keylist = unpickler.load()
    for col in keylist:
        (length, ) = struct.unpack("!I", sock.read(4))
        data = snappy.decompress(sock.read(length)).decode('utf-8')
        result[col] = json.loads(data)
    return result
def fget(self, inst):
    if hasattr(inst, self.name + '_array'):
        return getattr(inst, self.name + '_array')

    nprow = getattr(inst, 'NumpyArrayTable__' + self.name)
    #~ print 'fget', self.name, nprow, inst.id
    if nprow is None or nprow.shape is None or nprow.dtype is None:
        return None

    if nprow.shape == '':
        shape = ()
    else:
        shape = tuple([int(v) for v in nprow.shape.split(',')])

    dt = np.dtype(nprow.dtype)

    if nprow.compress == 'blosc':
        buf = blosc.decompress(nprow.blob)
    elif nprow.compress == 'zlib':
        buf = zlib.decompress(nprow.blob)
    elif nprow.compress == 'lz4':
        buf = lz4.decompress(nprow.blob)
    elif nprow.compress == 'snappy':
        buf = snappy.decompress(nprow.blob)
    elif nprow.compress is None:
        buf = nprow.blob

    if np.prod(shape) == 0:
        if len(buf) != 0:
            arr = np.frombuffer(buf, dtype=dt)
        else:
            arr = np.empty(shape, dtype=dt)
    else:
        arr = np.frombuffer(buf, dtype=dt)
        arr.flags.writeable = True
        arr = arr.reshape(shape)

    if self.arraytype == pq.Quantity:
        arr = pq.Quantity(arr, units=nprow.units, copy=False)

    # next access will be direct
    setattr(inst, self.name + '_array', arr)
    #~ delattr(inst, 'NumpyArrayTable__'+self.name)

    return arr
def recv(self):
    header = self.__fileobj.read(self.HEADER_LEN)
    if len(header) < self.HEADER_LEN:
        return None
    length = struct.unpack(self.HEADER_STRUCT, header)[0]
    chunks = []
    while length:
        recv = self.__fileobj.read(length)
        if not recv:
            return None
        chunks.append(recv)
        length -= len(recv)
    return SerLib.loads(ZipLib.decompress("".join(chunks)))
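# Hedged sender-side counterpart to the framed readers above: a struct-packed
# length prefix followed by the snappy-compressed body. HEADER_STRUCT and
# frame_message are illustrative names, not part of the examples above.
import struct
import snappy

HEADER_STRUCT = "!I"  # assumed 4-byte big-endian length prefix

def frame_message(payload):
    body = snappy.compress(payload)
    return struct.pack(HEADER_STRUCT, len(body)) + body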
def _get(url, callback, *args):
    key = get_sha1_key(url)
    r = cache_table.find_one({'_id': key})
    if not r:
        throttle.run()
        r = requests.get(url)
        content = callback(r, *args)
        data = bson.binary.Binary(snappy.compress(content))
        cache_table.insert_one({'_id': key, 'data': data})
    else:
        data = r['data']
        content = snappy.decompress(data)
    return content
def decode_micro_events(self, msg):
    """Parses the data field of a MicroEventLog event and returns a
    sequence of MicroEvent messages."""
    if self.arch == ZTrace_pb2.ARCH_X86:
        read_ptr = read_uint32
    elif self.arch == ZTrace_pb2.ARCH_X86_64:
        read_ptr = read_uint64
    else:
        read_ptr = None  # TBD, die?

    if msg.HasField('micro_event_log'):
        fp = cStringIO.StringIO(snappy.decompress(msg.micro_event_log.data))
        curr_pc = 0
        ent = None
        while True:
            rec_type = fp.read(1)
            if not rec_type:
                break
            else:
                rec_type = ord(rec_type)

            if rec_type == OP_NEWPC:
                pc = read_ptr(fp)
                ent = MicroEvent(type=OP_NEWPC, pc=pc)
                curr_pc = pc
            elif rec_type == OP_MEMREAD:
                ea = read_ptr(fp)
                size = read_uint32(fp)
                ent = MicroEvent(type=OP_MEMREAD, pc=curr_pc, ea=ea, size=size)
            elif rec_type == OP_MEMWRITE:
                ea = read_ptr(fp)
                size = read_uint32(fp)
                wdata = fp.read(size)
                ent = MicroEvent(type=OP_MEMWRITE, pc=curr_pc, ea=ea,
                                 data=wdata, size=size)
            elif rec_type == OP_REGWRITE:
                assert 0  # TBD
            elif rec_type == OP_XMMWRITE:
                assert 0  # TBD
            elif rec_type == OP_CALL:
                target = read_ptr(fp)
                sp = read_ptr(fp)
                is_direct_call = read_bool(fp)
                ent = MicroEvent(type=OP_CALL, pc=curr_pc, target=target,
                                 sp=sp, is_direct_call=is_direct_call)
            elif rec_type == OP_RET:
                sp = read_ptr(fp)
                ent = MicroEvent(type=OP_RET, pc=curr_pc, sp=sp)
            else:
                print "%x" % (ord(rec_type))
                assert 0

            yield ent
def snappy_decode(payload): if not has_snappy(): raise NotImplementedError("Snappy codec is not available") if _detect_xerial_stream(payload): # TODO ? Should become a fileobj ? out = BytesIO() byt = payload[16:] length = len(byt) cursor = 0 while cursor < length: block_size = struct.unpack_from('!i', byt[cursor:])[0] # Skip the block size cursor += 4 end = cursor + block_size out.write(snappy.decompress(byt[cursor:end])) cursor = end out.seek(0) return out.read() else: return snappy.decompress(payload)
def view(filename, skip=1):
    cv2.namedWindow('rgb')
    cv2.namedWindow('depth')
    cv2.moveWindow('rgb', 640, 0)
    cv2.moveWindow('depth', 0, 0)
    files = glob.glob('%s/*.snappy' % (filename,)) + glob.glob('%s/*.jpg' % (filename,))
    files = sorted(files, key=lambda f: os.path.basename(f)[2:])[::skip]
    for f in files:
        if f.endswith('.jpg'):
            cv2.imshow('rgb', cv2.imread(f))
            cv2.waitKey(25)
        if f.endswith('.snappy'):
            depth = np.fromstring(snappy.decompress(open(f).read()),
                                  dtype='uint16').reshape((480, 640))
            cv2.imshow('depth', 1024. / depth)
            cv2.waitKey(25)
def read_message(self, message_type, compressed=False, read_size=True):
    """
    Read a protobuf message
    """
    if read_size:
        size = self.read_vint32()
        b = self.read(size)
    else:
        b = self.read()

    if compressed:
        b = snappy.decompress(b)

    m = message_type()
    m.ParseFromString(b)
    return m
def __decompress(self, event):
    original_event = event
    try:
        event['data'] = snappy.decompress(event['data'])
        self.logging.debug("Incoming data decompressed.")
        event['header']['snappy'] = False
    except Exception as err:
        self.logging.warn("Decompressing failed. Reason: %s" % err)
        if self.purge == True:
            return
    try:
        self.queuepool.outbox.put(event)
    except QueueLocked:
        self.queuepool.inbox.rescue(original_event)
        self.queuepool.outbox.waitUntilPutAllowed()
def iter(rgb=True, depth=True, skip=1):
    # Load the image
    fns = []
    fns_ = glob.glob(current_path + '/host-*/*/*.snappy') + glob.glob(current_path + '/host-*/*/*.jpg')
    print len(fns_)
    for fn in fns_:
        host, _, fnbase = fn.split('/')[-3:]
        fngroups = list(FN_RE.search(fnbase).groups())
        cam = int(host[-1]) - 1
        fns.append((fn, cam, float(fngroups[0])))
    fns.sort(key=lambda x: x[2])
    fns = fns[::skip]
    for fn, cam, ts in fns:
        if (fn.endswith('.ppm') or fn.endswith('.jpg')) and rgb:
            yield ((cam, ts, cv2.imread(fn)),), ()
        elif fn.endswith('.snappy') and depth:
            d = np.fromstring(snappy.decompress(open(fn).read()),
                              dtype=np.uint16).reshape((480, 640))
            yield (), ((cam, ts, d),)
def _backend_get(self, keys):
    keys, is_single = tup(keys, ret_is_single=True)

    rows = self.cf.multiget(keys, columns=['value', 'compressed', 'format'])

    ret = {}
    for key, columns in rows.iteritems():
        value = columns['value']

        compressed = columns.get('compressed')
        if compressed == 'zlib':
            with g.stats.get_timer('permacache.deserialize.decompress_zlib'):
                value = zlib.decompress(value)
        elif compressed == 'snappy':
            with g.stats.get_timer('permacache.deserialize.decompress_snappy'):
                value = snappy.decompress(value)
        elif compressed:
            raise Exception("Unknown compression format %r(%r)"
                            % (compression, format))

        format = columns.get('format') or 'pickle'
        if format == 'pickle':
            with g.stats.get_timer('permacache.deserialize.pickle'):
                value = pickle.loads(value)
        elif format == 'json':
            with g.stats.get_timer('permacache.deserialize.json'):
                value = json.loads(value)
        else:
            # we don't know how to deal with any other formats
            raise Exception("Unknown permacache serialization format %r"
                            % (format,))

        ret[key] = value

    if is_single:
        if ret:
            return ret.values()[0]
        else:
            return None
    else:
        return ret