def take_action(self, parsed_args):
    self.log = logging.getLogger(self.__class__.__name__)
    self.log.debug('Initialized %s', self.__class__.__name__)
    self.log.debug('Args: %s', str(parsed_args))
    self.log.debug("Establishing connection")
    c, db, collection = OutputDBInterface.get_db_connection(hostname=parsed_args.hostname)
    cursor = collection.find()
    N = cursor.count()
    if N == 0:
        self.log.error("No events in the output database; no file made.")
        return
    f = gzip.open(parsed_args.filename, 'wb')
    pickle.dump(__version__, f)
    self.log.info("Processing %d trigger events" % N)
    for i in tqdm(range(N)):
        doc = next(cursor)
        doc2 = snappy.uncompress(doc['compressed_doc'])
        doc2 = pickle.loads(doc2)
        pickle.dump(doc2, f)
    f.close()
def decompress(data):
    """
    Decompresses the given data via the snappy algorithm.

    If ``python-snappy`` is not installed a ``RuntimeError`` is raised.
    """
    if not snappy_available:
        raise RuntimeError("Snappy compression unavailable.")

    buff_offset = len(raw_header)  # skip the header
    length = len(data) - len(raw_header)

    output = BytesIO()
    while buff_offset <= length:
        block_size = struct.unpack_from("!i", data, buff_offset)[0]
        buff_offset += struct.calcsize("!i")
        block = struct.unpack_from("!%ds" % block_size, data, buff_offset)[0]
        buff_offset += block_size
        output.write(snappy.uncompress(block))

    result = output.getvalue()
    output.close()
    return result
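# A minimal counterpart sketch for the framed format read by decompress()
# above: each block is written as a big-endian int32 length prefix followed
# by the snappy-compressed bytes, after the same preamble. ``raw_header``,
# ``snappy_available``, and the 64 KiB block size are assumptions carried
# over from the surrounding module, not confirmed API.
def compress_framed(data, block_size=65536):
    if not snappy_available:
        raise RuntimeError("Snappy compression unavailable.")
    output = BytesIO()
    output.write(raw_header)  # preamble skipped by decompress()
    for start in range(0, len(data), block_size):
        block = snappy.compress(data[start:start + block_size])
        output.write(struct.pack("!i", len(block)))  # length prefix
        output.write(block)
    return output.getvalue()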
def __init__(self, filename):
    self.fileName = filename
    self.api = "API_UNKNOWN"
    self.traceFile = open(self.fileName, 'rb+')
    self.filePointer = 0
    self.fileSize = os.path.getsize(self.fileName)
    self.nextCallNumber = 0
    self.lastFrameBreakPos = 0
    self.container = 0
    self.containerPointer = 0
    self.fullFilePosition = 0
    self.mem = self.traceFile.read(2)
    self.filePointer += 2
    if self.mem[0:2] != b'at':
        raise Exception("not a snappy file!")
    length = int(struct.unpack('I', self.traceFile.read(4))[0])
    self.filePointer += 4
    compressedMem = self.traceFile.read(length)
    self.filePointer += length
    self.mem = snappy.uncompress(compressedMem)
    self.getVersion(self.mem)
def content(self):
    """
    Fetches the content from the *_content* attribute or from the
    *_blobs* directory and returns it decompressed.
    """
    # If the content has not been saved yet, it is still held in
    # *self._content*.
    if self._content is not None:
        return self._content

    # Check whether a blob exists for the content
    if not self.content_blob_name:
        return None

    # Load the blob, decompress it and return it
    blob_dir = os.path.join(config.DATABLOBSDIR.value, self.content_blob_name[0])
    blob_path = os.path.join(blob_dir, self.content_blob_name)
    with io.open(blob_path, "rb") as blob_file:
        if self.content_blob_name.endswith(".snappy"):
            content = snappy.uncompress(blob_file.read())
        else:
            content = blob_file.read()
    if content and self.node.content_type in constants.CONTENT_TYPES_TEXT:
        return content.decode("utf-8")
    else:
        return content
def print_journal(fn, at=0):
    # file structure
    header_struct = xstruct('< 5s 20s 1s 128s 2s Q')
    section_struct = xstruct('< I Q Q')
    footer_struct = xstruct('< I QQ Q 4s')
    align = 8192

    # open file
    f = open(fn, 'rb')
    sz = os.fstat(f.fileno()).st_size  # 2.4 won't accept 0
    buf = mmap.mmap(f.fileno(), sz, prot=mmap.PROT_READ)

    # file header
    magic, date, _, path, _, fileid = unpack_from(header_struct, buf)
    path = path[:path.find('\0')]
    date = date[:date.find('\0')]
    print '%08x: header magic=%s date=%s path=%s fid=%x' % (0, hex(magic), date, path, fileid)
    if at == 0:
        at = 8192

    # traverse file
    while at < len(buf):

        # section header
        l, lsn, fid = unpack_from(section_struct, buf, at)
        lp = (l + align-1) & ~(align-1)
        section_at = at + 20
        footer_at = at + l - 32
        ok = 'OK'
        if fid != fileid:
            ok = 'BAD'
        print '%08x: section l=%x(%d) lp=%x(%d) lsn=%x(%d) fid=%x(%s)' % \
            (at, l, l, lp, lp, lsn, lsn, fid, ok)

        # compute hash, compare with footer
        sentinel, hash_a, hash_b, reserved, magic = unpack_from(footer_struct, buf, footer_at)
        computed_hash_a, computed_hash_b = hash(buf[at:footer_at])
        hash_ok = 'OK'
        if not (hash_a == computed_hash_a and hash_b == computed_hash_b):
            hash_ok = 'BAD'
        print '%08x: hash=%08x:%08x(%s)' % (at, computed_hash_a, computed_hash_b, hash_ok)

        # section
        try:
            if snappy:
                section = snappy.uncompress(buf[section_at:footer_at])
                print '%08x: uncompressed length=%x(%d)' % (section_at, len(section), len(section))
                if do_journal_entries:
                    print_journal_entries(section)
        except Exception, e:
            print '%08x: %s' % (section_at, e)

        # section footer
        print '%08x: footer sentinel=%x hash=%08x:%08x(%s) magic=%s' % \
            (footer_at, sentinel, hash_a, hash_b, hash_ok, hex(magic))

        # next section
        at += lp
def _unpack_msgpack_snappy(str):
    if str.startswith(b'S'):
        tmp = snappy.uncompress(str[1:])
        # print "SNAPPY: ", len(str), len(tmp)
        obj = msgpack.loads(tmp, encoding='utf-8')
    elif str.startswith(b'\0'):
        obj = msgpack.loads(str[1:], encoding='utf-8')
    else:
        return None
    return obj
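# A hedged sketch of the matching packer for the one-byte framing used by
# _unpack_msgpack_snappy() above: payloads starting with b'S' are
# snappy-compressed msgpack, payloads starting with b'\0' are plain msgpack.
# The function name and the size threshold are assumptions for illustration,
# not part of the original module.
def _pack_msgpack_snappy(obj, compress_threshold=1000):
    packed = msgpack.dumps(obj)
    if len(packed) > compress_threshold:
        return b'S' + snappy.compress(packed)  # compressed frame
    return b'\0' + packed                      # plain frame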
def _unpack(str):
    if str[0] == 'S':
        tmp = snappy.uncompress(str[1:])
        obj = msgpack.loads(tmp)
    elif str[0] == '\0':
        obj = msgpack.loads(str[1:])
    else:
        return None
    # print "UNPACK", obj
    return obj
def _unpack(str):
    if str[0] == 'S':
        tmp = snappy.uncompress(str[1:])
        # print "SNAPPY: ", len(str), len(tmp)
        obj = msgpack.loads(tmp)
    elif str[0] == '\0':
        obj = msgpack.loads(str[1:])
    else:
        return None
    return obj
def getByte(self):
    if self.containerPointer == len(self.mem):
        length = int(struct.unpack('I', self.traceFile.read(4))[0])
        self.filePointer += 4
        compressedMem = self.traceFile.read(length)
        self.filePointer += length
        self.container += 1
        self.mem = snappy.uncompress(compressedMem)
        self.containerPointer = 0
    rval = self.mem[self.containerPointer]
    self.containerPointer += 1
    self.fullFilePosition += 1
    return rval
def read(stream, peek):
    if peek.cls not in PBMSG_BY_KIND.values():
        msg = 'please update demo.proto: {0}'.format(peek.cls)
        raise InvalidProtobufMessage(msg)
    stream.seek(peek.offset)
    data = stream.read(peek.size)
    if peek.compressed:
        data = snappy.uncompress(data)
    message = peek.cls()
    message.ParseFromString(data)
    return message
def _decompress_subblock(self):
    if self._subblock_size is None:
        if len(self._buf) <= 4:
            return b""
        self._subblock_size = struct.unpack(">i", self._buf[:4])[0]
        self._buf = self._buf[4:]
    # Only attempt to decompress complete subblocks.
    if len(self._buf) < self._subblock_size:
        return b""
    compressed = self._buf[:self._subblock_size]
    self._buf = self._buf[self._subblock_size:]
    uncompressed = snappy.uncompress(compressed)
    self._block_read += len(uncompressed)
    self._subblock_size = None
    return uncompressed
def decompress(data, compressor_id):
    if compressor_id == SnappyContext.compressor_id:
        # python-snappy doesn't support the buffer interface.
        # https://github.com/andrix/python-snappy/issues/65
        # This only matters when data is a memoryview since
        # id(bytes(data)) == id(data) when data is a bytes.
        # NOTE: bytes(memoryview) returns the memoryview repr
        # in Python 2.7. The right thing to do in 2.7 is call
        # memoryview.tobytes(), but we currently only use
        # memoryview in Python 3.x.
        return snappy.uncompress(bytes(data))
    elif compressor_id == ZlibContext.compressor_id:
        return zlib.decompress(data)
    else:
        raise ValueError("Unknown compressorId %d" % (compressor_id,))
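# A quick round-trip sketch for the dispatcher above. In the MongoDB wire
# protocol these contexts map snappy to compressor id 1 and zlib to id 2,
# but treat the concrete ids as assumptions here and rely on the *Context
# classes' own compressor_id attributes, as the real code does.
payload = b"example payload" * 100
assert decompress(snappy.compress(payload), SnappyContext.compressor_id) == payload
assert decompress(zlib.compress(payload), ZlibContext.compressor_id) == payload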
def decompress(data, compressor_id):
    if compressor_id == SnappyContext.compressor_id:
        # python-snappy doesn't support the buffer interface.
        # https://github.com/andrix/python-snappy/issues/65
        # This only matters when data is a memoryview since
        # id(bytes(data)) == id(data) when data is a bytes.
        return snappy.uncompress(bytes(data))
    elif compressor_id == ZlibContext.compressor_id:
        return zlib.decompress(data)
    elif compressor_id == ZstdContext.compressor_id:
        # ZstdDecompressor is not thread safe.
        # TODO: Use a pool?
        return ZstdDecompressor().decompress(data)
    else:
        raise ValueError("Unknown compressorId %d" % (compressor_id,))
def find_by_bond_topology_id(self, btid):
    """Finds all the conformers associated with a bond topology id.

    Args:
      btid: bond topology id to look up.

    Returns:
      iterable of dataset_pb2.Conformer
    """
    cur = self._conn.cursor()
    select = (f'SELECT cid, conformer '
              f'FROM {_CONFORMER_TABLE_NAME} '
              f'INNER JOIN {_BTID_TABLE_NAME} USING(cid) '
              f'WHERE {_BTID_TABLE_NAME}.btid = ?')
    cur.execute(select, (btid,))
    return (dataset_pb2.Conformer().FromString(snappy.uncompress(result[1]))
            for result in cur)
def post(self):
    """Insert a message"""
    # print self.request.body
    print "get message"
    userid = self.get_argument("userid")
    pcid = self.get_argument("pcid")
    packettype = self.get_argument("type")
    pos = self.request.body.find("&data=")
    if pos == -1:
        self.write('error')
        self.finish()
        return
    snappydata = self.request.body[pos+6:]
    try:
        protodata = snappy.uncompress(snappydata)
        for package in protodata.split("!!!"):
            if len(package) < 10:
                continue
            if packettype == "1":
                packetobj = IpPacket()
                packetobj.ParseFromString(package)
                self.saveipdata(packetobj, userid, pcid)
            elif packettype == "2":
                packetobj = EmailPacket()
                packetobj.ParseFromString(package)
                self.saveemaildata(packetobj, userid, pcid)
            elif packettype == "3":
                packetobj = HttpPacket()
                packetobj.ParseFromString(package)
                self.savehttpdata(packetobj, userid, pcid)
    except:
        print "error"
        print "print self.request.body", self.request.body
        print "self.request.arguments", self.request.arguments
        traceback.print_exc()
        self.write('ok')
        self.finish()
        return
    self.write('ok')
    self.finish()
def imgmsg_to_pil(img_msg, rgba=True):
    try:
        uncompressed_img_msg = sensor_msgs.msg.Image()
        uncompressed_img_msg.header = img_msg.header
        uncompressed_img_msg.height = img_msg.height
        uncompressed_img_msg.width = img_msg.width
        uncompressed_img_msg.step = 1
        uncompressed_img_msg.encoding = 'mono8'
        uncompressed_img_msg.data = snappy.uncompress(np.fromstring(img_msg.data, dtype='uint8'))
        if img_msg._type == 'sensor_msgs/CompressedImage':
            pil_img = Image.open(StringIO(img_msg.data))
            if pil_img.mode != 'L':
                pil_img = pil_bgr2rgb(pil_img)
        else:
            alpha = False
            if uncompressed_img_msg.encoding == 'mono8':
                mode = 'L'
            elif uncompressed_img_msg.encoding == 'rgb8':
                mode = 'BGR'
            elif uncompressed_img_msg.encoding == 'bgr8':
                mode = 'RGB'
            elif uncompressed_img_msg.encoding in ['bayer_rggb8', 'bayer_bggr8', 'bayer_gbrg8', 'bayer_grbg8']:
                mode = 'L'
            elif uncompressed_img_msg.encoding == 'mono16':
                if uncompressed_img_msg.is_bigendian:
                    mode = 'F;16B'
                else:
                    mode = 'F:16'
            elif uncompressed_img_msg.encoding == 'rgba8':
                mode = 'BGR'
                alpha = True
            elif uncompressed_img_msg.encoding == 'bgra8':
                mode = 'RGB'
                alpha = True
            pil_img = Image.frombuffer('RGB', (uncompressed_img_msg.width, uncompressed_img_msg.height),
                                       uncompressed_img_msg.data, 'raw', mode, 0, 1)
        if rgba and pil_img.mode != 'RGBA':
            pil_img = pil_img.convert('RGBA')
        return pil_img
    except Exception, ex:
        print >> sys.stderr, 'Can\'t convert image: %s' % ex
        return None
def format(self, value):
    try:
        if is_gzip(value):
            output = gzip.decompress(value)
        elif is_lzma(value):
            output = lzma.decompress(value)
        elif is_snappy(value):
            if SNAPPY_SUPPORT:
                output = snappy.uncompress(value)
            else:
                return self.process_error(
                    'Cannot decompress value: '
                    'Snappy is not available on this system.')
        else:
            output = lz4.block.decompress(value)
        return output
    except OSError as e:
        return self.process_error('Cannot decompress value: {}'.format(e))
def depth_from_binary(binary_name, imgsize=(240, 320)):
    """
    Decode a binary file containing snappy-compressed depth images and return
    the depth images as a numpy ndarray.

    :param binary_name: The file name of the binary file to read.
    :param imgsize: The size (height, width) of each uncompressed image.
    :return: numpy array containing 'l' images of size 'imgsize'.
    """
    images = list()
    with open(binary_name, 'rb') as fp:
        b = fp.read(4)
        while b != b'':
            k = struct.unpack('<L', b)[0]
            image_bytes = fp.read(k)
            images.append(snappy.uncompress(image_bytes))
            b = fp.read(4)
    l = len(images)
    images = np.fromstring(b''.join(images), dtype=np.dtype('>u2'))
    return images.reshape((l,) + imgsize)
def Retrieve(self, item):
    """
    Retrieve one piece of data from the table.

    Args:
        item: An integer, which specifies the index number of the data to be retrieved.

    Returns:
        Bytes retrieved from the table.

    Raises:
        KeyError: when the index number is out of range.
        RuntimeError: when failed to retrieve data.
    """
    with self.lock.gen_rlock():
        if (self.index is None) or (self.head is None):
            raise RuntimeError('The table or the item is inaccessible')
        if self.items <= item:  # atomic.load(self.items)
            raise KeyError('The item number is out of bounds')
        if self.itemOffset > item:
            raise KeyError('The item offset number is out of bounds')
        startOffset, endOffset, filenum = self.GetBounds(item - self.itemOffset)
        dataFile = self.files.get(filenum, None)
        if dataFile is None:
            raise RuntimeError('missing data file {filenum}'.format(filenum=filenum))
        dataFile.seek(startOffset)
        blob = dataFile.read(endOffset - startOffset)
        # self.readMeter.Mark(len(blob) + 2 * INDEX_ENTRY_SIZE)
        if self.noCompression:
            return blob
        return snappy.uncompress(blob)
def main():
    args = parser.parse_args()
    if args.action not in actions:
        print("Error: Invalid action %s" % args.action)
        sys.exit(1)

    def process_error(msg):
        if args.action == ACTION_VALIDATE:
            return print(json.dumps({"valid": False, "message": msg}))
        else:
            print(msg)
            sys.exit(2)

    try:
        decoded_value = base64.b64decode(args.value)
    except binascii.Error as e:
        return process_error("Cannot decode value: %s" % e)

    try:
        if is_gzip(decoded_value):
            unpacked_value = gzip.decompress(decoded_value)
        elif is_lzma(decoded_value):
            unpacked_value = lzma.decompress(decoded_value)
        elif is_snappy(decoded_value):
            unpacked_value = snappy.uncompress(decoded_value)
        else:
            unpacked_value = lz4.block.decompress(decoded_value)
    except OSError as e:
        return process_error("Cannot decompress value: %s" % e)

    unpacked_value = unpacked_value.decode()
    if args.action == ACTION_VALIDATE:
        return print(json.dumps({"valid": True, "message": ""}))
    else:
        return print(
            json.dumps({
                "output": repr(unpacked_value),
                "read-only": True,
                "format": "plain_text",
            }))
def find_by_expanded_stoichiometry(self, exp_stoich):
    """Finds all of the conformers with a stoichiometry.

    The expanded stoichiometry includes hydrogens as part of the atom type.
    See smu_utils_lib.expanded_stoichiometry_from_topology for a description.

    Args:
      exp_stoich: string

    Returns:
      iterable of dataset_pb2.Conformer
    """
    cur = self._conn.cursor()
    select = (f'SELECT conformer '
              f'FROM {_CONFORMER_TABLE_NAME} '
              f'WHERE exp_stoich = ?')
    cur.execute(select, (exp_stoich,))
    return (dataset_pb2.Conformer().FromString(snappy.uncompress(result[0]))
            for result in cur)
def process_raw(self, ctx, evidence_uuid, pipeline, data, raw, return_result=False, autosave=True):
    pipeline = self._get_pipeline(pipeline)
    if not pipeline:
        return

    # StringIO for raw data
    stream = StringIO.StringIO(snappy.uncompress(base64.b64decode(raw)))

    # Perform the actual processing
    bundle = Bundle(self.server, evidence_uuid, pipeline, data, stream)
    try:
        bundle = pipeline.process(bundle)
    except Exception as e:
        bundle.add_exception(e, traceback=''.join(traceback.format_exc()))

    data = bundle.data
    if autosave:
        data = self._save_result(evidence_uuid, data, wait=return_result)
    if return_result:
        return data
def imgmsg_to_pil(img_msg, rgba=True):
    try:
        uncompressed_img_msg = sensor_msgs.msg.Image()
        uncompressed_img_msg.header = img_msg.header
        uncompressed_img_msg.height = img_msg.height
        uncompressed_img_msg.width = img_msg.width
        uncompressed_img_msg.step = 1
        uncompressed_img_msg.encoding = 'mono8'
        uncompressed_img_msg.data = snappy.uncompress(np.fromstring(img_msg.data, dtype='uint8'))
        alpha = False
        mode = 'L'
        pil_img = Image.frombuffer('RGB', (uncompressed_img_msg.width, uncompressed_img_msg.height),
                                   uncompressed_img_msg.data, 'raw', mode, 0, 1)
        if rgba and pil_img.mode != 'RGBA':
            pil_img = pil_img.convert('RGBA')
        return pil_img
    except Exception, ex:
        print >> sys.stderr, 'Can\'t convert image: %s' % ex
        return None
def _getblock_inet(self):
    result = BytesIO()
    last = 0
    while not last:
        if self.protocol == Protocol.prot9:
            flag = self._getbytes(2)
            unpacked = struct.unpack('<H', flag)[0]  # little endian short
            length = unpacked >> 1
            last = unpacked & 1
        else:
            flag = self._getbytes(8)
            unpacked = struct.unpack('<q', flag)[0]  # little endian long long
            length = unpacked >> 1
            last = unpacked & 1
        if length > 0:
            block = self._getbytes(length)
            if self.compression == Compression.snappy:
                block = snappy.uncompress(block)
            result.write(block)
    return result.getvalue()
def read_idtk_file(filename):
    """
    :param filename: source data filename (DTK serialized data format)
    :return: header, payload, contents, data - parsed JSON header, raw payload data,
             decompressed (if appropriate) payload data, and parsed JSON data
    """
    header, payload = read_idtk_file_components(filename)
    # A string isn't very useful, convert the JSON header to data.
    header = json.loads(header, object_pairs_hook=OrderedDict)
    contents = None
    if 'compressed' in header['metadata'] and header['metadata']['compressed']:
        contents = timing(lambda: snappy.uncompress(payload), message_index=DECOMPRESS_PAYLOAD)
    else:
        contents = payload
    data = timing(lambda: json.loads(contents, object_pairs_hook=OrderedDict), message_index=PARSE_JSON)
    return header, payload, contents, data
def parse_header(data, offset=0):
    request_id, response_to, op_code = struct.unpack_from("<III", data, offset)
    offset += 12
    if op_code == OP_COMPRESSED:
        op_code, uncompressed_size, compressor_id = struct.unpack_from("<IIB", data, offset)
        offset += 9
        if compressor_id == COMPRESSOR_ZLIB:
            data = zlib.decompress(memoryview(data)[offset:], bufsize=uncompressed_size)
            offset = 0
        elif compressor_id == COMPRESSOR_SNAPPY and SNAPPY_SUPPORTED:
            data = snappy.uncompress(memoryview(data)[offset:])
            offset = 0
        elif compressor_id == COMPRESSOR_NOOP:
            pass
        else:
            raise ValueError("Unsupported compressor")
    return request_id, response_to, op_code, data, offset
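# A hedged sketch of building a frame that parse_header() above can consume:
# three little-endian uint32 fields (request id, response-to, opcode), then
# for OP_COMPRESSED the original opcode, the uncompressed size, a one-byte
# compressor id, and the compressed body. The constant names come from the
# surrounding module; 2004 as the inner opcode (OP_QUERY in the MongoDB wire
# protocol) is an assumption for illustration only.
raw_body = b"\x00" * 64
frame = struct.pack("<III", 1, 0, OP_COMPRESSED)           # requestID, responseTo, opCode
frame += struct.pack("<IIB", 2004, len(raw_body), COMPRESSOR_SNAPPY)
frame += snappy.compress(raw_body)
request_id, response_to, op_code, data, offset = parse_header(frame)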
def find_by_expanded_stoichiometry_list(self, exp_stoichs):
    """Finds all of the molecules with a stoichiometry.

    The expanded stoichiometry includes hydrogens as part of the atom type.
    See smu_utils_lib.expanded_stoichiometry_from_topology for a description.

    Args:
      exp_stoichs: list of string

    Returns:
      iterable of dataset_pb2.Molecule
    """
    cur = self._conn.cursor()
    select = ''.join([
        f'SELECT conformer '
        f'FROM {_MOLECULE_TABLE_NAME} '
        f'WHERE exp_stoich IN (', ','.join('?' for _ in exp_stoichs), ')'
    ])
    cur.execute(select, exp_stoichs)
    return (dataset_pb2.Molecule().FromString(snappy.uncompress(result[0]))
            for result in cur)
def items(self, filter=None):
    '''Yield (name, data) pairs for the records in the same order they
    appear in the file.

    @p filter -- same as for the get function
    '''
    records = {}
    for block_type, block_data in self._read_blocks():
        if block_type == _BLOCK_SCHEMA:
            identifier, bulk_record = self._parse_schema(block_data, filter=filter)
            if identifier is None:
                continue
            records[identifier] = bulk_record
        elif block_type == _BLOCK_DATA:
            stream = telemetry_archive.ReadStream(stringio.StringIO(block_data))
            identifier = stream.read_uint32()
            flags = stream.read_uint16()
            record = records.get(identifier, None)
            if record is None:
                continue
            if flags & BlockDataFlags.kPreviousOffset:
                flags &= ~(BlockDataFlags.kPreviousOffset)
                _ = stream.read_varint()
            if flags & BlockDataFlags.kSchemaCRC:
                assert False  # not supported yet
            if flags & BlockDataFlags.kSnappy:
                flags &= ~(BlockDataFlags.kSnappy)
                rest = stream.stream.read()
                stream = telemetry_archive.ReadStream(
                    stringio.StringIO(snappy.uncompress(rest)))
            assert flags == 0  # no unknown flags
            rest = stream.stream.read()
            yield record.name, record.deserialize(rest)
def ParseBlock(blockBytes, compressed, crcBytes):
    if compressed == 1:
        blockBytes = snappy.uncompress(blockBytes)
    kvPair = dict()
    try:
        numRestarts = blockBytes[-1]
        stream2 = io.BytesIO(blockBytes[:-1 * (1 + 4 * numRestarts)])
        bContinue = True
        curKey = ''
        while bContinue:
            sharedKeyLen = varint.decode_stream(stream2)
            inlineKeyLen = varint.decode_stream(stream2)
            valueLen = varint.decode_stream(stream2)
            inlineKey = stream2.read(inlineKeyLen)
            valData = stream2.read(valueLen)
            if len(inlineKey) >= 8:
                keyName = inlineKey[:-8]
                keySequence = int.from_bytes(inlineKey[-7:], 'little')
                keySt = inlineKey[-8]
                if sharedKeyLen != 0:
                    curKey = curKey[:sharedKeyLen] + keyName
                else:
                    curKey = keyName
                kvPair[curKey] = [keySt, keySequence, valData]
                if keySequence == 0xffffffffffffff:
                    bContinue = False
            if inlineKeyLen == 0 and valueLen == 0:
                bContinue = False
    except Exception as e:
        print("ParseBlock exception: " + str(e))
    return kvPair
def get_samples_from_doc(doc, is_compressed):
    """From a mongo document, fetch the data payload and decompress if necessary.

    Args:
        doc (dictionary): Document from mongodb to analyze
        is_compressed (bool): Whether the payload is snappy-compressed

    Returns:
        bytes: decompressed data
    """
    data = doc['data']
    assert len(data) != 0
    if is_compressed:
        data = snappy.uncompress(data)
    data = np.fromstring(data, dtype=SAMPLE_TYPE)
    if len(data) == 0:
        raise IndexError("Data has zero length")
    return data
def readFrom(cls, con):
    header = con.read(4)
    method = ord(header[0])
    size = (ord(header[1]) << 16) + (ord(header[2]) << 8) + (ord(header[3]))
    bytes = con.read(size)
    if method == Message.Encoding.Raw:
        pass
    elif method == Message.Encoding.Snappy:
        try:
            bytes = snappy.uncompress(bytes)
        except snappy.UncompressError:
            raise FramingError(FramingError.InvalidCompressedData)
        size = len(bytes)
        if size >= 1 << 24:
            raise FramingError(FramingError.MessageTooLarge)
    else:
        raise FramingError(FramingError.UnknownEncoding)
    try:
        data = msgpack.unpackb(bytes)
    except msgpack.UnpackException:
        raise FramingError(FramingError.InvalidFormatedData)
    return cls.decode(data)
def get_user_data(self):
    questionnaire_state = self._find_questionnaire_state()
    if questionnaire_state:
        version = questionnaire_state.version or 0
        try:
            # legacy data was stored in a dict, base64-encoded, and not compressed
            data = json.loads(questionnaire_state.state_data)['data']
            is_legacy_data = True
        except ValueError:
            data = questionnaire_state.state_data
            is_legacy_data = False

        decrypted_data = self.encrypter.decrypt_data(data)
        if is_legacy_data:
            decrypted_data = base64url_decode(decrypted_data.decode()).decode()
        else:
            decrypted_data = snappy.uncompress(decrypted_data).decode()
        return decrypted_data, version
    return None, None
def find_by_stoichiometry(self, stoich):
    """Finds all conformers with a given stoichiometry.

    The stoichiometry is like "C6H12". Internally, the stoichiometry is
    converted to a set of expanded stoichiometries and the query is done to
    find all of those. Notably, this means only records with status <= 512
    are returned.

    Args:
      stoich: stoichiometry string like "C6H12", case doesn't matter

    Returns:
      Iterable of type dataset_pb2.Conformer.
    """
    exp_stoichs = list(
        smu_utils_lib.expanded_stoichiometries_from_stoichiometry(stoich))
    cur = self._conn.cursor()
    select = (f'SELECT conformer '
              f'FROM {_CONFORMER_TABLE_NAME} '
              f'WHERE exp_stoich IN (' + ','.join('?' for _ in exp_stoichs) + ')')
    cur.execute(select, exp_stoichs)
    return (dataset_pb2.Conformer().FromString(snappy.uncompress(result[0]))
            for result in cur)
def get_data_from_doc(doc):
    """From a mongo document, fetch the data payload and decompress if necessary.

    Args:
        doc (dictionary): Document from mongodb to analyze

    Returns:
        bytes: decompressed data
    """
    data = doc['data']
    assert len(data) != 0
    if doc['zipped']:
        data = snappy.uncompress(data)
    data = np.fromstring(data, dtype=np.uint32)
    if len(data) == 0:
        raise IndexError("Data has zero length")
    return data
def to_python(self, value):
    return pickle.loads(snappy.uncompress(value))
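# to_python() above looks like the deserializing half of a custom ORM field
# (Django-style) that stores pickled, snappy-compressed blobs. A minimal
# sketch of the serializing half, assuming that convention; get_prep_value
# is the name Django's custom-field API uses for this hook.
def get_prep_value(self, value):
    return snappy.compress(pickle.dumps(value))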
import sys
import os.path
from snappy import uncompress

## box_size is the width and the height of each tile
box_size = 500
## usize is the maximum of the first tag of the snappy files
usize = 25500
## vsize is the maximum of the second tag of the snappy files
vsize = 20500

if __name__ == "__main__":
    # iterate through all snappy files in the data directory
    for u in range(0, usize, box_size):
        for v in range(0, vsize, box_size):
            ## compressed files are in the data directory
            file = 'data/(%d, %d).snappy' % (u, v)
            if os.path.isfile(file):
                _if = file
                ## decompressed files are in the decomp_data directory
                of = 'decomp_data/(%d,%d)' % (u, v)
                with open(_if, 'rb') as f:
                    ## data copied from the compressed file
                    data = f.read()
                ## uncompressed data
                decomp = uncompress(data)
                with open(of, 'wb') as oof:
                    oof.write(decomp)
def consume_batch_async(
        self, batch: pump.Batch
) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.SinkBatchFuture]]:
    if not self.writer:
        self.csvfile = sys.stdout
        if self.spec.startswith(CSVSink.CSV_JSON_SCHEME):
            if len(batch.msgs) <= 0:
                future = pump.SinkBatchFuture(self, batch)
                self.future_done(future, 0)
                return 0, future
            cmd, vbucket_id, key, flg, exp, cas, meta, val_bytes = batch.msgs[0][:8]
            doc = json.loads(val_bytes)
            self.fields = sorted(doc.keys())
            if 'id' not in self.fields:
                self.fields = ['id'] + self.fields
            if self.spec.endswith(".csv"):
                filename = self.get_csvfile(self.spec[len(CSVSink.CSV_JSON_SCHEME):])
                try:
                    self.csvfile = open(filename, "w", encoding='utf-8')
                except IOError as e:
                    return f'error: could not write csv to file: {filename}', None
            self.writer = csv.writer(self.csvfile)
            self.writer.writerow(self.fields)
        else:
            if self.spec.endswith(".csv"):
                filename = self.get_csvfile(self.spec[len(CSVSink.CSV_SCHEME):])
                try:
                    self.csvfile = open(filename, "w", encoding='utf-8')
                except IOError as e:
                    return f'error: could not write csv to file: {filename}', None
            self.writer = csv.writer(self.csvfile)
            self.writer.writerow(['id', 'flags', 'expiration', 'cas', 'value', 'rev', 'vbid', 'dtype'])

    msg_tuple_format = 0
    for msg in batch.msgs:
        cmd, vbucket_id, key, flg, exp, cas, meta, val_bytes = msg[:8]
        if self.skip(key, vbucket_id):
            continue
        if not msg_tuple_format:
            msg_tuple_format = len(msg)
        seqno = dtype = nmeta = 0
        if msg_tuple_format > 8:
            seqno, dtype, nmeta, conf_res = msg[8:12]
        if dtype > 2:
            try:
                val_bytes = snappy.uncompress(val_bytes)
            except Exception as err:
                pass
        try:
            if cmd in [couchbaseConstants.CMD_TAP_MUTATION, couchbaseConstants.CMD_DCP_MUTATION]:
                if self.fields:
                    if val_bytes and len(val_bytes) > 0:
                        try:
                            row = []
                            doc = json.loads(val_bytes)
                            if type(doc) == dict:
                                for field in self.fields:
                                    if field == 'id':
                                        row.append(pump.returnString(key))
                                    else:
                                        row.append(doc[field])
                                self.writer.writerow(row)
                        except ValueError:
                            pass
                else:
                    # rev = self.convert_meta(meta)
                    self.writer.writerow([pump.returnString(key), flg, exp, cas, val_bytes,
                                          meta, vbucket_id, dtype])
            elif cmd in [couchbaseConstants.CMD_TAP_DELETE, couchbaseConstants.CMD_DCP_DELETE]:
                pass
            elif cmd == couchbaseConstants.CMD_GET:
                pass
            else:
                return f'error: CSVSink - unknown cmd: {cmd!s}', None
        except IOError:
            return "error: could not write csv to stdout", None

    future = pump.SinkBatchFuture(self, batch)
    self.future_done(future, 0)
    return 0, future
def test_simple_compress(self):
    text = "hello world!".encode('utf-8')
    compressed = snappy.compress(text)
    self.assertEqual(text, snappy.uncompress(compressed))
def send_msgs(self, conn: cb_bin_client.MemcachedClient, msgs: List[couchbaseConstants.BATCH_MSG],
              operation: str, vbucket_id: Optional[int] = None) -> couchbaseConstants.PUMP_ERROR:
    m: List[bytes] = []
    msg_format_length = 0
    for i, msg in enumerate(msgs):
        if not msg_format_length:
            msg_format_length = len(msg)
        cmd, vbucket_id_msg, key, flg, exp, cas, meta, val = msg[:8]
        seqno = dtype = nmeta = conf_res = 0
        if msg_format_length > 8:
            seqno, dtype, nmeta, conf_res = msg[8:12]
        if vbucket_id is not None:
            vbucket_id_msg = vbucket_id
        if self.skip(key, vbucket_id_msg):
            continue

        if cmd == couchbaseConstants.CMD_SUBDOC_MULTIPATH_MUTATION:
            err, req = self.format_multipath_mutation(key, val, vbucket_id_msg, cas, i)
            if err:
                return err
            self.append_req(m, req)
            continue
        if cmd == couchbaseConstants.CMD_SUBDOC_MULTIPATH_LOOKUP:
            err, req = self.format_multipath_lookup(key, val, vbucket_id_msg, cas, i)
            if err:
                return err
            self.append_req(m, req)
            continue

        rv, translated_cmd = self.translate_cmd(cmd, operation, meta)
        if translated_cmd is None:
            return rv
        if dtype > 2:
            if self.uncompress and val:
                try:
                    val = snappy.uncompress(val)
                except Exception as err:
                    pass
        if translated_cmd == couchbaseConstants.CMD_GET:
            val, flg, exp, cas = b'', 0, 0, 0
        if translated_cmd == couchbaseConstants.CMD_NOOP:
            key, val, flg, exp, cas = b'', b'', 0, 0, 0
        if translated_cmd == couchbaseConstants.CMD_DELETE:
            val = b''
        # A tombstone can contain Xattrs
        if translated_cmd == couchbaseConstants.CMD_DELETE_WITH_META and \
                not dtype & couchbaseConstants.DATATYPE_HAS_XATTR:
            val = b''
        # on mutations filter txn related data
        if translated_cmd == couchbaseConstants.CMD_SET_WITH_META or \
                translated_cmd == couchbaseConstants.CMD_SET:
            if not getattr(self.opts, 'force_txn', False):
                skip, val, cas, exp, dtype = self.filter_out_txn(key, val, cas, exp, dtype)
                if skip:
                    continue
        rv, req = self.cmd_request(translated_cmd, vbucket_id_msg, key, val,  # type: ignore
                                   ctypes.c_uint32(flg).value, exp, cas, meta, i, dtype,
                                   nmeta, conf_res)  # type: ignore
        if rv != 0:
            return rv
        self.append_req(m, req)
    if m:
        try:
            conn.s.sendall(self.join_str_and_bytes(m))  # type: ignore
        except socket.error as e:
            return f'error: conn.sendall() exception: {e}'
    return 0
def consume_batch_async(self, batch):
    op = self.operation()
    op_mutate = op in ['set', 'add']

    stdout = sys.stdout
    msg_visitor = None

    opts_etc = getattr(self.opts, "etc", None)
    if opts_etc:
        stdout = opts_etc.get("stdout", sys.stdout)
        msg_visitor = opts_etc.get("msg_visitor", None)

    mcd_compatible = self.opts.extra.get("mcd_compatible", 1)
    msg_tuple_format = 0
    for msg in batch.msgs:
        if msg_visitor:
            msg = msg_visitor(msg)
        if not msg_tuple_format:
            msg_tuple_format = len(msg)
        cmd, vbucket_id, key, flg, exp, cas, meta, val = msg[:8]
        seqno = dtype = nmeta = conf_res = 0
        if msg_tuple_format > 8:
            seqno, dtype, nmeta, conf_res = msg[8:]
        if self.skip(key, vbucket_id):
            continue
        if dtype > 2:
            try:
                val = snappy.uncompress(val)
            except Exception:
                pass
        try:
            if cmd in [couchbaseConstants.CMD_TAP_MUTATION, couchbaseConstants.CMD_DCP_MUTATION]:
                if op_mutate:
                    # <op> <key> <flags> <exptime> <bytes> [noreply]\r\n
                    if mcd_compatible:
                        stdout.write(f'{op} {key} {flg} {exp} {len(val)!s}\r\n')
                    else:
                        stdout.write(f'{op} {key} {flg} {exp} {len(val)} {seqno} {dtype} {conf_res}\r\n')
                    try:
                        stdout.write(val.decode())
                    except TypeError:
                        stdout.write(f'{val}')
                    stdout.write("\r\n")
                elif op == 'get':
                    stdout.write(f'get {key}\r\n')
            elif cmd in [couchbaseConstants.CMD_TAP_DELETE, couchbaseConstants.CMD_DCP_DELETE]:
                if op_mutate:
                    stdout.write(f'delete {key}\r\n')
            elif cmd == couchbaseConstants.CMD_GET:
                stdout.write(f'get {key}\r\n')
            else:
                return f'error: StdOutSink - unknown cmd: {cmd!s}', None
        except IOError:
            return "error: could not write to stdout", None

    stdout.flush()
    future = SinkBatchFuture(self, batch)
    self.future_done(future, 0)
    return 0, future
def search(primary_keys_map, to_be_compressed_input, collection_name, tofind, MAX_RESULTS=1000):
    INDEX_DIR_DEFAULT = "IndexFiles.index"
    if collection_name != "DEFAULT":
        INDEX_DIR = collection_name
    else:
        INDEX_DIR = INDEX_DIR_DEFAULT
    try:
        print "********" + tofind
        tofind_keyvalue_pairs = json.loads(tofind)
    except:
        return 100
    direc = SimpleFSDirectory(File(INDEX_DIR))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    try:
        ireader = IndexReader.open(direc)
        searcher = IndexSearcher(ireader)
    except:
        return 105

    # initializing the return list
    return_list = []
    tofind_primary_keyvalue_pairs = {}
    tofind_nonprimary_keyvalue_pairs = {}

    # separating out primary and non-primary keys
    for key in tofind_keyvalue_pairs.keys():
        if key in primary_keys_map:
            tofind_primary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]
        else:
            tofind_nonprimary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]

    # filtering documents
    if len(tofind_primary_keyvalue_pairs) > 0:
        query = BooleanQuery()
        for key in tofind_primary_keyvalue_pairs.keys():
            temp = QueryParser(Version.LUCENE_CURRENT, key, analyzer).parse(tofind_primary_keyvalue_pairs[key])
            query.add(BooleanClause(temp, BooleanClause.Occur.MUST))
        hits = searcher.search(query, MAX_RESULTS).scoreDocs
        for hit in hits:
            doc = searcher.doc(hit.doc)
            if to_be_compressed_input == True:
                data = snappy.uncompress(doc.get("$DATA$"))
            else:
                data = doc.get("$DATA$")
            # non-primary key filtering (without having to load all the
            # primary-key-filtered values into main memory!)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied == True:
                    return_list.append(data)
            else:
                return_list.append(data)
    else:
        for i in range(0, ireader.numDocs()):
            doc = searcher.doc(i)
            if to_be_compressed_input == True:
                data = snappy.uncompress(str(doc.get("$DATA$")))
            else:
                data = doc.get("$DATA$")
            # non-primary key filtering (without having to load all the
            # primary-key-filtered values into main memory!)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied == True:
                    return_list.append(data)
            else:
                return_list.append(data)

    ireader.close()
    if len(return_list) == 0:
        return None
    else:
        return return_list
def update(primary_keys_map, to_be_compressed_input, collection_name, tofind, update, commit=False,
           add_field_if_not_exists=True):
    INDEX_DIR_DEFAULT = "IndexFiles.index"
    # As of now the update is implemented as: search, modify the data in the
    # JSON document, delete and re-write
    if collection_name != "DEFAULT":
        INDEX_DIR = collection_name
    else:
        INDEX_DIR = INDEX_DIR_DEFAULT
    try:
        tofind_keyvalue_pairs = json.loads(tofind)
    except:
        return 100
    direc = SimpleFSDirectory(File(INDEX_DIR))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    try:
        ireader = IndexReader.open(direc)
        searcher = IndexSearcher(ireader)
        # setting writer configurations
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
        writer = IndexWriter(direc, config)
    except:
        return 105
    no_of_documents_modified = 0

    # finding the document to update
    # Scope for making this more efficient
    def rewrite(data_string):
        data = json.loads(data_string)
        toupdate = json.loads(update)
        # delete the appropriate document
        query = BooleanQuery()
        for key in primary_keys_map:
            temp = QueryParser(Version.LUCENE_CURRENT, key, analyzer).parse(data[key])
            query.add(BooleanClause(temp, BooleanClause.Occur.MUST))
        # modify the values
        for key, value in toupdate.items():
            # if such a key is not present, we either add and update that key
            # in data, or just ignore it! (By default it is set to True!)
            if add_field_if_not_exists == False:
                if key in data.keys():
                    data[key] = value
            else:
                data[key] = value
        # this search has been intentionally added here: only if the modified
        # data has primary keys that do not already exist will the updating
        # process continue
        query_search = BooleanQuery()
        for key in primary_keys_map:
            temp = QueryParser(Version.LUCENE_CURRENT, key, analyzer).parse(data[key])
            query_search.add(BooleanClause(temp, BooleanClause.Occur.MUST))
        hits = searcher.search(query_search, MAX_RESULTS).scoreDocs
        if len(hits) > 0:
            return 106
        writer.deleteDocuments(query)
        # add the newly modified document
        doc = Document()
        # index fields wrt primary key
        for primary_key in primary_keys_map:
            try:
                field = Field(primary_key, data[primary_key], Field.Store.NO, Field.Index.ANALYZED)
                doc.add(field)
            except:
                return 101
        # compress data using snappy if compression is on
        if to_be_compressed_input == True:
            data_string = snappy.compress(str(json.dumps(data)))
        else:
            data_string = json.dumps(data)
        field = Field("$DATA$", data_string, Field.Store.YES, Field.Index.ANALYZED)
        doc.add(field)
        writer.addDocument(doc)

    tofind_primary_keyvalue_pairs = {}
    tofind_nonprimary_keyvalue_pairs = {}

    # separating out primary and non-primary keys
    for key in tofind_keyvalue_pairs.keys():
        if key in primary_keys_map:
            tofind_primary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]
        else:
            tofind_nonprimary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]

    # filtering documents
    if len(tofind_primary_keyvalue_pairs) > 0:
        query = BooleanQuery()
        for key in tofind_primary_keyvalue_pairs.keys():
            temp = QueryParser(Version.LUCENE_CURRENT, key, analyzer).parse(tofind_primary_keyvalue_pairs[key])
            query.add(BooleanClause(temp, BooleanClause.Occur.MUST))
        hits = searcher.search(query, MAX_RESULTS).scoreDocs
        for hit in hits:
            doc = searcher.doc(hit.doc)
            if to_be_compressed_input == True:
                data = snappy.uncompress(doc.get("$DATA$"))
            else:
                data = doc.get("$DATA$")
            # non-primary key filtering (without having to load all the
            # primary-key-filtered values into main memory!)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied == True:
                    if rewrite(data) != 106:
                        no_of_documents_modified += 1
                    else:
                        writer.rollback()
                        return 106
            else:
                if rewrite(data) != 106:
                    no_of_documents_modified += 1
                else:
                    writer.rollback()
                    return 106
    else:
        for i in range(0, ireader.numDocs()):
            doc = searcher.doc(i)
            if to_be_compressed_input == True:
                data = snappy.uncompress(doc.get("$DATA$"))
            else:
                data = doc.get("$DATA$")
            # non-primary key filtering (without having to load all the
            # primary-key-filtered values into main memory!)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied == True:
                    if rewrite(data) != 106:
                        no_of_documents_modified += 1
                    else:
                        writer.rollback()
                        return 106
            else:
                if rewrite(data) != 106:
                    no_of_documents_modified += 1
                else:
                    writer.rollback()
                    return 106

    ireader.close()
    if commit == True:
        writer.commit()
    writer.close()
    return str(no_of_documents_modified) + " have been modified"
def test_unicode_compress(self):
    text = "hello unicode world!".decode('utf-8')
    compressed = snappy.compress(text)
    self.assertEqual(text, snappy.uncompress(compressed))
def test_valid_compressed_buffer(self):
    text = "hello world!".encode('utf-8')
    compressed = snappy.compress(text)
    uncompressed = snappy.uncompress(compressed)
    self.assertEqual(text == uncompressed, snappy.isValidCompressed(compressed))
def getImageFeatures(self, image, image_shape):
    image = np.fromstring(snappy.uncompress(image), dtype=np.float32)
    image.resize(image_shape)
    feature_dic = self.extractor.getImageFeatures(image)
    feature_dic = {layer: snappy.compress(features) for layer, features in feature_dic.items()}
    return feature_dic
def test_randombytes2_compress(self):
    _bytes = bytes(os.urandom(10000))
    compressed = snappy.compress(_bytes)
    self.assertEqual(_bytes, snappy.uncompress(compressed))
def DXHTTPRequest(resource, data, method='POST', headers={}, auth=True, timeout=600,
                  use_compression=None, jsonify_data=True, want_full_response=False,
                  prepend_srv=True, session_handler=None,
                  max_retries=DEFAULT_RETRIES, always_retry=False, **kwargs):
    '''
    :param resource: API server route, e.g. "/record/new"
    :type resource: string
    :param data: Content of the request body
    :type data: list or dict, if *jsonify_data* is True; or string or file-like object, otherwise
    :param headers: Names and values of HTTP headers to submit with the request (in addition to those needed for authentication, compression, or other options specified with the call).
    :type headers: dict
    :param auth: Overrides the *auth* value to pass through to :meth:`requests.request`. By default a token is obtained from the ``DX_SECURITY_CONTEXT``.
    :type auth: tuple, object, True (default), or None
    :param timeout: HTTP request timeout, in seconds
    :type timeout: float
    :param config: *config* value to pass through to :meth:`requests.request`
    :type config: dict
    :param use_compression: "snappy" to use Snappy compression, or None
    :type use_compression: string or None
    :param jsonify_data: If True, *data* is converted from a Python list or dict to a JSON string
    :type jsonify_data: boolean
    :param want_full_response: If True, the full :class:`requests.Response` object is returned (otherwise, only the content of the response body is returned)
    :type want_full_response: boolean
    :param prepend_srv: If True, prepends the API server location to the URL
    :type prepend_srv: boolean
    :param max_retries: Maximum number of retries to perform for a request. A "failed" request is retried if any of the following is true:

                        - A response is received from the server, and the content length received does not match the "Content-Length" header.
                        - A response is received from the server, and the response has an HTTP status code in 5xx range.
                        - A response is received from the server, the "Content-Length" header is not set, and the response JSON cannot be parsed.
                        - No response is received from the server, and either *always_retry* is True or the request *method* is "GET".

    :type max_retries: int
    :param always_retry: If True, indicates that it is safe to retry a request on failure

                        - Note: It is not guaranteed that the request will *always* be retried on failure; rather, this is an indication to the function that it would be safe to do so.

    :type always_retry: boolean
    :returns: Response from API server in the format indicated by *want_full_response*. Note: if *want_full_response* is set to False and the header "content-type" is found in the response with value "application/json", the body of the response will **always** be converted from JSON to a Python list or dict before it is returned.
    :raises: :exc:`DXAPIError` if the server returned a non-200 status code; :exc:`requests.exceptions.HTTPError` if an invalid response was received from the server; or :exc:`requests.exceptions.ConnectionError` if a connection cannot be established.

    Wrapper around :meth:`requests.request()` that makes an HTTP request, inserting authentication headers and (by default) converting *data* to JSON.

    .. note:: Bindings methods that make API calls make the underlying HTTP request(s) using :func:`DXHTTPRequest`, and most of them will pass any unrecognized keyword arguments you have supplied through to :func:`DXHTTPRequest`.
    '''
    if session_handler is None:
        session_handler = SESSION_HANDLERS[os.getpid()]
    global _UPGRADE_NOTIFY

    url = APISERVER + resource if prepend_srv else resource
    method = method.upper()  # Convert method name to uppercase, to ease string comparisons later

    if _DEBUG:
        from repr import Repr
        print >>sys.stderr, method, url, "=>", Repr().repr(data)

    if auth is True:
        auth = AUTH_HELPER

    # When *data* is bytes but *headers* contains Unicode strings, httplib tries to concatenate
    # them and decode *data*, which should not be done. Also, per HTTP/1.1, headers must be
    # encoded with MIME, but we'll disregard that here, and just encode them with the Python
    # default (ascii) and fail for any non-ascii content.
    headers = {k.encode(): v.encode() for k, v in headers.iteritems()}

    # This will make the total number of retries MAX_RETRIES^2 for some errors. TODO: check how
    # to better integrate with requests' retry logic.
    # config.setdefault('max_retries', MAX_RETRIES)

    if jsonify_data:
        data = json.dumps(data)
        if 'Content-Type' not in headers and method == 'POST':
            headers['Content-Type'] = 'application/json'

    # If the input is a buffer, its data gets consumed by requests.request (moving the read
    # position). Record the initial buffer position so that we can return to it if the request
    # fails and needs to be retried.
    rewind_input_buffer_offset = None
    if hasattr(data, 'seek') and hasattr(data, 'tell'):
        rewind_input_buffer_offset = data.tell()

    headers['DNAnexus-API'] = API_VERSION
    headers['User-Agent'] = USER_AGENT

    if use_compression == 'snappy':
        if not snappy_available:
            raise DXError("Snappy compression requested, but the snappy module is unavailable")
        headers['accept-encoding'] = 'snappy'

    if 'verify' not in kwargs and 'DX_CA_CERT' in os.environ:
        kwargs['verify'] = os.environ['DX_CA_CERT']
        if os.environ['DX_CA_CERT'] == 'NOVERIFY':
            kwargs['verify'] = False

    response, last_error = None, None
    for retry in range(max_retries + 1):
        streaming_response_truncated = False
        try:
            response = session_handler.request(method, url, data=data, headers=headers,
                                               timeout=timeout, auth=auth, **kwargs)

            if _UPGRADE_NOTIFY and response.headers.get('x-upgrade-info', '').startswith('A recommended update is available') and not os.environ.has_key('_ARGCOMPLETE'):
                logger.info(response.headers['x-upgrade-info'])
                try:
                    with file(_UPGRADE_NOTIFY, 'a'):
                        os.utime(_UPGRADE_NOTIFY, None)
                except:
                    pass
                _UPGRADE_NOTIFY = False

            if _DEBUG:
                print >>sys.stderr, method, url, "<=", response.status_code, Repr().repr(response.content)

            # If an HTTP code other than 200 (OK) is received and the content is JSON, parse it
            # and throw the appropriate error. Otherwise, raise the usual exception.
            if response.status_code != requests.codes.ok:
                # response.headers key lookup is case-insensitive
                if response.headers.get('content-type', '').startswith('application/json'):
                    content = json.loads(response.content)
                    raise DXAPIError(content, response.status_code)
                response.raise_for_status()

            if want_full_response:
                return response
            else:
                if 'content-length' in response.headers:
                    if int(response.headers['content-length']) != len(response.content):
                        raise ContentLengthError("Received response with content-length header set to %s but content length is %d" % (response.headers['content-length'], len(response.content)))

                if use_compression and response.headers.get('content-encoding', '') == 'snappy':
                    # TODO: check if snappy raises any exceptions on truncated response content
                    decoded_content = snappy.uncompress(response.content)
                else:
                    decoded_content = response.content

                if response.headers.get('content-type', '').startswith('application/json'):
                    try:
                        return json.loads(decoded_content)
                    except ValueError:
                        # If a streaming API call (no content-length set) encounters an error,
                        # it may just halt the response because it has no other way to indicate
                        # an error. Under these circumstances the client sees unparseable JSON,
                        # and we should be able to recover.
                        streaming_response_truncated = 'content-length' not in response.headers
                        raise HTTPError("Invalid JSON received from server")
                return decoded_content
        except (DXAPIError, ConnectionError, HTTPError, Timeout, httplib.HTTPException) as e:
            last_error = e

            # TODO: support HTTP/1.1 503 Retry-After
            # TODO: if the socket was dropped mid-request, ConnectionError or
            # httplib.IncompleteRead is raised, but non-idempotent requests can be unsafe to
            # retry. Distinguish between connection initiation errors and dropped socket errors.
            if retry < max_retries:
                if (response is None) or isinstance(e, ContentLengthError):
                    ok_to_retry = always_retry or (method == 'GET')
                else:
                    ok_to_retry = (response.status_code >= 500 and response.status_code < 600) or streaming_response_truncated

                if ok_to_retry:
                    if rewind_input_buffer_offset is not None:
                        data.seek(rewind_input_buffer_offset)
                    delay = 2 ** (retry + 1)
                    logger.warn("%s %s: %s. Waiting %d seconds before retry %d of %d..." % (method, url, str(e), delay, retry + 1, max_retries))
                    time.sleep(delay)
                    continue
            break

    if last_error is None:
        last_error = DXError("Internal error in DXHTTPRequest")
    raise last_error
def uncompress_array(compressed: CompressedArray) -> np.ndarray:
    """Uncompresses a numpy array with snappy given its shape and dtype."""
    compressed_array, shape, dtype = compressed
    byte_string = snappy.uncompress(compressed_array)
    return np.frombuffer(byte_string, dtype=dtype).reshape(shape)
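# A plausible counterpart sketch for uncompress_array() above, assuming
# CompressedArray is a (bytes, shape, dtype) tuple as the unpacking implies.
def compress_array(array: np.ndarray) -> CompressedArray:
    """Compresses a numpy array with snappy, recording its shape and dtype."""
    return snappy.compress(array.tobytes()), array.shape, array.dtype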
# RNG
import os
import secrets

def get_salt(length):
    # return random.randbytes(length)
    # return secrets.token_bytes(length)
    return os.urandom(length)

# compression
# pip install python-snappy
import snappy

if __name__ == '__main__':
    toc = b"jim"
    compressed = snappy.compress(toc)
    print('comp', len(toc), len(compressed))
    print(snappy.uncompress(compressed))
    toc = toc * 20
    compressed = snappy.compress(toc)
    print('comp', len(toc), len(compressed), type(compressed))

# use compression only if it gets smaller.
def compress(p):
    checksize(p)
    c = snappy.compress(p)
    return (c if len(c) < len(p) else p)

def checksize(ba):
    if len(ba) > 1024:
        raise Exception('input too large (we have to protect our servers)')
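# Note a wrinkle in compress() above: because it silently falls back to the
# raw payload when compression does not help, a receiver cannot tell the two
# cases apart without a marker. A hedged sketch of a tagged variant (the
# one-byte prefix mirrors the b'S' / b'\0' framing used elsewhere in this
# collection); these function names are illustrative, not from the source.
def compress_tagged(p):
    checksize(p)
    c = snappy.compress(p)
    return (b'S' + c) if len(c) < len(p) else (b'\0' + p)

def decompress_tagged(p):
    return snappy.uncompress(p[1:]) if p[:1] == b'S' else p[1:]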
def parse(kind, compressed, message):
    if compressed:
        message = snappy.uncompress(message)
    return protobuf.parse(IMPL_BY_KIND[kind], message)
def test_moredata_compress(self):
    text = "snappy +" * 1000 + " " + "by " * 1000 + " google"
    text = text.encode('utf-8')
    compressed = snappy.compress(text)
    self.assertEqual(text, snappy.uncompress(compressed))
def run_call(self, image):
    client = msgpackrpc.Client(msgpackrpc.Address(self.hostname, 18800))
    feature_dic = client.call('getImageFeatures', snappy.compress(image), image.shape)
    feature_dic = {layer: np.fromstring(snappy.uncompress(features), dtype=np.float32)
                   for layer, features in feature_dic.items()}
    labels = client.call('getImageLabels')
    return feature_dic, labels
def decompress(self, data):
    """
    Decompress data using a snappy decompressor.

    :return: data as a bytes object.
    """
    return snappy.uncompress(data)
def test_randombytes_compress(self):
    _bytes = repr(os.urandom(1000)).encode('utf-8')
    compressed = snappy.compress(_bytes)
    self.assertEqual(_bytes, snappy.uncompress(compressed))