def dumps_big_byte_dict(d):
    """ Serialize large byte dictionary to sequence of frames

    The input must be a dictionary and all values of that dictionary must be
    bytestrings.  These should probably be large.

    Returns a sequence of frames, one header followed by each of the values.
    The header is a msgpack-encoded dict holding the keys and, for every
    value, the format reported by ``maybe_compress``.  An empty dictionary
    produces just the header frame.

    See Also:
        loads_big_byte_dict
    """
    assert isinstance(d, dict) and all(isinstance(v, bytes)
                                       for v in d.values())

    # Read keys and values straight from the dict rather than through
    # ``zip(*d.items())``, which cannot be unpacked when ``d`` is empty.
    keys = tuple(d)

    compression = []
    values2 = []
    for v in d.values():
        # maybe_compress returns a (format, data) pair for each value
        fmt, vv = maybe_compress(v)
        compression.append(fmt)
        values2.append(vv)

    header = {'encoding': 'big-byte-dict',
              'keys': keys,
              'compression': compression}

    return [msgpack.dumps(header, use_bin_type=True)] + values2
def dumps_big_byte_dict(d):
    """ Serialize large byte dictionary to sequence of frames

    The input must be a dictionary and all values of that dictionary must be
    bytestrings.  These should probably be large.

    Returns a sequence of frames, one header followed by each of the values.
    The header frame is a msgpack-encoded dict with the entries
    ``'encoding'``, ``'keys'`` and ``'compression'``.  An empty dictionary
    is accepted and yields only the header frame.

    See Also:
        loads_big_byte_dict
    """
    assert isinstance(d, dict) and all(
        isinstance(v, bytes) for v in d.values())

    # ``zip(*d.items())`` fails to unpack for an empty dict, so the keys and
    # values are taken from the dict directly (same order either way).
    keys = tuple(d)

    # Each call yields a (format, data) pair; keep the two halves in
    # parallel lists aligned with ``keys``.
    pairs = [maybe_compress(v) for v in d.values()]
    compression = [fmt for fmt, _ in pairs]
    values2 = [vv for _, vv in pairs]

    header = {
        'encoding': 'big-byte-dict',
        'keys': keys,
        'compression': compression,
    }
    return [msgpack.dumps(header, use_bin_type=True)] + values2
def dumps(msg):
    """ Transform Python value to bytestream suitable for communication

    The result is a single bytestring laid out as::

        <header length, struct format 'I'> <header bytes> <payload bytes>

    The payload is the msgpack encoding of ``msg``.  It is compressed with
    the module's default compression only when it is longer than 1e3 bytes
    and a default compression is configured; in that case the header records
    the compression name.  Without compression the header is empty.
    """
    body = msgpack.dumps(msg, use_bin_type=True)

    meta = {}
    if default_compression and len(body) > 1e3:
        compress = compressions[default_compression]['compress']
        body = compress(body)
        meta['compression'] = default_compression

    # An empty header is sent as zero bytes rather than an encoded empty dict
    header_bytes = msgpack.dumps(meta, use_bin_type=True) if meta else b''

    # 'I' without a byte-order prefix uses the native unsigned int layout
    length_prefix = struct.pack('I', len(header_bytes))
    return b''.join([length_prefix, header_bytes, body])
def dumps(msg):
    """ Transform Python message to bytestream suitable for communication

    Lists and dicts are first passed through ``extract_serialize``, which
    hands back the reduced message, a mapping of extracted values and a
    collection of bytestrings.  The reduced message is encoded with
    ``dumps_msgpack``.

    Returns
    -------
    ``(small_header, small_payload)`` when nothing was extracted, otherwise
    a list ``[small_header, small_payload, header, *frames]`` where
    ``header`` is a msgpack-encoded dict describing the frames per key.

    Any exception is logged at critical level and re-raised.
    """
    try:
        extracted = {}
        # Only lists and dicts can contain serialized values
        if isinstance(msg, (list, dict)):
            msg, extracted, bytestrings = extract_serialize(msg)
        small_header, small_payload = dumps_msgpack(msg)

        # Fast path: no serialized data, just the msgpack part
        if not extracted:
            return small_header, small_payload

        # Values that already carry a header and frames
        ready = {}
        for key, value in extracted.items():
            if type(value) is Serialized:
                ready[key] = (value.header, value.frames)

        # Values that still have to go through ``serialize``
        fresh = {}
        for key, value in extracted.items():
            if type(value) is Serialize:
                fresh[key] = serialize(value.data)

        header = {'headers': {},
                  'keys': [],
                  'bytestrings': list(bytestrings)}
        collected = []

        for key, (head, frames) in fresh.items():
            if 'lengths' not in head:
                head['lengths'] = [len(frame) for frame in frames]
            if 'compression' not in head:
                # Split, then compress each piece and note its format
                frames = frame_split_size(frames)
                if frames:
                    compression, frames = zip(
                        *[maybe_compress(frame) for frame in frames])
                else:
                    compression = []
                head['compression'] = compression
            head['count'] = len(frames)
            header['headers'][key] = head
            header['keys'].append(key)
            collected.extend(frames)

        # Pre-serialized values are forwarded as they are
        for key, (head, frames) in ready.items():
            if 'lengths' not in head:
                head['lengths'] = [len(frame) for frame in frames]
            head['count'] = len(frames)
            header['headers'][key] = head
            header['keys'].append(key)
            collected.extend(frames)

        out_frames = [bytes(frame) for frame in collected]

        leading = [small_header, small_payload,
                   msgpack.dumps(header, use_bin_type=True)]
        return leading + out_frames
    except Exception:
        logger.critical("Failed to Serialize", exc_info=True)
        raise
def dumps(msg):
    """ Transform Python value to bytestream suitable for communication

    ``msg`` is encoded with msgpack.  Payloads longer than 1e3 bytes are
    compressed when a default compression is configured, and the compression
    name is then stored in a small msgpack header.  The returned bytestring
    is the header length packed with struct format ``'I'``, followed by the
    header bytes (possibly empty) and the payload.
    """
    encoded = msgpack.dumps(msg, use_bin_type=True)
    info = {}

    wants_compression = len(encoded) > 1e3 and default_compression
    if wants_compression:
        encoded = compressions[default_compression]['compress'](encoded)
        info['compression'] = default_compression

    # No header entries means no header bytes at all
    if not info:
        header_bytes = b''
    else:
        header_bytes = msgpack.dumps(info, use_bin_type=True)

    parts = (struct.pack('I', len(header_bytes)), header_bytes, encoded)
    return b''.join(parts)
def dumps_msgpack(msg):
    """ Dump msg into header and payload, both bytestrings

    All of the message must be msgpack encodable.

    The payload is the msgpack encoding of ``msg`` after it has been passed
    through ``maybe_compress``.  When that call reports a format, the header
    is a msgpack-encoded dict ``{'compression': fmt}``; otherwise the header
    is the empty bytestring.

    Returns ``[header_bytes, payload]``.

    See Also:
        loads_msgpack
    """
    packed = msgpack.dumps(msg, use_bin_type=True)
    fmt, payload = maybe_compress(packed)

    # Only mention compression when a format was actually reported
    header = {'compression': fmt} if fmt else {}
    header_bytes = msgpack.dumps(header, use_bin_type=True) if header else b''

    return [header_bytes, payload]
def dumps(msg):
    """ Transform Python value to bytestream suitable for communication

    Dict messages are passed through ``extract_big_bytes``, which returns
    the reduced message and a mapping of the extracted values.  The reduced
    message is encoded with ``dumps_msgpack``.

    Returns ``(small_header, small_payload)`` when nothing was extracted.
    Otherwise returns a list ``[small_header, small_payload, header,
    *frames]``: every extracted value is cut into pieces of at most
    ``BIG_BYTES_SHARD_SIZE``, each piece goes through ``maybe_compress``,
    and ``header`` is a msgpack-encoded dict listing the piece keys, their
    compression formats and which pieces belong to which original key.
    """
    extracted = {}
    # Only dicts can contain big values
    if isinstance(msg, dict):
        msg, extracted = extract_big_bytes(msg)

    small_header, small_payload = dumps_msgpack(msg)
    if not extracted:
        return small_header, small_payload

    # Shard the big segments
    shards = []
    pieces = {}
    for name, blob in list(extracted.items()):
        piece_keys = []
        starts = range(0, len(blob), BIG_BYTES_SHARD_SIZE)
        for index, start in enumerate(starts):
            piece_key = '.shard-%d-%s' % (index, name)
            pieces[piece_key] = blob[start:start + BIG_BYTES_SHARD_SIZE]
            piece_keys.append(piece_key)
        shards.append((name, piece_keys))

    keys, values = zip(*pieces.items())

    # Compress piece by piece, keeping formats aligned with ``keys``
    results = [maybe_compress(value) for value in values]
    compression = [fmt for fmt, _ in results]
    frames = [data for _, data in results]

    header = {'encoding': 'big-byte-dict',
              'keys': keys,
              'compression': compression,
              'shards': shards}

    leading = [small_header, small_payload,
               msgpack.dumps(header, use_bin_type=True)]
    return leading + frames
def dumps_big_byte_dict(d):
    """ Serialize large byte dictionary to sequence of frames

    The input must be a dictionary and all values of that dictionary must be
    bytestrings.  These should probably be large.

    Values of 2**31 bytes or more are split into shards of at most 2**30
    bytes.  Each shard is stored under a generated ``'.shard-<i>-<key>'``
    key, and the header's ``'shards'`` entry maps every original key to the
    list of its shard keys.  The input dictionary itself is left untouched.

    Returns a sequence of frames, one header followed by each of the values.
    An empty dictionary produces just the header frame.

    See Also:
        loads_big_byte_dict
    """
    assert isinstance(d, dict) and all(isinstance(v, bytes)
                                       for v in d.values())

    # Shard on a shallow copy so the caller's dictionary is not modified
    frames = dict(d)
    shards = {}
    for k, v in d.items():
        if len(v) >= 2**31:
            shard_keys = []
            for i, j in enumerate(range(0, len(v), 2**30)):
                key = '.shard-%d-%s' % (i, k)
                frames[key] = v[j: j + 2**30]
                shard_keys.append(key)
            # The oversized value is replaced by its shards
            del frames[k]
            shards[k] = shard_keys

    # Taken directly from the dict: ``zip(*items)`` cannot be unpacked when
    # there are no items.
    keys = tuple(frames)

    compression = []
    values2 = []
    for v in frames.values():
        # maybe_compress returns a (format, data) pair for each value
        fmt, vv = maybe_compress(v)
        compression.append(fmt)
        values2.append(vv)

    header = {'encoding': 'big-byte-dict',
              'keys': keys,
              'compression': compression}
    if shards:
        header['shards'] = shards

    return [msgpack.dumps(header, use_bin_type=True)] + values2
def dumps(msg):
    """ Transform Python value to bytestream suitable for communication

    List and dict messages are passed through ``extract_big_bytes``, which
    returns the reduced message and a mapping of the extracted values.  The
    reduced message is encoded with ``dumps_msgpack``.

    Returns ``(small_header, small_payload)`` when nothing was extracted.
    Otherwise returns the list ``[small_header, small_payload, header,
    *frames]``, where the extracted values have been cut into shards of at
    most ``BIG_BYTES_SHARD_SIZE``, each shard has gone through
    ``maybe_compress``, and ``header`` is the msgpack-encoded description
    (shard keys, compression formats, shard membership per original key).
    """
    big_values = {}
    # Only lists and dicts can contain big values
    if isinstance(msg, (list, dict)):
        msg, big_values = extract_big_bytes(msg)

    small_header, small_payload = dumps_msgpack(msg)
    if not big_values:
        return small_header, small_payload

    # Shard the big segments
    shards = []
    sharded = {}
    for original_key, data in list(big_values.items()):
        members = []
        offsets = range(0, len(data), BIG_BYTES_SHARD_SIZE)
        for number, offset in enumerate(offsets):
            shard_key = '.shard-%d-%s' % (number, original_key)
            sharded[shard_key] = data[offset: offset + BIG_BYTES_SHARD_SIZE]
            members.append(shard_key)
        shards.append((original_key, members))

    keys, values = zip(*sharded.items())

    # One (format, data) pair per shard, in the same order as ``keys``
    compression = []
    compressed = []
    for shard in values:
        fmt, shard_bytes = maybe_compress(shard)
        compression.append(fmt)
        compressed.append(shard_bytes)

    header = {
        'encoding': 'big-byte-dict',
        'keys': keys,
        'compression': compression,
        'shards': shards,
    }
    header_frame = msgpack.dumps(header, use_bin_type=True)

    return [small_header, small_payload, header_frame] + compressed