Example #1
0
def dumps_big_byte_dict(d):
    """ Serialize large byte dictionary to sequence of frames

    The input must be a dictionary and all values of that dictionary must be
    bytestrings.  These should probably be large.

    Returns a list of frames: one msgpack-encoded header followed by one
    frame per value, in the dictionary's iteration order.  The header stores
    the keys and, for each value, the compression format reported by
    ``maybe_compress``.  An empty dictionary yields only the header frame.

    See Also:
        loads_big_byte_dict
    """
    assert isinstance(d, dict) and all(isinstance(v, bytes) for v in d.values())

    compression = []
    values2 = []
    # Walk the values directly; unpacking ``zip(*d.items())`` raises
    # ValueError when ``d`` is empty.
    for v in d.values():
        fmt, vv = maybe_compress(v)
        compression.append(fmt)
        values2.append(vv)

    header = {'encoding': 'big-byte-dict',
              'keys': tuple(d),
              'compression': compression}

    return [msgpack.dumps(header, use_bin_type=True)] + values2
Example #2
0
def dumps_big_byte_dict(d):
    """ Serialize large byte dictionary to sequence of frames

    The input must be a dictionary and all values of that dictionary must be
    bytestrings.  These should probably be large.

    Returns a list of frames, one msgpack-encoded header followed by each of
    the values (as returned by ``maybe_compress``).  The header holds the
    keys and the per-value compression formats in matching order.  For an
    empty dictionary the result is just the header frame.

    See Also:
        loads_big_byte_dict
    """
    assert isinstance(d, dict) and all(
        isinstance(v, bytes) for v in d.values())

    # Keys and values are read straight from the dict instead of through
    # ``zip(*d.items())``, which cannot be unpacked for an empty dict.
    keys = tuple(d)

    compression = []
    values2 = []
    for v in d.values():
        fmt, vv = maybe_compress(v)
        compression.append(fmt)
        values2.append(vv)

    header = {
        'encoding': 'big-byte-dict',
        'keys': keys,
        'compression': compression
    }

    return [msgpack.dumps(header, use_bin_type=True)] + values2
Example #3
0
def dumps(msg):
    """ Encode a Python value as one bytestring suitable for communication

    The result is the concatenation of three parts: the header length packed
    with ``struct`` format ``'I'``, the msgpack-encoded header (empty bytes
    when the header has no entries), and the msgpack-encoded payload.

    The payload is compressed with the default compression only when one is
    configured and the payload is longer than 1000 bytes; in that case the
    header records the compression name under ``'compression'``.
    """
    body = msgpack.dumps(msg, use_bin_type=True)
    meta = {}

    if default_compression and len(body) > 1e3:
        compressor = compressions[default_compression]['compress']
        body = compressor(body)
        meta['compression'] = default_compression

    meta_bytes = msgpack.dumps(meta, use_bin_type=True) if meta else b''
    length_prefix = struct.pack('I', len(meta_bytes))
    return b''.join((length_prefix, meta_bytes, body))
Example #4
0
def dumps(msg):
    """ Transform Python message to frames suitable for communication

    Lists and dicts are first passed through ``extract_serialize``.  When
    nothing is extracted, the ``(header, payload)`` pair from
    ``dumps_msgpack`` is returned directly.  Otherwise the result is a list:
    that header and payload, a msgpack-encoded header describing the
    extracted values, and then all of their frames converted to ``bytes``.

    Any exception is logged at critical level and re-raised.
    """
    try:
        extracted = {}
        # Serialized values can only live inside lists and dicts
        if isinstance(msg, (list, dict)):
            msg, extracted, bytestrings = extract_serialize(msg)
        small_header, small_payload = dumps_msgpack(msg)

        if not extracted:  # fast path without serialized data
            return small_header, small_payload

        # Values that already carry a header and frames
        already = {}
        for key, value in extracted.items():
            if type(value) is Serialized:
                already[key] = (value.header, value.frames)

        # Values that still have to be serialized
        fresh = {}
        for key, value in extracted.items():
            if type(value) is Serialize:
                fresh[key] = serialize(value.data)

        header = {'headers': {}, 'keys': [], 'bytestrings': list(bytestrings)}
        out_frames = []

        def record(key, head, frames):
            # Register one value's header and queue its frames for output
            head['count'] = len(frames)
            header['headers'][key] = head
            header['keys'].append(key)
            out_frames.extend(frames)

        for key, (head, frames) in fresh.items():
            # Lengths are taken before the frames are split and compressed
            if 'lengths' not in head:
                head['lengths'] = [len(frame) for frame in frames]
            if 'compression' not in head:
                frames = frame_split_size(frames)
                if frames:
                    compression, frames = zip(*map(maybe_compress, frames))
                else:
                    compression = []
                head['compression'] = compression
            record(key, head, frames)

        for key, (head, frames) in already.items():
            if 'lengths' not in head:
                head['lengths'] = [len(frame) for frame in frames]
            record(key, head, frames)

        byte_frames = list(map(bytes, out_frames))
        leading = [
            small_header,
            small_payload,
            msgpack.dumps(header, use_bin_type=True),
        ]
        return leading + byte_frames
    except Exception:
        logger.critical("Failed to Serialize", exc_info=True)
        raise
Example #5
0
def dumps(msg):
    """ Turn a Python value into a bytestream suitable for communication

    Output layout: ``struct.pack('I', len(header))``, then the
    msgpack-encoded header (empty bytes if the header is empty), then the
    msgpack-encoded payload.  The payload is compressed, and the compression
    name stored in the header, only if a default compression is set and the
    payload exceeds 1000 bytes.
    """
    payload = msgpack.dumps(msg, use_bin_type=True)

    should_compress = len(payload) > 1e3 and default_compression
    if should_compress:
        payload = compressions[default_compression]['compress'](payload)
        header = {'compression': default_compression}
    else:
        header = {}

    header_bytes = b''
    if header:
        header_bytes = msgpack.dumps(header, use_bin_type=True)

    parts = [struct.pack('I', len(header_bytes)), header_bytes, payload]
    return b''.join(parts)
Example #6
0
def dumps_msgpack(msg):
    """ Encode msg with msgpack into a header and a payload

    All of the message must be msgpack encodable.

    Returns a two-element list ``[header_bytes, payload]``.  The payload is
    the msgpack encoding of ``msg`` after ``maybe_compress``.  The header is
    empty bytes unless ``maybe_compress`` reports a truthy format, in which
    case it is the msgpack encoding of ``{'compression': fmt}``.

    See Also:
        loads_msgpack
    """
    packed = msgpack.dumps(msg, use_bin_type=True)
    fmt, packed = maybe_compress(packed)

    if not fmt:
        return [b'', packed]

    return [msgpack.dumps({'compression': fmt}, use_bin_type=True), packed]
Example #7
0
def dumps_msgpack(msg):
    """ Dump msg into a header and a payload, both bytestrings

    Every part of the message must be msgpack encodable.  The payload is
    passed through ``maybe_compress``; when that reports a truthy format the
    header records it under ``'compression'``, otherwise the header frame is
    empty bytes.

    Returns the list ``[header_bytes, payload]``.

    See Also:
        loads_msgpack
    """
    fmt, payload = maybe_compress(msgpack.dumps(msg, use_bin_type=True))

    header = {'compression': fmt} if fmt else {}
    header_bytes = msgpack.dumps(header, use_bin_type=True) if header else b''

    return [header_bytes, payload]
Example #8
0
def dumps(msg):
    """ Transform Python value to frames suitable for communication

    For a dict, big values are pulled out with ``extract_big_bytes`` first.
    If nothing was pulled out, the ``(header, payload)`` pair from
    ``dumps_msgpack`` is returned as is.  Otherwise each big value is cut
    into shards of at most ``BIG_BYTES_SHARD_SIZE`` and the result is a list:
    the small header, the small payload, a msgpack-encoded
    ``'big-byte-dict'`` header, then one frame per shard.
    """
    big = {}
    # Only dicts can contain big values
    if isinstance(msg, dict):
        msg, big = extract_big_bytes(msg)
    small_header, small_payload = dumps_msgpack(msg)
    if not big:
        return small_header, small_payload

    # Cut every big value into shards, remembering which shards belong to it
    shards = []
    pieces = {}
    for name, blob in list(big.items()):
        shard_keys = []
        starts = range(0, len(blob), BIG_BYTES_SHARD_SIZE)
        for index, start in enumerate(starts):
            shard_key = '.shard-%d-%s' % (index, name)
            stop = start + BIG_BYTES_SHARD_SIZE
            pieces[shard_key] = blob[start:stop]
            shard_keys.append(shard_key)
        shards.append((name, shard_keys))

    keys, values = zip(*pieces.items())

    # Compress each shard independently and keep the format next to it
    compression = []
    frames = []
    for shard in values:
        fmt, frame = maybe_compress(shard)
        compression.append(fmt)
        frames.append(frame)

    header = dict([
        ('encoding', 'big-byte-dict'),
        ('keys', keys),
        ('compression', compression),
        ('shards', shards),
    ])
    big_header = msgpack.dumps(header, use_bin_type=True)

    return [small_header, small_payload, big_header] + frames
Example #9
0
def dumps_big_byte_dict(d):
    """ Serialize large byte dictionary to sequence of frames

    The input must be a dictionary and all values of that dictionary must be
    bytestrings.  These should probably be large.

    Values of 2**31 bytes or more are replaced by shards of at most 2**30
    bytes stored under ``'.shard-<i>-<key>'`` keys; the header's ``'shards'``
    entry (present only when something was sharded) maps each original key
    to its list of shard keys.  The caller's dictionary is left unmodified.

    Returns a list of frames: one msgpack-encoded header followed by one
    frame per (possibly sharded) value.  An empty dictionary yields only the
    header frame.

    See Also:
        loads_big_byte_dict
    """
    assert isinstance(d, dict) and all(isinstance(v, bytes) for v in d.values())

    # Shard into a shallow copy so the caller's dictionary is never mutated
    out = dict(d)
    shards = {}
    for k, v in d.items():
        if len(v) >= 2**31:
            L = []
            for i, j in enumerate(range(0, len(v), 2**30)):
                key = '.shard-%d-%s' % (i, k)
                out[key] = v[j: j + 2**30]
                L.append(key)
            del out[k]
            shards[k] = L

    compression = []
    values2 = []
    # Iterate the values directly; unpacking ``zip(*out.items())`` raises
    # ValueError for an empty dictionary.
    for v in out.values():
        fmt, vv = maybe_compress(v)
        compression.append(fmt)
        values2.append(vv)

    header = {'encoding': 'big-byte-dict',
              'keys': tuple(out),
              'compression': compression}
    if shards:
        header['shards'] = shards

    return [msgpack.dumps(header, use_bin_type=True)] + values2
Example #10
0
def dumps(msg):
    """ Transform Python value to frames suitable for communication

    Lists and dicts are scanned with ``extract_big_bytes``.  When no big
    values are found, the ``(header, payload)`` pair from ``dumps_msgpack``
    is returned unchanged.  Otherwise every big value is split into shards
    of at most ``BIG_BYTES_SHARD_SIZE`` and a list is returned: small header,
    small payload, msgpack-encoded ``'big-byte-dict'`` header, then the
    shard frames.
    """
    big = {}
    # Only lists and dicts can contain big values
    if isinstance(msg, (list, dict)):
        msg, big = extract_big_bytes(msg)
    small_header, small_payload = dumps_msgpack(msg)
    if not big:
        return small_header, small_payload

    # Shard the big segments
    shards = []
    sharded = {}
    for name, blob in list(big.items()):
        names = []
        for n, offset in enumerate(range(0, len(blob), BIG_BYTES_SHARD_SIZE)):
            shard_name = '.shard-%d-%s' % (n, name)
            sharded[shard_name] = blob[offset: offset + BIG_BYTES_SHARD_SIZE]
            names.append(shard_name)
        shards.append((name, names))

    keys, values = zip(*sharded.items())

    # Each shard is compressed on its own; formats stay aligned with keys
    compression = []
    frames = []
    for value in values:
        fmt, frame = maybe_compress(value)
        compression.append(fmt)
        frames.append(frame)

    header = {'encoding': 'big-byte-dict'}
    header['keys'] = keys
    header['compression'] = compression
    header['shards'] = shards

    prefix = [small_header, small_payload]
    prefix.append(msgpack.dumps(header, use_bin_type=True))
    return prefix + frames