Example #1
0
def test_large_bytes():
    """A dict holding a 1,000,000-byte value round-trips through dumps/loads.

    The first two frames must each be shorter than 1000 bytes.
    """
    payload = b'0' * 1000000
    msg = {'x': payload, 'y': 1}
    frames = dumps(msg)

    roundtripped = loads(frames)
    assert roundtripped == msg

    # Check frames 0 and 1 individually.
    for index in (0, 1):
        assert len(frames[index]) < 1000

    # Loading without deserialization yields an equal message as well.
    raw_loaded = loads(frames, deserialize=False)
    assert raw_loaded == msg
Example #2
0
def test_large_bytes():
    """Round-trip a message with a 1,000,000-byte value and check frame sizes."""
    big_value = b'0' * 1000000
    msg = {'x': big_value, 'y': 1}

    frames = dumps(msg)
    decoded = loads(frames)
    assert decoded == msg

    # Neither of the first two frames may reach 1000 bytes.
    first_len = len(frames[0])
    assert first_len < 1000
    second_len = len(frames[1])
    assert second_len < 1000

    # deserialize=False must still compare equal to the input.
    assert loads(frames, deserialize=False) == msg
Example #3
0
def test_large_bytes():
    """Round-trip a 1,000,000-byte payload as both ``bytes`` and ``bytearray``.

    For each container type the first two frames must be shorter than 1000
    bytes, and loading with ``deserialize=False`` must equal the input.
    """
    for container_type in (bytes, bytearray):
        payload = container_type(b'0' * 1000000)
        msg = {'x': payload, 'y': 1}
        frames = dumps(msg)

        decoded = loads(frames)
        assert decoded == msg

        for index in (0, 1):
            assert len(frames[index]) < 1000

        undecoded = loads(frames, deserialize=False)
        assert undecoded == msg
Example #4
0
def test_large_bytes():
    """Check large ``bytes``/``bytearray`` values survive a dumps/loads cycle."""
    container_types = (bytes, bytearray)
    for make_buffer in container_types:
        big_value = make_buffer(b"0" * 1000000)
        msg = {"x": big_value, "y": 1}

        frames = dumps(msg)
        assert loads(frames) == msg

        # The first two frames are each expected to stay below 1000 bytes.
        head_frame = frames[0]
        assert len(head_frame) < 1000
        next_frame = frames[1]
        assert len(next_frame) < 1000

        assert loads(frames, deserialize=False) == msg
Example #5
0
def test_loads_deserialize_False():
    """``loads(..., deserialize=False)`` keeps wrapped values as ``Serialized``."""
    frames = dumps({'data': Serialize(123), 'status': 'OK'})

    # Default load: the wrapped value comes back as the plain object.
    eager = loads(frames)
    assert eager == {'data': 123, 'status': 'OK'}

    # Deferred load: plain fields are decoded, wrapped ones stay wrapped.
    lazy = loads(frames, deserialize=False)
    assert lazy['status'] == 'OK'
    wrapped = lazy['data']
    assert isinstance(wrapped, Serialized)

    # The wrapped value can be unpacked by hand from its header and frames.
    assert deserialize(wrapped.header, wrapped.frames) == 123
Example #6
0
def test_loads_deserialize_False():
    """Compare default loading with ``deserialize=False`` for a ``Serialize`` value."""
    original = {"data": Serialize(123), "status": "OK"}
    frames = dumps(original)

    fully_loaded = loads(frames)
    assert fully_loaded == {"data": 123, "status": "OK"}

    partially_loaded = loads(frames, deserialize=False)
    assert partially_loaded["status"] == "OK"
    held = partially_loaded["data"]
    assert isinstance(held, Serialized)

    # Manual deserialization of the held header/frames recovers the value.
    recovered = deserialize(held.header, held.frames)
    assert recovered == 123
Example #7
0
def test_loads_deserialize_False():
    """A ``Serialize`` payload is only unpacked when deserialization is enabled."""
    frames = dumps({'data': Serialize(123), 'status': 'OK'})

    # With the default settings everything is decoded.
    assert loads(frames) == {'data': 123, 'status': 'OK'}

    # With deserialize=False the 'data' entry is left as a Serialized object.
    deferred = loads(frames, deserialize=False)
    assert deferred['status'] == 'OK'
    payload = deferred['data']
    assert isinstance(payload, Serialized)

    value = deserialize(payload.header, payload.frames)
    assert value == 123
Example #8
0
def test_dumps_loads_Serialized():
    """An already-``Serialized`` value round-trips and re-dumps to equal frames."""
    wrapped = Serialized(*serialize(123))
    msg = {"x": 1, "data": wrapped}

    frames = dumps(msg)
    assert len(frames) > 2

    result = loads(frames)
    assert result == {"x": 1, "data": 123}

    # Without deserialization the message compares equal to the input.
    undecoded = loads(frames, deserialize=False)
    assert undecoded == msg

    # Dumping the undecoded message again yields pairwise-equal frames.
    frames2 = dumps(undecoded)
    assert all(eq_frames(first, second) for first, second in zip(frames, frames2))

    result3 = loads(frames2)
    assert result == result3
Example #9
0
def test_loads_without_deserialization_avoids_compression():
    """Frames stay small when loaded with ``deserialize=False``.

    A 100000-byte payload of a single repeated byte is dumped (lz4 required);
    both the dumped frames and the frames held after an undeserialized load
    must total fewer than 10000 bytes, and a second dump/load cycle must
    recover the original payload.
    """
    pytest.importorskip('lz4')
    payload = b'0' * 100000

    frames = dumps({'x': 1, 'data': to_serialize(payload)})
    dumped_size = sum(nbytes(frame) for frame in frames)
    assert dumped_size < 10000

    passthrough = loads(frames, deserialize=False)
    held_size = sum(nbytes(frame) for frame in passthrough['data'].frames)
    assert held_size < 10000

    # Re-dump the undeserialized message and load it fully.
    redumped = dumps(passthrough)
    final = loads(redumped)

    assert final == {'x': 1, 'data': b'0' * 100000}
Example #10
0
def test_large_messages():
    """Round-trip a message holding two 200,000,000-byte numpy arrays.

    The test is skipped when numpy, psutil or lz4 cannot be imported, when
    the machine reports less than 8e9 bytes of total memory, or when running
    on Python 2.
    """
    np = pytest.importorskip('numpy')
    psutil = pytest.importorskip('psutil')
    pytest.importorskip('lz4')
    if psutil.virtual_memory().total < 8e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")

    if sys.version_info.major == 2:
        # A test function must not return a value; skip explicitly instead.
        pytest.skip("test is not run on Python 2")

    x = np.random.randint(0, 255, size=200000000, dtype='u1')

    msg = {
        'x': [Serialize(x), b'small_bytes'],
        'y': {
            'a': Serialize(x),
            'b': b'small_bytes'
        }
    }

    b = dumps(msg)
    msg2 = loads(b)
    # Small byte strings come back unchanged.
    assert msg['x'][1] == msg2['x'][1]
    assert msg['y']['b'] == msg2['y']['b']
    # Wrapped arrays come back as arrays equal to the wrapped data.
    assert (msg['x'][0].data == msg2['x'][0]).all()
    assert (msg['y']['a'].data == msg2['y']['a']).all()
Example #11
0
def test_large_messages():
    """Round-trip a message containing bytes objects of 2**31 + 10 bytes.

    Skipped when psutil or lz4 cannot be imported, or when the machine
    reports less than 8e9 bytes of total memory.
    """
    psutil = pytest.importorskip('psutil')
    pytest.importorskip('lz4')
    if psutil.virtual_memory().total < 8e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")

    def f(n):
        """
        Want to avoid compiling b'0' * 2**31 as a constant during
        setup.py install, so we turn this into a function and call it in
        the next line

        Otherwise this takes up 2 GB of memory during install
        """
        return b'0' * (2**n + 10)
    big_bytes = f(31)
    msg = {'x': [big_bytes, b'small_bytes'],
           'y': {'a': big_bytes, 'b': b'small_bytes'}}

    b = dumps(msg)
    msg2 = loads(b)
    assert msg == msg2

    # Frame index 2 is read below, so at least three frames are required.
    assert len(b) > 2
    # `raw=False` decodes msgpack strings to str; the older `encoding=`
    # keyword was removed in msgpack 1.0.
    big_header = msgpack.loads(b[2], raw=False)
    assert len(big_header['shards']) == 2
    assert len(big_header['keys']) + 2 + 1 == len(b)
Example #12
0
def test_large_messages():
    """Round-trip a message holding two 200,000,000-byte numpy arrays.

    The test is skipped when numpy, psutil or lz4 cannot be imported, when
    the machine reports less than 8e9 bytes of total memory, or when running
    on Python 2.
    """
    np = pytest.importorskip("numpy")
    psutil = pytest.importorskip("psutil")
    pytest.importorskip("lz4")
    if psutil.virtual_memory().total < 8e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")

    if sys.version_info.major == 2:
        # A test function must not return a value; skip explicitly instead.
        pytest.skip("test is not run on Python 2")

    x = np.random.randint(0, 255, size=200000000, dtype="u1")

    msg = {
        "x": [Serialize(x), b"small_bytes"],
        "y": {
            "a": Serialize(x),
            "b": b"small_bytes"
        },
    }

    b = dumps(msg)
    msg2 = loads(b)
    # Small byte strings come back unchanged.
    assert msg["x"][1] == msg2["x"][1]
    assert msg["y"]["b"] == msg2["y"]["b"]
    # Wrapped arrays come back as arrays equal to the wrapped data.
    assert (msg["x"][0].data == msg2["x"][0]).all()
    assert (msg["y"]["a"].data == msg2["y"]["a"]).all()
Example #13
0
def test_loads_without_deserialization_avoids_compression():
    """Undeserialized loads keep the frame payload under 10000 bytes in total."""
    pytest.importorskip("lz4")
    blob = b"0" * 100000

    original = {"x": 1, "data": to_serialize(blob)}
    frames = dumps(original)

    total_dumped = sum(nbytes(frame) for frame in frames)
    assert total_dumped < 10000

    untouched = loads(frames, deserialize=False)
    inner_frames = untouched["data"].frames
    assert sum(nbytes(frame) for frame in inner_frames) < 10000

    # A second dump/load cycle must produce the fully decoded message.
    decoded = loads(dumps(untouched))

    assert decoded == {"x": 1, "data": b"0" * 100000}
Example #14
0
def test_loads_without_deserialization_avoids_compression():
    """Frame lengths stay below 10000 in total with ``deserialize=False``.

    Sizes are measured with ``len`` on each frame.
    """
    pytest.importorskip('lz4')
    blob = b'0' * 100000

    frames = dumps({'x': 1, 'data': to_serialize(blob)})
    assert sum(len(frame) for frame in frames) < 10000

    held = loads(frames, deserialize=False)
    held_frames = held['data'].frames
    assert sum(len(frame) for frame in held_frames) < 10000

    # Dump the still-wrapped message again, then decode it completely.
    redumped = dumps(held)
    decoded = loads(redumped)

    assert decoded == {'x': 1, 'data': b'0' * 100000}
Example #15
0
def test_large_messages():
    """Round-trip a message containing bytes objects of 2**31 + 10 bytes.

    Skipped when psutil or lz4 cannot be imported, or when the machine
    reports less than 8e9 bytes of total memory.
    """
    psutil = pytest.importorskip('psutil')
    pytest.importorskip('lz4')
    if psutil.virtual_memory().total < 8e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")

    def f(n):
        """
        Want to avoid compiling b'0' * 2**31 as a constant during
        setup.py install, so we turn this into a function and call it in
        the next line

        Otherwise this takes up 2 GB of memory during install
        """
        return b'0' * (2**n + 10)

    big_bytes = f(31)
    msg = {
        'x': [big_bytes, b'small_bytes'],
        'y': {
            'a': big_bytes,
            'b': b'small_bytes'
        }
    }

    b = dumps(msg)
    msg2 = loads(b)
    assert msg == msg2

    # Frame index 2 is read below, so at least three frames are required.
    assert len(b) > 2
    # `raw=False` decodes msgpack strings to str; the older `encoding=`
    # keyword was removed in msgpack 1.0.
    big_header = msgpack.loads(b[2], raw=False)
    assert len(big_header['shards']) == 2
    assert len(big_header['keys']) + 2 + 1 == len(b)
Example #16
0
def test_compression_1():
    """Dumped frames of an all-ones buffer are shorter in total than the buffer."""
    pytest.importorskip('lz4')
    np = pytest.importorskip('numpy')
    x = np.ones(1000000)
    raw = x.tobytes()

    frames = dumps({'x': Serialize(raw)})
    total_len = sum(len(frame) for frame in frames)
    assert total_len < x.nbytes

    y = loads(frames)
    assert {'x': raw} == y
Example #17
0
def test_compression():
    """The dumped form of an all-ones buffer is shorter than the raw buffer."""
    pytest.importorskip('lz4')
    np = pytest.importorskip('numpy')
    x = np.ones(1000000)
    raw = x.tobytes()

    dumped = dumps(raw)
    assert len(dumped) < x.nbytes

    restored = loads(dumped)
    assert raw == restored
Example #18
0
def test_dumps_loads_Serialized():
    """A pre-serialized value round-trips and can be re-dumped unchanged."""
    pre_serialized = Serialized(*serialize(123))
    msg = {'x': 1, 'data': pre_serialized}

    frames = dumps(msg)
    assert len(frames) > 2

    result = loads(frames)
    assert result == {'x': 1, 'data': 123}

    # Loading without deserialization gives back something equal to `msg`.
    kept = loads(frames, deserialize=False)
    assert kept == msg

    # Re-dumping must give frames that match the first dump pairwise.
    frames2 = dumps(kept)
    assert all(eq_frames(old, new) for old, new in zip(frames, frames2))

    result3 = loads(frames2)
    assert result == result3
Example #19
0
def test_compression_1():
    """Total ``nbytes`` of the dumped frames is below the raw array size."""
    pytest.importorskip('lz4')
    np = pytest.importorskip('numpy')
    x = np.ones(1000000)
    raw = x.tobytes()

    frames = dumps({'x': Serialize(raw)})
    frame_bytes = sum(nbytes(frame) for frame in frames)
    assert frame_bytes < x.nbytes

    y = loads(frames)
    assert {'x': raw} == y
Example #20
0
def test_compression():
    """Dumping the bytes of an all-ones array gives something shorter than it."""
    pytest.importorskip('lz4')
    np = pytest.importorskip('numpy')
    x = np.ones(1000000)
    buffer = x.tobytes()

    encoded = dumps(buffer)
    assert len(encoded) < x.nbytes

    # Loading must return the original bytes.
    assert buffer == loads(encoded)
Example #21
0
def test_dumps_loads_Serialize():
    """A ``Serialize`` value round-trips; undeserialized loads reuse frames.

    With ``deserialize=False`` the value comes back as ``Serialized`` and at
    least one of its frames is the very same object as one of the dumped
    frames.
    """
    frames = dumps({"x": 1, "data": Serialize(123)})
    assert len(frames) > 2

    result = loads(frames)
    assert result == {"x": 1, "data": 123}

    result2 = loads(frames, deserialize=False)
    assert result2["x"] == 1
    assert isinstance(result2["data"], Serialized)
    # Identity check: some held frame is one of the dumped frame objects.
    shares_frame = any(
        held is dumped for held in result2["data"].frames for dumped in frames
    )
    assert shares_frame

    frames2 = dumps(result2)
    assert all(eq_frames(first, second) for first, second in zip(frames, frames2))

    result3 = loads(frames2)
    assert result == result3
Example #22
0
def test_big_bytes_protocol():
    """A 2,000,000-byte random bytes value appears among the dumped output."""
    np = pytest.importorskip('numpy')
    data = np.random.randint(0, 255, dtype='u1', size=2000000).tobytes()
    message = {'x': data, 'y': b'1' * 2000000}

    dumped = dumps(message)
    # The random payload must be contained in the dumped result.
    assert message['x'] in dumped

    restored = loads(dumped)
    assert restored == message
Example #23
0
def test_big_bytes_protocol():
    """Dump and reload a message with two 2,000,000-byte bytes values."""
    np = pytest.importorskip('numpy')
    random_bytes = np.random.randint(0, 255, dtype='u1', size=2000000).tobytes()
    d = {'x': random_bytes, 'y': b'1' * 2000000}

    L = dumps(d)
    # Membership test of the 'x' payload against the dumped result.
    assert d['x'] in L
    assert loads(L) == d
Example #24
0
def test_compression_1():
    """The payload part of a (header, payload) dump is shorter than the input."""
    pytest.importorskip('lz4')
    np = pytest.importorskip('numpy')
    x = np.ones(1000000)
    raw = x.tobytes()

    # dumps is expected to return exactly two items here.
    header, payload = dumps(raw)
    assert len(payload) < x.nbytes

    restored = loads([header, payload])
    assert raw == restored
Example #25
0
def test_compression_1():
    """Summed ``nbytes`` over the dumped frames is less than the array size."""
    pytest.importorskip("lz4")
    np = pytest.importorskip("numpy")
    x = np.ones(1000000)
    buffer = x.tobytes()

    frames = dumps({"x": Serialize(buffer)})
    assert sum(nbytes(frame) for frame in frames) < x.nbytes

    loaded = loads(frames)
    assert {"x": buffer} == loaded
Example #26
0
def test_large_bytes():
    """Round-trip large ``bytes``/``bytearray`` values wrapped with to_serialize.

    The loaded message must equal the input with the wrapper replaced by its
    ``.data``, and the first two frames must be shorter than 1000 bytes.
    """
    for container_type in (bytes, bytearray):
        wrapped = to_serialize(container_type(b"0" * 1000000))
        frames = dumps({"x": wrapped, "y": 1})

        # Expected result: same message with the wrapper unwrapped.
        expected = {"x": wrapped.data, "y": 1}
        assert loads(frames) == expected

        for index in (0, 1):
            assert len(frames[index]) < 1000
Example #27
0
def test_compression():
    """Dump an all-ones buffer into (header, payload) and check payload size."""
    pytest.importorskip('lz4')
    np = pytest.importorskip('numpy')
    x = np.ones(1000000)
    source = x.tobytes()

    header, payload = dumps(source)
    assert len(payload) < x.nbytes

    # Reassemble the two parts and load them back.
    y = loads([header, payload])
    assert source == y
Example #28
0
def test_dumps_numpy_writable(writeable):
    """The writeable flag of a numpy array survives a dumps/loads round trip.

    ``writeable`` is the flag value applied to the source array before it is
    serialized.
    """
    original = np.arange(1000)
    original.flags.writeable = writeable

    # Make all frames read-only
    frames = [ensure_bytes(frame) for frame in dumps([to_serialize(original)])]

    [restored] = loads(frames)
    assert (original == restored).all()
    assert restored.flags.writeable == original.flags.writeable
def test_ToPickle():
    """An instance of a locally defined class wrapped in ``ToPickle`` round-trips."""
    # Simple attribute holder defined inside the test.
    class Foo:
        def __init__(self, data):
            self.data = data

    wrapped = ToPickle(Foo(123))
    frames = dumps({"x": wrapped})

    restored = loads(frames)
    assert restored["x"].data == 123
Example #30
0
def test_dumps_pandas_writable():
    """A pandas Series loaded from read-only frames can be assigned into."""
    source = pd.Series(np.arange(1000))

    # Make all frames read-only
    frames = [ensure_bytes(frame) for frame in dumps([to_serialize(source)])]

    [restored] = loads(frames)
    assert (source == restored).all()
    # In-place assignment on the restored series must not raise.
    restored[...] = 0
Example #31
0
def test_large_messages_map():
    """A 100000-entry int -> str mapping round-trips through dumps/loads.

    Skipped when ``MEMORY_LIMIT`` is below 8e9.
    """
    if MEMORY_LIMIT < 8e9:
        pytest.skip("insufficient memory")

    mapping = {key: "mystring_%d" % key for key in range(100000)}

    restored = loads(dumps(mapping))
    assert mapping == restored
Example #32
0
def test_large_messages_map():
    """A 100000-entry int -> str mapping round-trips through dumps/loads.

    Skipped when psutil reports less than 8e9 bytes of total memory.
    """
    import psutil
    total_memory = psutil.virtual_memory().total
    if total_memory < 8e9:
        pytest.skip("insufficient memory")

    mapping = {key: 'mystring_%d' % key for key in range(100000)}

    encoded = dumps(mapping)
    assert mapping == loads(encoded)
Example #33
0
def test_compression_without_deserialization():
    """Every frame stays under 1000000 in length, before and after loading.

    The check is applied to the dumped frames and to the frames held by the
    ``deserialize=False`` result.
    """
    pytest.importorskip('lz4')
    np = pytest.importorskip('numpy')
    x = np.ones(1000000)
    limit = 1000000

    frames = dumps({'x': Serialize(x)})
    assert not any(len(frame) >= limit for frame in frames)

    msg = loads(frames, deserialize=False)
    held_frames = msg['x'].frames
    assert not any(len(frame) >= limit for frame in held_frames)
Example #34
0
def test_compression_without_deserialization():
    """No frame reaches length 1000000, even when deserialization is skipped."""
    pytest.importorskip("lz4")
    np = pytest.importorskip("numpy")
    ones = np.ones(1000000)
    max_len = 1000000

    frames = dumps({"x": Serialize(ones)})
    for frame in frames:
        assert len(frame) < max_len

    undecoded = loads(frames, deserialize=False)
    for frame in undecoded["x"].frames:
        assert len(frame) < max_len
Example #35
0
def test_large_messages():
    """Round-trip a message whose value is a bytes object of 2**31 + 10 bytes.

    Skipped when psutil or lz4 cannot be imported, or when the machine
    reports less than 8e9 bytes of total memory.
    """
    psutil = pytest.importorskip('psutil')
    pytest.importorskip('lz4')
    if psutil.virtual_memory().total < 8e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")
    msg = {'x': b'0' * (2**31 + 10)}

    b = dumps(msg)
    msg2 = loads(b)
    assert msg == msg2
Example #36
0
def test_large_messages_map():
    """Dump and reload a dict with 100000 integer keys and string values.

    Skipped when psutil reports less than 8e9 bytes of total memory.
    """
    import psutil
    memory_info = psutil.virtual_memory()
    if memory_info.total < 8e9:
        pytest.skip("insufficient memory")

    x = {index: 'mystring_%d' % index for index in range(100000)}

    assert x == loads(dumps(x))
Example #37
0
def test_dumps_serialize_numpy_large():
    """Round-trip a uint8 array of ``BIG_BYTES_SHARD_SIZE * 2`` elements.

    Skipped when psutil cannot be imported or when the machine reports less
    than 4e9 bytes of total memory.
    """
    psutil = pytest.importorskip('psutil')
    if psutil.virtual_memory().total < 4e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")
    # Draw uint8 values directly instead of creating a default-integer array
    # and casting it afterwards.
    x = np.random.randint(0, 255,
                          size=int(BIG_BYTES_SHARD_SIZE * 2),
                          dtype='u1')
    frames = dumps([to_serialize(x)])
    [y] = loads(frames)

    np.testing.assert_equal(x, y)
Example #38
0
def test_dumps_loads_Serialize():
    """Round-trip a ``Serialize`` value and verify frame reuse on lazy loads."""
    msg = {'x': 1, 'data': Serialize(123)}
    frames = dumps(msg)
    assert len(frames) > 2

    result = loads(frames)
    assert result == {'x': 1, 'data': 123}

    result2 = loads(frames, deserialize=False)
    assert result2['x'] == 1
    assert isinstance(result2['data'], Serialized)
    # At least one held frame must be identical (same object) to a dumped one.
    frame_is_shared = any(
        held is dumped
        for held in result2['data'].frames
        for dumped in frames
    )
    assert frame_is_shared

    frames2 = dumps(result2)
    assert all(eq_frames(old, new) for old, new in zip(frames, frames2))

    result3 = loads(frames2)
    assert result == result3
Example #39
0
 def _from_frames():
     """Load the enclosing scope's ``frames`` via ``protocol.loads``.

     On ``EOFError`` an error is logged with the stream size and, when the
     size does not exceed 1000, the frames themselves; the exception is then
     re-raised.
     """
     try:
         return protocol.loads(
             frames, deserialize=deserialize, deserializers=deserializers
         )
     except EOFError:
         # Aid diagnosing
         datastr = "[too large to display]" if size > 1000 else frames
         logger.error("truncated data stream (%d bytes): %s", size, datastr)
         raise
Example #40
0
def test_dumps_serialize_numpy_large():
    """Round-trip a uint8 view of ``BIG_BYTES_SHARD_SIZE * 2`` bytes.

    The source array is deleted before loading; equality is verified through
    dtype, shape and a CRC32 checksum captured beforehand.
    Skipped when ``MEMORY_LIMIT`` is below 2e9.
    """
    if MEMORY_LIMIT < 2e9:
        pytest.skip("insufficient memory")

    expected_nbytes = BIG_BYTES_SHARD_SIZE * 2
    x = np.random.random(size=int(expected_nbytes // 8)).view("u1")
    assert x.nbytes == expected_nbytes

    frames = dumps([to_serialize(x)])
    # Record what is needed for comparison, then drop the source array.
    expected_meta = (x.dtype, x.shape)
    checksum = crc32(x)
    del x

    [y] = loads(frames)

    assert (y.dtype, y.shape) == expected_meta
    assert crc32(y) == checksum, "Arrays are unequal"
Example #41
0
def test_dumps_serialize_numpy_large():
    """Round-trip a uint8 view of ``BIG_BYTES_SHARD_SIZE * 2`` bytes.

    The source array is deleted before loading; equality is verified through
    dtype, shape and a CRC32 checksum captured beforehand. Skipped when
    psutil cannot be imported or when the machine reports less than 2e9
    bytes of total memory.
    """
    psutil = pytest.importorskip('psutil')
    if psutil.virtual_memory().total < 2e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")
    x = np.random.random(size=int(BIG_BYTES_SHARD_SIZE * 2 // 8)).view('u1')
    assert x.nbytes == BIG_BYTES_SHARD_SIZE * 2
    frames = dumps([to_serialize(x)])
    # Record what is needed for comparison, then drop the source array.
    dtype, shape = x.dtype, x.shape
    checksum = crc32(x)
    del x
    [y] = loads(frames)

    assert (y.dtype, y.shape) == (dtype, shape)
    assert crc32(y) == checksum, "Arrays are unequal"
Example #42
0
def test_dumps_serialize_numpy_large():
    """Round-trip a uint8 view of ``BIG_BYTES_SHARD_SIZE * 2`` bytes.

    The source array is deleted before loading; equality is verified through
    dtype, shape and a CRC32 checksum captured beforehand. Skipped when
    psutil cannot be imported or when the machine reports less than 2e9
    bytes of total memory.
    """
    psutil = pytest.importorskip('psutil')
    if psutil.virtual_memory().total < 2e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")
    x = np.random.random(size=int(BIG_BYTES_SHARD_SIZE * 2 // 8)).view('u1')
    assert x.nbytes == BIG_BYTES_SHARD_SIZE * 2
    frames = dumps([to_serialize(x)])
    # Record what is needed for comparison, then drop the source array.
    dtype, shape = x.dtype, x.shape
    checksum = crc32(x)
    del x
    [y] = loads(frames)

    assert (y.dtype, y.shape) == (dtype, shape)
    assert crc32(y) == checksum, "Arrays are unequal"
Example #43
0
def test_big_bytes_protocol():
    """A 2**21-byte value is emitted as frame 3 regardless of nesting depth.

    The same checks run for the value placed directly in a dict, inside a
    list, and inside a list within a nested dict.
    """
    np = pytest.importorskip('numpy')
    data = np.random.randint(0, 255, dtype='u1', size=2**21).tobytes()

    messages = (
        {'x': data, 'y': 'foo'},
        {'x': [data], 'y': 'foo'},
        {'x': {'z': [data, 'small_data']}, 'y': 'foo'},
    )
    for d in messages:
        frames = dumps(d)
        assert len(frames) == 4     # Only `data` is extracted
        assert data is frames[3]    # `data` isn't sharded as it's too short
        assert loads(frames) == d
Example #44
0
def test_dumps_serialize_numpy_large():
    """Round-trip a large uint8 view and check the result shares its buffer.

    After the dtype/shape/CRC32 comparison, the source array is overwritten
    in place and the loaded array is expected to reflect the change.
    Skipped when ``MEMORY_LIMIT`` is below 2e9.
    """
    if MEMORY_LIMIT < 2e9:
        pytest.skip("insufficient memory")

    expected_nbytes = BIG_BYTES_SHARD_SIZE * 2
    x = np.random.random(size=int(expected_nbytes // 8)).view("u1")
    assert x.nbytes == expected_nbytes

    frames = dumps([to_serialize(x)])
    expected_meta = (x.dtype, x.shape)
    checksum = crc32(x)

    [y] = loads(frames)

    assert (y.dtype, y.shape) == expected_meta
    assert crc32(y) == checksum, "Arrays are unequal"

    x[:] = 2  # shared buffer; serialization is zero-copy
    assert (x == y).all(), "Data was copied"
Example #45
0
def test_big_bytes_protocol():
    """Check frame extraction of a 2**21-byte value at three nesting levels."""
    np = pytest.importorskip('numpy')
    data = np.random.randint(0, 255, dtype='u1', size=2**21).tobytes()

    # Top-level value, value inside a list, value inside a nested dict/list.
    cases = [
        {'x': data, 'y': 'foo'},
        {'x': [data], 'y': 'foo'},
        {'x': {'z': [data, 'small_data']}, 'y': 'foo'},
    ]
    for d in cases:
        frames = dumps(d)
        assert len(frames) == 4     # Only `data` is extracted
        assert data is frames[3]    # `data` isn't sharded as it's too short
        dd = loads(frames)
        assert dd == d
Example #46
0
def test_serialize_deserialize_model():
    """A trained two-layer Keras model predicts the same after serialization.

    Both the direct serialize/deserialize pair and the dumps/loads message
    path are checked.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(5, input_dim=3))
    model.add(keras.layers.Dense(2))
    model.compile(optimizer='sgd', loss='mse')

    inputs = np.random.random((1, 3))
    targets = np.random.random((1, 2))
    model.train_on_batch(inputs, targets)

    # Direct serialize -> deserialize.
    loaded = deserialize(*serialize(model))
    assert_allclose(loaded.predict(inputs), model.predict(inputs))

    # Through a full protocol message.
    frames = dumps({'model': to_serialize(model)})
    result = loads(frames)
    assert_allclose(result['model'].predict(inputs), model.predict(inputs))
Example #47
0
def test_serialize_deserialize_model():
    """Serialized-and-restored Keras models give predictions close to the original."""
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(5, input_dim=3))
    model.add(keras.layers.Dense(2))
    model.compile(optimizer='sgd', loss='mse')

    # One training step on random data so the model has been fitted.
    sample_x = np.random.random((1, 3))
    sample_y = np.random.random((1, 2))
    model.train_on_batch(sample_x, sample_y)

    restored = deserialize(*serialize(model))
    assert_allclose(restored.predict(sample_x), model.predict(sample_x))

    message = {'model': to_serialize(model)}
    decoded = loads(dumps(message))
    assert_allclose(decoded['model'].predict(sample_x), model.predict(sample_x))
Example #48
0
def test_large_messages():
    """Round-trip a message holding two 200,000,000-byte numpy arrays.

    The test is skipped when numpy, psutil or lz4 cannot be imported, when
    the machine reports less than 8e9 bytes of total memory, or when running
    on Python 2.
    """
    np = pytest.importorskip('numpy')
    psutil = pytest.importorskip('psutil')
    pytest.importorskip('lz4')
    if psutil.virtual_memory().total < 8e9:
        # Report as skipped rather than letting the test silently pass.
        pytest.skip("insufficient memory")

    if sys.version_info.major == 2:
        # A test function must not return a value; skip explicitly instead.
        pytest.skip("test is not run on Python 2")

    x = np.random.randint(0, 255, size=200000000, dtype='u1')

    msg = {'x': [Serialize(x), b'small_bytes'],
           'y': {'a': Serialize(x), 'b': b'small_bytes'}}

    b = dumps(msg)
    msg2 = loads(b)
    # Small byte strings come back unchanged.
    assert msg['x'][1] == msg2['x'][1]
    assert msg['y']['b'] == msg2['y']['b']
    # Wrapped arrays come back as arrays equal to the wrapped data.
    assert (msg['x'][0].data == msg2['x'][0]).all()
    assert (msg['y']['a'].data == msg2['y']['a']).all()
Example #49
0
def test_serialize_numpy_ma_masked_array(x):
    """A masked array keeps its data, dtype, mask and fill value on round trip."""
    frames = dumps([to_serialize(x)])
    [y] = loads(frames)

    assert x.data.dtype == y.data.dtype
    np.testing.assert_equal(x.data, y.data)
    np.testing.assert_equal(x.mask, y.mask)
    np.testing.assert_equal(x.fill_value, y.fill_value)
Example #50
0
def test_serialize_numpy_ma_masked():
    """``np.ma.masked`` comes back as the very same object after a round trip."""
    frames = dumps([to_serialize(np.ma.masked)])
    [restored] = loads(frames)
    assert restored is np.ma.masked
Example #51
0
def test_small_and_big():
    """A message mixing a short list and a 10,000,000-byte value round-trips."""
    message = {'x': [1, 2, 3], 'y': b'0' * 10000000}
    dumped = dumps(message)
    restored = loads(dumped)
    assert restored == message
Example #52
0
def test_empty_loads():
    """An ``Empty`` instance round-trips through dumps/loads as an ``Empty``."""
    from distributed.protocol import loads, dumps
    frames = dumps([to_serialize(Empty())])
    restored = loads(frames)
    assert isinstance(restored[0], Empty)
Example #53
0
def test_protocol():
    """Simple scalars, strings, bytes and small dicts round-trip unchanged."""
    samples = (1, 'a', b'a', {'x': 1}, {b'x': 1}, {})
    for msg in samples:
        restored = loads(dumps(msg))
        assert restored == msg