Exemple #1
0
def test_dataframe_object_dtype():
    """Check sizeof bounds for object-dtype pandas data holding strings."""
    pd = pytest.importorskip('pandas')

    # One column of 1000 single-character strings.
    frame = pd.DataFrame({'x': ['a'] * 1000})
    unit = sizeof('a')
    assert unit * 1000 < sizeof(frame) < 2 * unit * 1000

    # A Series of 1000 entries, each a 1000-character string.
    long_strings = pd.Series(['a' * 1000] * 1000)
    assert sizeof(long_strings) > 1000000
Exemple #2
0
def test_serires_object_dtype():
    """Check sizeof bounds for object-dtype pandas Series holding strings."""
    pd = pytest.importorskip('pandas')

    # 1000 single-character strings.
    short_strings = pd.Series(['a'] * 1000)
    unit = sizeof('a')
    assert unit * 1000 < sizeof(short_strings) < 2 * unit * 1000

    # 1000 entries, each a 1000-character string.
    long_strings = pd.Series(['a' * 1000] * 1000)
    assert sizeof(long_strings) > 1000000
Exemple #3
0
def test_empty():
    """sizeof is positive for a zero-row frame, its columns and its index."""
    pd = pytest.importorskip('pandas')
    data = {'x': [1, 2, 3], 'y': ['a' * 100, 'b' * 100, 'c' * 100]}
    populated = pd.DataFrame(data, index=[10, 20, 30])
    empty = populated.head(0)

    for part in (empty, empty.x, empty.y, empty.index):
        assert sizeof(part) > 0
Exemple #4
0
def safe_sizeof(obj, default_size=1e6):
    """ Safe variant of sizeof that captures and logs exceptions

    Parameters
    ----------
    obj : object
        The object to measure with ``sizeof``.
    default_size : number, optional
        Value reported when ``sizeof`` raises; it is converted with ``int``.
        Defaults to 1e6.

    Returns
    -------
    The result of ``sizeof(obj)``, or ``int(default_size)`` if ``sizeof``
    raised an ``Exception`` (in which case a warning with the traceback is
    logged).
    """
    try:
        return sizeof(obj)
    except Exception:
        fallback = int(default_size)
        # Report the fallback actually used: the caller may have overridden
        # the default, so a hard-coded "1MB" would be misleading.
        logger.warning('Sizeof calculation failed.  Defaulting to %d bytes',
                       fallback, exc_info=True)
        return fallback
Exemple #5
0
def test_sparse_matrix():
    """Each sparse format of ``sparse.eye(10)`` meets a minimum sizeof."""
    sparse = pytest.importorskip('scipy.sparse')
    identity = sparse.eye(10)

    # (conversion method, minimum expected size), checked in this order.
    minimum_sizes = [
        ('todia', 152),
        ('tobsr', 232),
        ('tocoo', 252),
        ('tocsc', 232),
        ('tocsr', 260),
        ('todok', 260),
        ('tolil', 324),
    ]
    for converter, minimum in minimum_sizes:
        converted = getattr(identity, converter)()
        assert sizeof(converted) >= minimum
Exemple #6
0
def test_sparse_matrix():
    """Each sparse format of ``sparse.eye(10)`` meets a minimum sizeof."""
    sparse = pytest.importorskip('scipy.sparse')
    identity = sparse.eye(10)

    # (conversion method, minimum expected size), checked in this order.
    # These are the 32-bit Python 2.7 values.
    minimum_sizes = [
        ('todia', 152),
        ('tobsr', 232),
        ('tocoo', 240),
        ('tocsc', 232),
        ('tocsr', 232),
        ('todok', 192),
        ('tolil', 204),
    ]
    for converter, minimum in minimum_sizes:
        converted = getattr(identity, converter)()
        assert sizeof(converted) >= minimum
def test_pandas_repeated_column():
    """Selecting the same column three times gives a larger sizeof."""
    pd = pytest.importorskip('pandas')
    single = pd.DataFrame({'x': [1, 2, 3]})
    tripled = single[['x', 'x', 'x']]

    assert sizeof(tripled) > sizeof(single)
Exemple #8
0
def test_pandas_repeated_column():
    """A frame that repeats column 'x' three times outweighs the original."""
    pd = pytest.importorskip('pandas')
    base = pd.DataFrame({'x': [1, 2, 3]})
    repeated = base[['x', 'x', 'x']]

    assert sizeof(repeated) > sizeof(base)
def test_spillbuffer(tmpdir):
    """Walk a SpillBuffer (target=300) through inserts, reads, deletes and updates.

    After each step the test asserts which keys are in ``buf.slow`` and that
    its per-key and total weights equal ``psize(...)`` of the spilled values.
    """
    buf = SpillBuffer(str(tmpdir), target=300)
    # Convenience aliases
    assert buf.memory is buf.fast
    assert buf.disk is buf.slow

    # A fresh buffer has nothing spilled
    assert not buf.slow.weight_by_key
    assert buf.slow.total_weight == (0, 0)
    assert buf.spilled_total == (0, 0)

    a, b, c, d = "a" * 100, "b" * 99, "c" * 98, "d" * 97

    # Test assumption made by this test, mostly for non CPython implementations
    assert 100 < sizeof(a) < 200
    # The two components of psize(a) are expected to differ
    assert psize(a)[0] != psize(a)[1]

    buf["a"] = a
    assert not buf.slow
    assert buf.fast.weights == {"a": sizeof(a)}
    assert buf.fast.total_weight == sizeof(a)
    assert buf.slow.weight_by_key == {}
    assert buf.slow.total_weight == (0, 0)
    assert buf["a"] == a

    buf["b"] = b
    assert not buf.slow
    assert not buf.slow.weight_by_key
    assert buf.slow.total_weight == (0, 0)

    # Adding a third key: "a" is expected in slow afterwards
    buf["c"] = c
    assert set(buf.slow) == {"a"}
    assert buf.slow.weight_by_key == {"a": psize(a)}
    assert buf.slow.total_weight == psize(a)

    # Reading "a" back: afterwards "b" is the spilled key instead
    assert buf["a"] == a
    assert set(buf.slow) == {"b"}
    assert buf.slow.weight_by_key == {"b": psize(b)}
    assert buf.slow.total_weight == psize(b)

    buf["d"] = d
    assert set(buf.slow) == {"b", "c"}
    assert buf.slow.weight_by_key == {"b": psize(b), "c": psize(c)}
    assert buf.slow.total_weight == psize(b, c)

    # Deleting an in-memory key does not automatically move spilled keys back to memory
    del buf["a"]
    assert set(buf.slow) == {"b", "c"}
    assert buf.slow.weight_by_key == {"b": psize(b), "c": psize(c)}
    assert buf.slow.total_weight == psize(b, c)
    with pytest.raises(KeyError):
        buf["a"]

    # Deleting a spilled key updates the metadata
    del buf["b"]
    assert set(buf.slow) == {"c"}
    assert buf.slow.weight_by_key == {"c": psize(c)}
    assert buf.slow.total_weight == psize(c)
    with pytest.raises(KeyError):
        buf["b"]

    # Updating a spilled key moves it to the top of the LRU and to memory
    buf["c"] = c * 2
    assert set(buf.slow) == {"d"}
    assert buf.slow.weight_by_key == {"d": psize(d)}
    assert buf.slow.total_weight == psize(d)

    # Single key is larger than target and goes directly into slow
    e = "e" * 500

    buf["e"] = e
    assert set(buf.slow) == {"d", "e"}
    assert buf.slow.weight_by_key == {"d": psize(d), "e": psize(e)}
    assert buf.slow.total_weight == psize(d, e)

    # Updating a spilled key with another larger than target updates slow directly
    d = "d" * 500
    buf["d"] = d
    assert set(buf.slow) == {"d", "e"}
    assert buf.slow.weight_by_key == {"d": psize(d), "e": psize(e)}
    assert buf.slow.total_weight == psize(d, e)
Exemple #10
0
def test_bytes_like():
    """sizeof of bytes-like objects stays within bounds of their payload."""
    # Three 1000-byte buffers, checked in this order.
    thousand_byte_buffers = (
        bytes(1000),
        bytearray(1000),
        memoryview(bytes(1000)),
    )
    for buffer in thousand_byte_buffers:
        assert 1000 <= sizeof(buffer) <= 2000

    # 1000 items of typecode "d".
    doubles = array("d", range(1000))
    assert 8000 <= sizeof(doubles) <= 9000
Exemple #11
0
def test_pyarrow_table():
    """Check sizeof on a pyarrow Table and its first three columns.

    Covers both a populated table and one built from a zero-row frame.
    """
    pd = pytest.importorskip("pandas")
    pa = pytest.importorskip("pyarrow")
    frame = pd.DataFrame(
        {"x": [1, 2, 3], "y": ["a" * 100, "b" * 100, "c" * 100]}, index=[10, 20, 30]
    )
    table = pa.Table.from_pandas(frame)

    assert sizeof(table) > sizeof(table.schema.metadata)
    assert isinstance(sizeof(table), int)
    for position in range(3):
        assert isinstance(sizeof(table.columns[position]), int)

    empty = pa.Table.from_pandas(frame.head(0))

    assert sizeof(empty) > sizeof(empty.schema.metadata)
    for position in range(3):
        assert sizeof(empty.columns[position]) > 0
Exemple #12
0
def test_sizeof(dtype):
    """sizeof of a cupy array equals the array's ``nbytes``."""
    shape = (2, 3, 4)
    arr = cupy.random.random(shape, dtype=dtype)

    assert sizeof(arr) == arr.nbytes
Exemple #13
0
def test_pandas():
    """Check relative sizes and return types of sizeof on pandas objects."""
    pd = pytest.importorskip("pandas")
    columns = {"x": [1, 2, 3], "y": ["a" * 100, "b" * 100, "c" * 100]}
    frame = pd.DataFrame(columns, index=[10, 20, 30])

    # Size relationships between the frame, its columns and its index.
    assert sizeof(frame) >= sizeof(frame.x) + sizeof(frame.y) - sizeof(frame.index)
    assert sizeof(frame.x) >= sizeof(frame.index)
    assert sizeof(frame.y) >= 100 * 3
    assert sizeof(frame.index) >= 20

    # sizeof must hand back plain ints.
    for part in (frame, frame.x, frame.index):
        assert isinstance(sizeof(part), int)
def test_safe_sizeof(obj):
    """safe_sizeof agrees with sizeof for the given object."""
    measured = safe_sizeof(obj)
    assert measured == sizeof(obj)
def psize(*objs) -> tuple[int, int]:
    """Return a pair of totals computed over ``objs``.

    The first element is the sum of ``sizeof(o)`` for every object; the
    second is the summed ``len`` of every frame that
    ``serialize_bytelist(obj)`` yields. With no arguments the result is
    ``(0, 0)``.
    """
    sizeof_total = 0
    for item in objs:
        sizeof_total += sizeof(item)

    frames_total = 0
    for item in objs:
        for frame in serialize_bytelist(item):
            frames_total += len(frame)

    return sizeof_total, frames_total
def test_pandas():
    """Check relative sizes and return types of sizeof on pandas objects.

    The lower bound on the object-dtype column ``y`` is only asserted for
    pandas 0.17.1 and later.
    """
    import re

    pd = pytest.importorskip('pandas')
    df = pd.DataFrame({
        'x': [1, 2, 3],
        'y': ['a' * 100, 'b' * 100, 'c' * 100]
    },
                      index=[10, 20, 30])

    assert sizeof(df) >= sizeof(df.x) + sizeof(df.y) - sizeof(df.index)
    assert sizeof(df.x) >= sizeof(df.index)

    # Compare the release number numerically. As plain strings the comparison
    # is lexicographic, so e.g. '0.9.0' >= '0.17.1' would wrongly be True.
    release = re.match(r'\d+(?:\.\d+)*', pd.__version__)
    if release:
        pandas_version = tuple(int(part) for part in release.group().split('.'))
    else:
        # Unparseable version string: skip the version-gated assertion.
        pandas_version = ()
    if pandas_version >= (0, 17, 1):
        assert sizeof(df.y) >= 100 * 3
    assert sizeof(df.index) >= 20

    assert isinstance(sizeof(df), int)
    assert isinstance(sizeof(df.x), int)
    assert isinstance(sizeof(df.index), int)
def test_numpy():
    """Check sizeof on a 1000-element 'f8' array and on a numpy dtype."""
    np = pytest.importorskip('numpy')

    floats = np.empty(1000, dtype='f8')
    assert 8000 <= sizeof(floats) <= 9000

    # For a dtype object, sizeof must match sys.getsizeof.
    float_dtype = np.dtype('f8')
    assert sizeof(float_dtype) == sys.getsizeof(float_dtype)
Exemple #18
0
def test_numpy_0_strided():
    """A scalar broadcast to (100, 100, 100) has a sizeof of at most 8."""
    np = pytest.importorskip("numpy")
    shape = (100, 100, 100)
    broadcasted = np.broadcast_to(1, shape)
    assert sizeof(broadcasted) <= 8
 def __init__(self, obj: object):
     """Store ``id(obj)`` as ``id`` and ``sizeof(obj)`` as ``nbytes``."""
     self.id = id(obj)
     measured = sizeof(obj)
     self.nbytes = measured
def test_spillbuffer(tmpdir):
    """Walk a SpillBuffer (target=300) through inserts, reads, deletes and updates.

    After each step the test asserts which keys are in ``buf.disk`` and the
    values of ``buf.spilled_by_key`` and ``buf.spilled_total``.
    """
    buf = SpillBuffer(str(tmpdir), target=300)
    # Convenience aliases
    assert buf.memory is buf.fast
    assert buf.disk is buf.slow

    # A fresh buffer has nothing spilled
    assert not buf.spilled_by_key
    assert buf.spilled_total == 0

    a, b, c, d = "a" * 100, "b" * 100, "c" * 100, "d" * 100
    # All four values have the same length; s is used as the expected size of each
    s = sizeof(a)
    # Test assumption made by this test, mostly for non CPython implementations
    assert 100 < s < 200

    buf["a"] = a
    assert not buf.disk
    assert not buf.spilled_by_key
    assert buf.spilled_total == 0
    assert buf["a"] == a

    buf["b"] = b
    assert not buf.disk
    assert not buf.spilled_by_key
    assert buf.spilled_total == 0

    # Adding a third key: "a" is expected on disk afterwards
    buf["c"] = c
    assert set(buf.disk) == {"a"}
    assert buf.spilled_by_key == {"a": s}
    assert buf.spilled_total == s

    # Reading "a" back: afterwards "b" is the spilled key instead
    assert buf["a"] == a
    assert set(buf.disk) == {"b"}
    assert buf.spilled_by_key == {"b": s}
    assert buf.spilled_total == s

    buf["d"] = d
    assert set(buf.disk) == {"b", "c"}
    assert buf.spilled_by_key == {"b": s, "c": s}
    assert buf.spilled_total == s * 2

    # Deleting an in-memory key does not automatically move spilled keys back to memory
    del buf["a"]
    assert set(buf.disk) == {"b", "c"}
    assert buf.spilled_by_key == {"b": s, "c": s}
    assert buf.spilled_total == s * 2
    with pytest.raises(KeyError):
        buf["a"]

    # Deleting a spilled key updates the metadata
    del buf["b"]
    assert set(buf.disk) == {"c"}
    assert buf.spilled_by_key == {"c": s}
    assert buf.spilled_total == s
    with pytest.raises(KeyError):
        buf["b"]

    # Updating a spilled key moves it to the top of the LRU and to memory
    buf["c"] = c * 2
    assert set(buf.disk) == {"d"}
    assert buf.spilled_by_key == {"d": s}
    assert buf.spilled_total == s

    # Single key is larger than target and goes directly into slow
    e = "e" * 500
    slarge = sizeof(e)
    buf["e"] = e
    assert set(buf.disk) == {"d", "e"}
    assert buf.spilled_by_key == {"d": s, "e": slarge}
    assert buf.spilled_total == s + slarge

    # Updating a spilled key with another larger than target updates slow directly
    buf["d"] = "d" * 500
    assert set(buf.disk) == {"d", "e"}
    assert buf.spilled_by_key == {"d": slarge, "e": slarge}
    assert buf.spilled_total == slarge * 2
Exemple #21
0
def test_dict():
    """sizeof of a dict exceeds the nbytes of an array nested inside it."""
    np = pytest.importorskip("numpy")
    payload = np.ones(10000)

    # The array sits at increasing nesting depth inside the dict.
    nested_variants = (
        {"x": payload},
        {"x": [payload]},
        {"x": [{"y": payload}]},
    )
    for container in nested_variants:
        assert sizeof(container) > payload.nbytes
Exemple #22
0
def test_numpy():
    """Check sizeof on a 1000-element "f8" array and on a numpy dtype."""
    np = pytest.importorskip("numpy")

    buffer = np.empty(1000, dtype="f8")
    assert 8000 <= sizeof(buffer) <= 9000

    # For a dtype object, sizeof must match sys.getsizeof.
    f8 = np.dtype("f8")
    assert sizeof(f8) == sys.getsizeof(f8)
Exemple #23
0
def test_base():
    """sizeof of a plain int matches getsizeof."""
    value = 1
    assert sizeof(value) == getsizeof(value)
Exemple #24
0
def test_pandas_repeated_column():
    """Selecting column "x" three times gives a larger sizeof than the frame."""
    pd = pytest.importorskip("pandas")
    original = pd.DataFrame({"x": [1, 2, 3]})
    selection = ["x", "x", "x"]
    repeated = original[selection]

    assert sizeof(repeated) > sizeof(original)
Exemple #25
0
def test_containers():
    """sizeof of a nested list exceeds three ints plus an empty list."""
    nested = [1, 2, [3]]
    shallow_floor = getsizeof(3) * 3 + getsizeof([])
    assert sizeof(nested) > shallow_floor
Exemple #26
0
def test_base():
    """sizeof of a plain int matches sys.getsizeof."""
    number = 1
    assert sizeof(number) == sys.getsizeof(number)
Exemple #27
0
def test_numpy():
    """Check sizeof on a 1000-element 'f8' array and on a numpy dtype."""
    np = pytest.importorskip('numpy')

    values = np.empty(1000, dtype='f8')
    assert 8000 <= sizeof(values) <= 9000

    # For a dtype object, sizeof must match sys.getsizeof.
    dtype_obj = np.dtype('f8')
    assert sizeof(dtype_obj) == sys.getsizeof(dtype_obj)
Exemple #28
0
def test_containers():
    """sizeof of a nested list exceeds three ints plus an empty list."""
    nested = [1, 2, [3]]
    shallow_floor = sys.getsizeof(3) * 3 + sys.getsizeof([])
    assert sizeof(nested) > shallow_floor
Exemple #29
0
def test_pandas():
    """Check relative sizes and return types of sizeof on pandas objects."""
    pd = pytest.importorskip('pandas')
    data = {'x': [1, 2, 3], 'y': ['a' * 100, 'b' * 100, 'c' * 100]}
    frame = pd.DataFrame(data, index=[10, 20, 30])

    # Size relationships between the frame, its columns and its index.
    assert sizeof(frame) >= sizeof(frame.x) + sizeof(frame.y) - sizeof(frame.index)
    assert sizeof(frame.x) >= sizeof(frame.index)
    assert sizeof(frame.y) >= 100 * 3
    assert sizeof(frame.index) >= 20

    # sizeof must hand back plain ints.
    for part in (frame, frame.x, frame.index):
        assert isinstance(sizeof(part), int)
Exemple #30
0
def test_sizeof(dtype):
    """sizeof of a random (2, 3, 4) cupy array equals its ``nbytes``."""
    dims = (2, 3, 4)
    gpu_array = cupy.random.random(dims, dtype=dtype)

    assert sizeof(gpu_array) == gpu_array.nbytes
def test_spillbuffer_maxlim(tmpdir):
    """Exercise a SpillBuffer built with target=200 and max_spill=600.

    Checks where each key ends up (``buf.fast`` or ``buf.slow``) and the
    matching weights as values are inserted and overwritten. Where the
    comments below say the max_spill limit is hit, it also checks that
    "disk reached capacity" was logged on the ``distributed.spill`` logger.
    """
    buf = SpillBuffer(str(tmpdir),
                      target=200,
                      max_spill=600,
                      min_log_interval=0)

    a, b, c, d, e = "a" * 200, "b" * 100, "c" * 99, "d" * 199, "e" * 98

    # size of a is bigger than target and is smaller than max_spill;
    # key should be in slow
    buf["a"] = a
    assert not buf.fast
    assert not buf.fast.weights
    assert set(buf.slow) == {"a"}
    assert buf.slow.weight_by_key == {"a": psize(a)}
    assert buf.slow.total_weight == psize(a)
    assert buf["a"] == a

    # size of b is smaller than target, so the key should be in fast
    buf["b"] = b
    assert set(buf.fast) == {"b"}
    assert buf.fast.weights == {"b": sizeof(b)}
    assert buf["b"] == b
    assert buf.fast.total_weight == sizeof(b)

    # size of c is smaller than target but b+c > target, c should stay in fast and b
    # move to slow since the max_spill limit has not been reached yet

    buf["c"] = c
    assert set(buf.fast) == {"c"}
    assert buf.fast.weights == {"c": sizeof(c)}
    assert buf["c"] == c
    assert buf.fast.total_weight == sizeof(c)

    assert set(buf.slow) == {"a", "b"}
    assert buf.slow.weight_by_key == {"a": psize(a), "b": psize(b)}
    assert buf.slow.total_weight == psize(a, b)

    # size of e < target but e+c > target, this will trigger movement of c to slow
    # but the max spill limit prevents it. Resulting in e remaining in fast

    with captured_logger(logging.getLogger("distributed.spill")) as logs_e:
        buf["e"] = e

    assert "disk reached capacity" in logs_e.getvalue()

    assert set(buf.fast) == {"c", "e"}
    assert buf.fast.weights == {"c": sizeof(c), "e": sizeof(e)}
    assert buf["e"] == e
    assert buf.fast.total_weight == sizeof(c) + sizeof(e)

    assert set(buf.slow) == {"a", "b"}
    assert buf.slow.weight_by_key == {"a": psize(a), "b": psize(b)}
    assert buf.slow.total_weight == psize(a, b)

    # size of d > target, d should go to slow but slow reached the max_spill limit then
    # d will end up on fast with c (which can't be moved to slow because it won't fit
    # either)
    with captured_logger(logging.getLogger("distributed.spill")) as logs_d:
        buf["d"] = d

    assert "disk reached capacity" in logs_d.getvalue()

    assert set(buf.fast) == {"c", "d", "e"}
    assert buf.fast.weights == {"c": sizeof(c), "d": sizeof(d), "e": sizeof(e)}
    assert buf["d"] == d
    assert buf.fast.total_weight == sizeof(c) + sizeof(d) + sizeof(e)

    assert set(buf.slow) == {"a", "b"}
    assert buf.slow.weight_by_key == {"a": psize(a), "b": psize(b)}
    assert buf.slow.total_weight == psize(a, b)

    # Overwrite a key that was in slow, but the size of the new key is larger than
    # max_spill

    a_large = "a" * 500
    assert psize(a_large)[1] > 600  # size of max_spill

    with captured_logger(
            logging.getLogger("distributed.spill")) as logs_alarge:
        buf["a"] = a_large

    assert "disk reached capacity" in logs_alarge.getvalue()

    assert set(buf.fast) == {"a", "d", "e"}
    assert set(buf.slow) == {"b", "c"}
    assert buf.fast.total_weight == sizeof(d) + sizeof(a_large) + sizeof(e)
    assert buf.slow.total_weight == psize(b, c)

    # Overwrite a key that was in fast, but the size of the new key is larger than
    # max_spill

    d_large = "d" * 501
    with captured_logger(
            logging.getLogger("distributed.spill")) as logs_dlarge:
        buf["d"] = d_large

    assert "disk reached capacity" in logs_dlarge.getvalue()

    assert set(buf.fast) == {"a", "d", "e"}
    assert set(buf.slow) == {"b", "c"}
    assert buf.fast.total_weight == sizeof(a_large) + sizeof(d_large) + sizeof(
        e)
    assert buf.slow.total_weight == psize(b, c)