def test_one_item_host_limit(capsys):
    memory_limit = sizeof(asproxy(one_item_array(), serializers=("dask", "pickle")))
    dhf = ProxifyHostFile(
        device_memory_limit=one_item_nbytes, memory_limit=memory_limit
    )

    a1 = one_item_array() + 1
    a2 = one_item_array() + 2
    dhf["k1"] = a1
    dhf["k2"] = a2
    dhf.manager.validate()

    # Check k1 is spilled because of the newer k2
    k1 = dhf["k1"]
    k2 = dhf["k2"]
    assert k1._pxy_get().is_serialized()
    assert not k2._pxy_get().is_serialized()
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])

    # Check k1 is spilled to disk and k2 is spilled to host
    dhf["k3"] = one_item_array() + 3
    k3 = dhf["k3"]
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k3])
    dhf.manager.validate()

    # Accessing k2 spills k3 and unspills k2
    k2_val = k2[0]
    assert k2_val == 2
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k3])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])

    # Adding a new array spills k3 to disk and k2 to host
    dhf["k4"] = one_item_array() + 4
    k4 = dhf["k4"]
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k1, k3])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k4])

    # Accessing k1 unspills k1 directly to device, spilling k4 to host and k2 to disk
    k1_val = k1[0]
    assert k1_val == 1
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._disk.get_proxies(), [k2, k3])
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k4])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k1])

    # Clean up
    del k1, k2, k3, k4
    dhf.clear()
    assert len(dhf.manager) == 0
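
# The tests in this collection rely on a few module-level helpers that are not
# shown here. The definitions below are a minimal sketch of what they could
# look like, assuming CuPy is available; the real helpers may differ.
import cupy


def one_item_array():
    # A single-element device array (8 bytes for the default integer dtype)
    return cupy.arange(1)


one_item_nbytes = one_item_array().nbytes


def is_proxies_equal(proxies_a, proxies_b):
    # Compare two collections of proxy objects by identity, ignoring order
    return sorted(map(id, proxies_a)) == sorted(map(id, proxies_b))
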
def test_serializing_array_to_disk(backend, serializers, size):
    """Check serializing arrays to disk"""

    np = pytest.importorskip(backend)
    obj = np.arange(size)

    # Serialize from host to disk
    pxy = proxy_object.asproxy(obj, serializers=serializers)
    ProxifyHostFile.serialize_proxy_to_disk_inplace(pxy)
    assert pxy._pxy_get().serializer == "disk"
    assert list(obj) == list(proxy_object.unproxy(pxy))
def test_serializing_to_disk(obj):
    """Check serializing to disk"""

    if isinstance(obj, str):
        backend = pytest.importorskip(obj)
        obj = backend.arange(100)

    # Serialize from device to disk
    pxy = proxy_object.asproxy(obj)
    ProxifyHostFile.serialize_proxy_to_disk_inplace(pxy)
    assert pxy._pxy_get().serializer == "disk"
    assert list(obj) == list(proxy_object.unproxy(pxy))

    # Serialize from host to disk
    pxy = proxy_object.asproxy(obj, serializers=("pickle", ))
    ProxifyHostFile.serialize_proxy_to_disk_inplace(pxy)
    assert pxy._pxy_get().serializer == "disk"
    assert list(obj) == list(proxy_object.unproxy(pxy))
def test_externals():
    dhf = ProxifyHostFile(device_memory_limit=one_item_nbytes)
    dhf["k1"] = one_item_array()
    k1 = dhf["k1"]
    k2 = dhf.add_external(one_item_array())
    # `k2` isn't part of the store but still triggers spilling of `k1`
    assert len(dhf) == 1
    assert k1._obj_pxy_is_serialized()
    assert not k2._obj_pxy_is_serialized()
    k1[0]  # Trigger spilling of `k2`
    assert not k1._obj_pxy_is_serialized()
    assert k2._obj_pxy_is_serialized()
    k2[0]  # Trigger spilling of `k1`
    assert k1._obj_pxy_is_serialized()
    assert not k2._obj_pxy_is_serialized()
    assert dhf.proxies_tally.get_dev_mem_usage() == one_item_nbytes
    # Removing `k2` also removes it from the tally
    del k2
    assert dhf.proxies_tally.get_dev_mem_usage() == 0
    assert len(list(dhf.proxies_tally.get_unspilled_proxies())) == 0
def test_externals_setitem():
    dhf = ProxifyHostFile(device_memory_limit=one_item_nbytes)
    k1 = dhf.add_external(one_item_array())
    assert type(k1) is dask_cuda.proxy_object.ProxyObject
    assert len(dhf) == 0
    assert "external" in k1._obj_pxy
    assert "external_finalize" in k1._obj_pxy
    dhf["k1"] = k1
    k1 = dhf["k1"]
    assert type(k1) is dask_cuda.proxy_object.ProxyObject
    assert len(dhf) == 1
    assert "external" not in k1._obj_pxy
    assert "external_finalize" not in k1._obj_pxy

    k1 = dhf.add_external(one_item_array())
    k1._obj_pxy_serialize(serializers=("dask", "pickle"))
    dhf["k1"] = k1
    k1 = dhf["k1"]
    assert type(k1) is dask_cuda.proxy_object.ProxyObject
    assert len(dhf) == 1
    assert "external" not in k1._obj_pxy
    assert "external_finalize" not in k1._obj_pxy

    dhf["k1"] = one_item_array()
    assert len(dhf.proxies_tally.proxy_id_to_proxy) == 1
    assert dhf.proxies_tally.get_dev_mem_usage() == one_item_nbytes
    k1 = dhf.add_external(k1)
    assert len(dhf.proxies_tally.proxy_id_to_proxy) == 1
    assert dhf.proxies_tally.get_dev_mem_usage() == one_item_nbytes
    k1 = dhf.add_external(dhf["k1"])
    assert len(dhf.proxies_tally.proxy_id_to_proxy) == 1
    assert dhf.proxies_tally.get_dev_mem_usage() == one_item_nbytes
def test_gds(gds_enabled, cuda_lib):
    lib = pytest.importorskip(cuda_lib)
    if cuda_lib == "cupy":
        data_create = lambda: lib.arange(10)
        data_compare = lambda x, y: all(x == y)
    elif cuda_lib == "cudf":
        data_create = lambda: lib.Series(range(10))
        data_compare = lambda x, y: all((x == y).values_host)
    elif cuda_lib == "numba.cuda":
        data_create = lambda: lib.to_device(range(10))
        data_compare = lambda x, y: all(x.copy_to_host() == y.copy_to_host())

    try:
        # Forward the parametrized flag (register_disk_spilling is assumed to
        # accept a `gds` argument in this version of dask_cuda)
        ProxifyHostFile.register_disk_spilling(gds=gds_enabled)
        if gds_enabled and not ProxifyHostFile._gds_enabled:
            pytest.skip("GDS not available")

        a = data_create()
        header, frames = serialize(a, serializers=("disk", ))
        b = deserialize(header, frames)
        assert type(a) == type(b)
        assert data_compare(a, b)
    finally:
        ProxifyHostFile.register_disk_spilling()  # Reset disk spilling options
def test_one_item_limit():
    dhf = ProxifyHostFile(device_memory_limit=one_item_nbytes)
    dhf["k1"] = one_item_array() + 42
    dhf["k2"] = one_item_array()

    # Check k1 is spilled because of the newer k2
    k1 = dhf["k1"]
    k2 = dhf["k2"]
    assert k1._obj_pxy_is_serialized()
    assert not k2._obj_pxy_is_serialized()

    # Accessing k1 spills k2 and unspills k1
    k1_val = k1[0]
    assert k1_val == 42
    assert k2._obj_pxy_is_serialized()

    # Duplicate arrays change nothing
    dhf["k3"] = [k1, k2]
    assert not k1._obj_pxy_is_serialized()
    assert k2._obj_pxy_is_serialized()

    # Adding a new array spills k1 and k2
    dhf["k4"] = one_item_array()
    assert k1._obj_pxy_is_serialized()
    assert k2._obj_pxy_is_serialized()
    assert not dhf["k4"]._obj_pxy_is_serialized()

    # Accessing k2 spills k1 and k4
    k2[0]
    assert k1._obj_pxy_is_serialized()
    assert dhf["k4"]._obj_pxy_is_serialized()
    assert not k2._obj_pxy_is_serialized()

    # Deleting k2 does not change anything since k3 still holds a
    # reference to the underlying proxy object
    assert dhf.proxies_tally.get_dev_mem_usage() == one_item_nbytes
    p1 = list(dhf.proxies_tally.get_unspilled_proxies())
    assert len(p1) == 1
    del dhf["k2"]
    assert dhf.proxies_tally.get_dev_mem_usage() == one_item_nbytes
    p2 = list(dhf.proxies_tally.get_unspilled_proxies())
    assert len(p2) == 1
    assert p1[0] is p2[0]

    # Overwriting "k3" with a non-cuda object, should be noticed
    dhf["k3"] = "non-cuda-object"
    assert dhf.proxies_tally.get_dev_mem_usage() == 0
def test_spill_on_demand():
    """
    Test spilling on demand by effectively disabling the device_memory_limit
    and allocating two large buffers that would otherwise fail, were it not
    for spilling on demand.
    """
    rmm = pytest.importorskip("rmm")
    if not hasattr(rmm.mr, "FailureCallbackResourceAdaptor"):
        pytest.skip("RMM doesn't implement FailureCallbackResourceAdaptor")

    total_mem = get_device_total_memory()
    dhf = ProxifyHostFile(
        device_memory_limit=2 * total_mem,
        memory_limit=2 * total_mem,
        spill_on_demand=True,
    )
    for i in range(2):
        dhf[i] = rmm.DeviceBuffer(size=total_mem // 2 + 1)
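
# A hedged sketch (not dask_cuda's actual implementation) of the mechanism
# behind `spill_on_demand`: wrap the current RMM memory resource in a
# FailureCallbackResourceAdaptor whose callback frees some device memory
# (e.g. by spilling one proxy) and returns True so RMM retries the allocation.
import rmm


def enable_spill_on_demand(spill_one_proxy):
    # `spill_one_proxy` is a hypothetical callable that spills a single
    # unspilled proxy and returns True if any device memory was freed.
    def on_alloc_failure(nbytes: int) -> bool:
        # True -> retry the failed allocation; False -> let MemoryError propagate
        return spill_one_proxy()

    upstream = rmm.mr.get_current_device_resource()
    rmm.mr.set_current_device_resource(
        rmm.mr.FailureCallbackResourceAdaptor(upstream, on_alloc_failure)
    )
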
def test_externals():
    """Test adding objects directly to the manager

    Adding an object directly to the manager makes it count against the
    device_memory_limit without making it part of the store.

    Normally, we use __setitem__ to store objects in the hostfile and make them
    count against the device_memory_limit, with the inherent consequence that
    the objects cannot be freed before subsequent calls to __delitem__.
    This is a problem for long-running tasks that want objects to count against
    the device_memory_limit while freeing them as soon as possible, without
    explicit calls to __delitem__.
    """
    dhf = ProxifyHostFile(device_memory_limit=one_item_nbytes, memory_limit=1000)
    dhf["k1"] = one_item_array()
    k1 = dhf["k1"]
    k2 = dhf.manager.proxify(one_item_array())
    # `k2` isn't part of the store but still triggers spilling of `k1`
    assert len(dhf) == 1
    assert k1._pxy_get().is_serialized()
    assert not k2._pxy_get().is_serialized()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])
    assert dhf.manager._dev._mem_usage == one_item_nbytes

    k1[0]  # Trigger spilling of `k2`
    assert not k1._pxy_get().is_serialized()
    assert k2._pxy_get().is_serialized()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k1])
    assert dhf.manager._dev._mem_usage == one_item_nbytes

    k2[0]  # Trigger spilling of `k1`
    assert k1._pxy_get().is_serialized()
    assert not k2._pxy_get().is_serialized()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])
    assert dhf.manager._dev._mem_usage == one_item_nbytes

    # Removing `k2` also removes it from the tally
    del k2
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [])
    assert dhf.manager._dev._mem_usage == 0
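
# Hedged usage sketch of the pattern described in the docstring above: a
# long-running task proxifies a large intermediate via `manager.proxify` so it
# counts against the device_memory_limit, yet is freed as soon as the task
# drops it, without any call to __delitem__. `get_worker().data` is assumed to
# be a ProxifyHostFile; the computation itself is illustrative only.
import cupy
from distributed import get_worker


def task_with_large_intermediate(n: int) -> float:
    hostfile = get_worker().data
    # The intermediate is tracked (and spillable) while this task runs...
    tmp = hostfile.manager.proxify(cupy.arange(n))
    # ...and becomes freeable the moment the function returns.
    return float(tmp.sum())
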
def test_one_item_limit():
    dhf = ProxifyHostFile(device_memory_limit=itemsize)
    dhf["k1"] = cupy.arange(1) + 1
    dhf["k2"] = cupy.arange(1) + 2

    # Check k1 is spilled because of the newer k2
    k1 = dhf["k1"]
    assert k1._obj_pxy_is_serialized()
    assert not dhf["k2"]._obj_pxy_is_serialized()

    # Accessing k1 spills k2 and unspills k1
    k1_val = k1[0]
    assert k1_val == 1
    k2 = dhf["k2"]
    assert k2._obj_pxy_is_serialized()

    # Duplicate arrays change nothing
    dhf["k3"] = [k1, k2]
    assert not k1._obj_pxy_is_serialized()
    assert k2._obj_pxy_is_serialized()

    # Adding a new array spills k1 and k2
    dhf["k4"] = cupy.arange(1) + 4
    assert k1._obj_pxy_is_serialized()
    assert k2._obj_pxy_is_serialized()
    assert not dhf["k4"]._obj_pxy_is_serialized()

    # Accessing k2 spills k1 and k4
    k2[0]
    assert k1._obj_pxy_is_serialized()
    assert dhf["k4"]._obj_pxy_is_serialized()
    assert not k2._obj_pxy_is_serialized()

    # Deleting k2 does not change anything since k3 still holds a
    # reference to the underlying proxy object
    assert dhf.proxies_tally.get_dev_mem_usage() == 8
    p1 = list(dhf.proxies_tally.get_unspilled_proxies())
    assert len(p1) == 1
    del dhf["k2"]
    assert dhf.proxies_tally.get_dev_mem_usage() == 8
    p2 = list(dhf.proxies_tally.get_unspilled_proxies())
    assert len(p2) == 1
    assert p1[0] is p2[0]
def test_dataframes_share_dev_mem():
    cudf = pytest.importorskip("cudf")

    df = cudf.DataFrame({"a": range(10)})
    grouped = shuffle_group(df, "a", 0, 2, 2, False, 2)
    view1 = grouped[0]
    view2 = grouped[1]
    # Even though the two dataframes don't point to the same cudf.Buffer object
    assert view1["a"].data is not view2["a"].data
    # They still share the same underlying device memory
    assert view1["a"].data._owner._owner is view2["a"].data._owner._owner

    dhf = ProxifyHostFile(device_memory_limit=160, memory_limit=1000)
    dhf["v1"] = view1
    dhf["v2"] = view2
    v1 = dhf["v1"]
    v2 = dhf["v2"]
    # The device_memory_limit is not exceeded since both dataframes share device memory
    assert not v1._pxy_get().is_serialized()
    assert not v2._pxy_get().is_serialized()
    # Now the device_memory_limit is exceeded, which should evict both dataframes
    dhf["k1"] = one_item_array()
    assert v1._pxy_get().is_serialized()
    assert v2._pxy_get().is_serialized()
def test_one_dev_item_limit():
    dhf = ProxifyHostFile(device_memory_limit=one_item_nbytes, memory_limit=1000)

    a1 = one_item_array() + 42
    a2 = one_item_array()
    dhf["k1"] = a1
    dhf["k2"] = a2
    dhf.manager.validate()

    # Check k1 is spilled because of the newer k2
    k1 = dhf["k1"]
    k2 = dhf["k2"]
    assert k1._pxy_get().is_serialized()
    assert not k2._pxy_get().is_serialized()
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])

    # Accessing k1 spills k2 and unspills k1
    k1_val = k1[0]
    assert k1_val == 42
    assert k2._pxy_get().is_serialized()
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k1])

    # Duplicate arrays change nothing
    dhf["k3"] = [k1, k2]
    assert not k1._pxy_get().is_serialized()
    assert k2._pxy_get().is_serialized()
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k1])

    # Adding a new array spills k1 and k2
    dhf["k4"] = one_item_array()
    k4 = dhf["k4"]
    assert k1._pxy_get().is_serialized()
    assert k2._pxy_get().is_serialized()
    assert not dhf["k4"]._pxy_get().is_serialized()
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1, k2])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k4])

    # Accessing k2 spills k1 and k4
    k2[0]
    assert k1._pxy_get().is_serialized()
    assert dhf["k4"]._pxy_get().is_serialized()
    assert not k2._pxy_get().is_serialized()
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1, k4])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])

    # Deleting k2 does not change anything since k3 still holds a
    # reference to the underlying proxy object
    assert dhf.manager._dev.mem_usage() == one_item_nbytes
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1, k4])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])
    del dhf["k2"]
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1, k4])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k2])

    # Overwriting k3 with a non-cuda object and deleting k2
    # should empty the device
    dhf["k3"] = "non-cuda-object"
    del k2
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k1, k4])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [])

    # Adding the underlying proxied object of k1 doesn't change anything.
    # The host file detects that k1_ary is already proxied by the
    # existing proxy object k1.
    k1_ary = unproxy(k1)
    dhf["k5"] = k1_ary
    dhf.manager.validate()
    assert is_proxies_equal(dhf.manager._host.get_proxies(), [k4])
    assert is_proxies_equal(dhf.manager._dev.get_proxies(), [k1])

    # Clean up
    del k1, k4
    dhf.clear()
    assert len(dhf.manager) == 0
import pytest

import dask
import dask.array
from dask.dataframe.core import has_parallel_type
from dask.sizeof import sizeof
from distributed import Client
from distributed.protocol.serialize import deserialize, serialize

import dask_cudf

import dask_cuda
from dask_cuda import proxy_object
from dask_cuda.proxify_device_objects import proxify_device_objects
from dask_cuda.proxify_host_file import ProxifyHostFile

ProxifyHostFile.register_disk_spilling()  # Make the "disk" serializer available


@pytest.mark.parametrize("serializers", [None, ("dask", "pickle"), ("disk", )])
def test_proxy_object(serializers):
    """Check "transparency" of the proxy object"""

    org = bytearray(range(10))
    pxy = proxy_object.asproxy(org, serializers=serializers)

    assert len(org) == len(pxy)
    assert org[0] == pxy[0]
    assert 1 in pxy
    assert 10 not in pxy
    assert str(org) == str(pxy)
    assert "dask_cuda.proxy_object.ProxyObject at " in repr(pxy)