def test_on_demand_debug_info():
    """Test worker logging when on-demand-spilling fails"""
    rmm = pytest.importorskip("rmm")
    if not hasattr(rmm.mr, "FailureCallbackResourceAdaptor"):
        pytest.skip("RMM doesn't implement FailureCallbackResourceAdaptor")

    total_mem = get_device_total_memory()

    def task():
        rmm.DeviceBuffer(size=total_mem + 1)

    with dask_cuda.LocalCUDACluster(n_workers=1, jit_unspill=True) as cluster:
        with Client(cluster) as client:
            # Warmup, which triggers the initialization of spill on demand
            client.submit(range, 10).result()

            # Submit too large RMM buffer
            with pytest.raises(
                MemoryError, match=r".*std::bad_alloc:.*CUDA error at:.*"
            ):
                client.submit(task).result()

            log = str(client.get_worker_logs())
            assert re.search(
                "WARNING - RMM allocation of .* failed, spill-on-demand", log
            )
            assert re.search("<ProxyManager dev_limit=.* host_limit=.*>: Empty", log)
            assert "traceback:" in log
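These snippets omit most of their module-level imports. A plausible set that makes them self-contained, assuming the helpers live in dask_cuda.utils and dask_cuda.proxify_host_file (the exact module paths are assumptions to verify against the dask_cuda test suite):

import re

import pytest
from numba import cuda

import dask_cuda
from dask_cuda.proxify_host_file import ProxifyHostFile
from dask_cuda.utils import (
    get_device_total_memory,
    get_n_gpus,
    parse_device_memory_limit,
)
from distributed import Client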
Example #2
def test_get_device_total_memory():
    for i in range(get_n_gpus()):
        with cuda.gpus[i]:
            assert (
                get_device_total_memory(i)
                == cuda.current_context().get_memory_info()[1]
            )
Example #3
def test_parse_device_memory_limit():
    total = get_device_total_memory(0)

    assert parse_device_memory_limit(None) == total
    assert parse_device_memory_limit(0) == total
    assert parse_device_memory_limit("auto") == total

    assert parse_device_memory_limit(0.8) == int(total * 0.8)
    assert parse_device_memory_limit(1000000000) == 1000000000
    assert parse_device_memory_limit("1GB") == 1000000000
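For illustration, a minimal sketch of the parsing behaviour the assertions above imply, assuming dask.utils.parse_bytes handles the string and integer cases; this is not dask_cuda's actual implementation:

from dask.utils import parse_bytes


def parse_device_memory_limit_sketch(limit, total):
    # None, 0 and "auto" all mean "use the full device memory".
    if limit in (None, 0, "auto"):
        return total
    # A float in (0, 1] is read as a fraction of the total device memory.
    if isinstance(limit, float) and limit <= 1.0:
        return int(total * limit)
    # Anything else is an absolute byte count, e.g. 1000000000 or "1GB".
    return parse_bytes(str(limit))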
Example #4
def test_spill_on_demand():
    """
    Test spilling on demand by effectively disabling the device_memory_limit
    and allocating two large buffers that together exceed device memory and
    would fail without spilling on demand.
    """
    rmm = pytest.importorskip("rmm")
    if not hasattr(rmm.mr, "FailureCallbackResourceAdaptor"):
        pytest.skip("RMM doesn't implement FailureCallbackResourceAdaptor")

    total_mem = get_device_total_memory()
    dhf = ProxifyHostFile(
        device_memory_limit=2 * total_mem,
        memory_limit=2 * total_mem,
        spill_on_demand=True,
    )
    for i in range(2):
        dhf[i] = rmm.DeviceBuffer(size=total_mem // 2 + 1)
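For context, a rough sketch of the mechanism test_spill_on_demand relies on, assuming rmm.mr.FailureCallbackResourceAdaptor invokes its callback with the size of a failed allocation and retries when the callback returns True; the manager.evict() call is a hypothetical stand-in for dask_cuda's proxy eviction logic:

def register_spill_on_demand_sketch(manager):
    import rmm

    def oom(nbytes: int) -> bool:
        # Try to free device memory by spilling proxied objects to host.
        freed = manager.evict()  # hypothetical: returns number of bytes spilled
        return freed > 0  # True tells RMM to retry the failed allocation

    upstream = rmm.mr.get_current_device_resource()
    rmm.mr.set_current_device_resource(
        rmm.mr.FailureCallbackResourceAdaptor(upstream, oom)
    )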
Example #5
import pytest
from zict.file import _safe_key as safe_key

import dask
from dask import array as da
from distributed import Client, get_worker, wait
from distributed.metrics import time
from distributed.sizeof import sizeof
from distributed.utils_test import gen_cluster, gen_test, loop  # noqa: F401
from distributed.worker import Worker

from dask_cuda import LocalCUDACluster, utils
from dask_cuda.device_host_file import DeviceHostFile

if utils.get_device_total_memory() < 1e10:
    pytest.skip("Not enough GPU memory", allow_module_level=True)


def device_host_file_size_matches(dhf,
                                  total_bytes,
                                  device_chunk_overhead=0,
                                  serialized_chunk_overhead=1024):
    byte_sum = dhf.device_buffer.fast.total_weight

    # `dhf.host_buffer.fast` is only available when Worker's `memory_limit != 0`
    if hasattr(dhf.host_buffer, "fast"):
        byte_sum += dhf.host_buffer.fast.total_weight
    else:
        byte_sum += sum([sizeof(b) for b in dhf.host_buffer.values()])
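    # NOTE: the helper is truncated here in this listing. A sketch of how the
    # size check might conclude, assuming spilled-to-disk bytes are also folded
    # into `byte_sum` and allowing the stated per-chunk overheads (illustrative
    # only, not the test suite's exact code):
    device_overhead = len(dhf.device_buffer) * device_chunk_overhead
    host_overhead = len(dhf.host_buffer) * serialized_chunk_overhead
    disk_overhead = len(dhf.disk_buffer) * serialized_chunk_overhead
    return (
        total_bytes
        <= byte_sum
        <= total_bytes + device_overhead + host_overhead + disk_overhead
    )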
Example #6
def test_get_device_total_memory():
    for i in range(get_n_gpus()):
        with cuda.gpus[i]:
            total_mem = get_device_total_memory(i)
            assert type(total_mem) is int
            assert total_mem > 0