def test_on_demand_debug_info():
    """Test worker logging when on-demand spilling fails"""
    rmm = pytest.importorskip("rmm")
    if not hasattr(rmm.mr, "FailureCallbackResourceAdaptor"):
        pytest.skip("RMM doesn't implement FailureCallbackResourceAdaptor")
    total_mem = get_device_total_memory()

    def task():
        rmm.DeviceBuffer(size=total_mem + 1)

    with dask_cuda.LocalCUDACluster(n_workers=1, jit_unspill=True) as cluster:
        with Client(cluster) as client:
            # Warmup, which triggers the initialization of spill on demand
            client.submit(range, 10).result()

            # Submit too large an RMM buffer
            with pytest.raises(
                MemoryError, match=r".*std::bad_alloc:.*CUDA error at:.*"
            ):
                client.submit(task).result()

            log = str(client.get_worker_logs())
            assert re.search(
                "WARNING - RMM allocation of .* failed, spill-on-demand", log
            )
            assert re.search("<ProxyManager dev_limit=.* host_limit=.*>: Empty", log)
            assert "traceback:" in log
def test_get_device_total_memory():
    for i in range(get_n_gpus()):
        with cuda.gpus[i]:
            assert (
                get_device_total_memory(i)
                == cuda.current_context().get_memory_info()[1]
            )
def test_parse_device_memory_limit():
    total = get_device_total_memory(0)

    assert parse_device_memory_limit(None) == total
    assert parse_device_memory_limit(0) == total
    assert parse_device_memory_limit("auto") == total

    assert parse_device_memory_limit(0.8) == int(total * 0.8)
    assert parse_device_memory_limit(1000000000) == 1000000000
    assert parse_device_memory_limit("1GB") == 1000000000
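
# A minimal sketch (an illustrative assumption, not dask_cuda's actual
# implementation) of the parsing behaviour exercised above, built on
# dask.utils.parse_bytes; `_parse_device_memory_limit_sketch` is a
# hypothetical helper name used only for illustration.
def _parse_device_memory_limit_sketch(limit, total):
    from dask.utils import parse_bytes

    if limit in (None, 0, "auto"):
        return total  # no explicit limit: use the full device memory
    if isinstance(limit, float) and limit <= 1:
        return int(total * limit)  # fractions are relative to total memory
    if isinstance(limit, str):
        return parse_bytes(limit)  # e.g. "1GB" -> 1000000000
    return int(limit)  # plain byte counts pass through unchanged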
def test_spill_on_demand():
    """
    Test spilling on demand by effectively disabling the device_memory_limit
    (setting it above the total device memory) and allocating two large
    buffers that only fit if spilling on demand kicks in.
    """
    rmm = pytest.importorskip("rmm")
    if not hasattr(rmm.mr, "FailureCallbackResourceAdaptor"):
        pytest.skip("RMM doesn't implement FailureCallbackResourceAdaptor")
    total_mem = get_device_total_memory()
    dhf = ProxifyHostFile(
        device_memory_limit=2 * total_mem,
        memory_limit=2 * total_mem,
        spill_on_demand=True,
    )
    for i in range(2):
        dhf[i] = rmm.DeviceBuffer(size=total_mem // 2 + 1)
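
# A minimal sketch (an assumption for illustration, not dask_cuda's actual
# wiring) of how an out-of-memory callback can be registered with RMM through
# rmm.mr.FailureCallbackResourceAdaptor, the adaptor the tests above check
# for; `_register_oom_callback_sketch` and `_oom` are hypothetical names.
def _register_oom_callback_sketch():
    import rmm

    def _oom(nbytes: int) -> bool:
        # RMM invokes this when an allocation of `nbytes` fails; returning
        # True asks RMM to retry the allocation (e.g. after spilling buffers),
        # while returning False propagates the failure to the caller.
        return False  # this sketch never retries

    mr = rmm.mr.FailureCallbackResourceAdaptor(
        rmm.mr.get_current_device_resource(), _oom
    )
    rmm.mr.set_current_device_resource(mr)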
import pytest

from zict.file import _safe_key as safe_key

import dask
from dask import array as da

from distributed import Client, get_worker, wait
from distributed.metrics import time
from distributed.sizeof import sizeof
from distributed.utils_test import gen_cluster, gen_test, loop  # noqa: F401
from distributed.worker import Worker

from dask_cuda import LocalCUDACluster, utils
from dask_cuda.device_host_file import DeviceHostFile

if utils.get_device_total_memory() < 1e10:
    pytest.skip("Not enough GPU memory", allow_module_level=True)


def device_host_file_size_matches(
    dhf, total_bytes, device_chunk_overhead=0, serialized_chunk_overhead=1024
):
    byte_sum = dhf.device_buffer.fast.total_weight

    # `dhf.host_buffer.fast` is only available when Worker's `memory_limit != 0`
    if hasattr(dhf.host_buffer, "fast"):
        byte_sum += dhf.host_buffer.fast.total_weight
    else:
        byte_sum += sum([sizeof(b) for b in dhf.host_buffer.values()])
def test_get_device_total_memory():
    for i in range(get_n_gpus()):
        with cuda.gpus[i]:
            total_mem = get_device_total_memory(i)
            assert type(total_mem) is int
            assert total_mem > 0