async def test_worker_start_exception(cleanup):
    # make sure this raises the right Exception:
    with pytest.raises(StartException):
        async with Nanny("tcp://localhost:1", worker_class=BrokenWorker) as n:
            await n.start()

def test_nanny_no_port():
    _ = str(Nanny("127.0.0.1", 8786))

async def test_config(cleanup):
    async with Scheduler() as s:
        async with Nanny(s.address, config={"foo": "bar"}) as n:
            async with Client(s.address, asynchronous=True) as client:
                config = await client.run(dask.config.get, "foo")
                assert config[n.worker_address] == "bar"

def test_data_types(c, s):
    w = yield Nanny(s.address, data=dict)
    r = yield c.run(lambda dask_worker: type(dask_worker.data))
    assert r[w.worker_address] == dict
    yield w.close()

def test_many_kills(s):
    n = yield Nanny(s.address, nthreads=2, loop=s.loop)
    assert n.is_alive()
    yield [n.kill() for i in range(5)]
    yield [n.kill() for i in range(5)]
    yield n.close()

async def test_worker_start_exception(s):
    # make sure this raises the right Exception:
    with pytest.raises(StartException):
        async with Nanny(s.address, worker_class=BrokenWorker) as n:
            pass

def test_nanny_death_timeout(s):
    yield s.close()
    w = yield Nanny(s.address, death_timeout=1)
    yield gen.sleep(3)
    assert w.status == "closed"

def test_nanny_death_timeout():
    w = Nanny("127.0.0.1", 38848, death_timeout=1)
    yield w._start()
    yield gen.sleep(3)
    assert w.status == "closed"

async def test_config(c, s):
    async with Nanny(s.address, config={"foo": "bar"}) as n:
        config = await c.run(dask.config.get, "foo")
        assert config[n.worker_address] == "bar"

async def create_and_destroy_worker(delay):
    start = time()
    while time() < start + 5:
        async with Nanny(s.address, nthreads=2):
            await asyncio.sleep(delay)
        print("Killed nanny")

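# A minimal sketch (an assumption, not shown in the snippet above) of how the
# helper might be driven: launch several overlapping create/destroy loops with
# staggered delays and wait for all of them. Assumes it runs in the same scope
# that defines `s` (the scheduler) used by create_and_destroy_worker.
async def stress_create_and_destroy():
    await asyncio.gather(*(create_and_destroy_worker(0.1 * i) for i in range(10)))
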
async def test_web_preload_worker(cleanup, worker_preload):
    async with Scheduler(port=8786, host="localhost") as s:
        async with Nanny(preload_nanny=["http://127.0.0.1:12346/preload"]) as nanny:
            assert nanny.scheduler_addr == s.address

async def f():
    async with Scheduler(
        protocol=protocol, interface="ib0", dashboard_address=":8789"
    ) as s:
        async with Nanny(
            s.address,
            protocol=protocol,
            nthreads=1,
            memory_limit="32GB",
            env={"CUDA_VISIBLE_DEVICES": "2"},
        ) as w:
            async with Nanny(
                s.address,
                protocol=protocol,
                memory_limit="32gb",
                env={"CUDA_VISIBLE_DEVICES": "3"},
                nthreads=1,
            ) as w2:
                async with Client(s.address, asynchronous=True) as c:
                    with log_errors(pdb=True):
                        # Create a simple random array
                        n_rows = 50000000
                        n_keys = 5000000
                        # working!!!
                        # n_rows = 5000000
                        # n_keys = 500000  # 1200000 B or 1.2 MB
                        # n_rows = 5000000
                        # n_keys = 2500000
                        # n_rows = 5000
                        # n_keys = 2500
                        chunks = n_rows // 1000

                        left = dd.concat(
                            [
                                da.random.random(
                                    n_rows, chunks=chunks
                                ).to_dask_dataframe(columns="x"),
                                da.random.randint(
                                    0, n_keys, size=n_rows, chunks=chunks
                                ).to_dask_dataframe(columns="id"),
                            ],
                            axis=1,
                        ).persist()
                        right = dd.concat(
                            [
                                da.random.random(
                                    n_rows, chunks=chunks
                                ).to_dask_dataframe(columns="y"),
                                da.random.randint(
                                    0, n_keys, size=n_rows, chunks=chunks
                                ).to_dask_dataframe(columns="id"),
                            ],
                            axis=1,
                        ).persist()

                        print("Building CUDF DataFrames...")
                        gright = right.map_partitions(cudf.from_pandas)
                        gleft = left.map_partitions(cudf.from_pandas)
                        print(gleft.npartitions)
                        print(gright.npartitions)

                        print("Repartition Left 10...")
                        res = gleft.repartition(npartitions=10)
                        res = await res.persist()

                        out = await c.compute(res.head(compute=False))
                        print(out)
                        print("FINISHED!")

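# Hypothetical entry point for the coroutine above (an assumption: the original
# snippet does not show how f() is invoked, nor how `protocol` is defined).
if __name__ == "__main__":
    protocol = "ucx"  # assumed value; f() reads `protocol` from module scope
    asyncio.run(f())
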
    def __init__(
        self,
        scheduler=None,
        host=None,
        nthreads=1,
        name=None,
        memory_limit="auto",
        device_memory_limit="auto",
        rmm_pool_size=None,
        rmm_managed_memory=False,
        pid_file=None,
        resources=None,
        dashboard=True,
        dashboard_address=":0",
        local_directory=None,
        scheduler_file=None,
        interface=None,
        death_timeout=None,
        preload=[],
        dashboard_prefix=None,
        security=None,
        enable_tcp_over_ucx=False,
        enable_infiniband=False,
        enable_nvlink=False,
        enable_rdmacm=False,
        net_devices=None,
        jit_unspill=None,
        **kwargs,
    ):
        # Required by RAPIDS libraries (e.g., cuDF) to ensure no context
        # initialization happens before we can set CUDA_VISIBLE_DEVICES
        os.environ["RAPIDS_NO_INITIALIZE"] = "True"

        enable_proctitle_on_current()
        enable_proctitle_on_children()

        try:
            nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
        except KeyError:
            nprocs = get_n_gpus()

        if nthreads < 1:
            raise ValueError("nthreads must be higher than 0.")

        memory_limit = parse_memory_limit(memory_limit, nthreads, total_cores=nprocs)

        if pid_file:
            with open(pid_file, "w") as f:
                f.write(str(os.getpid()))

            def del_pid_file():
                if os.path.exists(pid_file):
                    os.remove(pid_file)

            atexit.register(del_pid_file)

        if resources:
            resources = resources.replace(",", " ").split()
            resources = dict(pair.split("=") for pair in resources)
            resources = valmap(float, resources)
        else:
            resources = None

        loop = IOLoop.current()

        preload_argv = kwargs.get("preload_argv", [])
        kwargs = {"worker_port": None, "listen_address": None}

        if (
            not scheduler
            and not scheduler_file
            and dask.config.get("scheduler-address", None) is None
        ):
            raise ValueError(
                "Need to provide scheduler address like\n"
                "dask-worker SCHEDULER_ADDRESS:8786"
            )

        if isinstance(scheduler, Cluster):
            scheduler = scheduler.scheduler_address

        if interface and host:
            raise ValueError("Can not specify both interface and host")

        if rmm_pool_size is not None or rmm_managed_memory:
            try:
                import rmm  # noqa F401
            except ImportError:
                raise ValueError(
                    "RMM pool requested but module 'rmm' is not available. "
                    "For installation instructions, please see "
                    "https://github.com/rapidsai/rmm"
                )  # pragma: no cover
            if rmm_pool_size is not None:
                rmm_pool_size = parse_bytes(rmm_pool_size)
        else:
            if enable_nvlink:
                warnings.warn(
                    "When using NVLink we recommend setting a "
                    "`rmm_pool_size`. Please see: "
                    "https://dask-cuda.readthedocs.io/en/latest/ucx.html"
                    "#important-notes for more details"
                )

        if enable_nvlink and rmm_managed_memory:
            raise ValueError(
                "RMM managed memory and NVLink are currently incompatible."
            )

        # Ensure this parent dask-cuda-worker process uses the same UCX
        # configuration as child worker processes created by it.
        initialize(
            create_cuda_context=False,
            enable_tcp_over_ucx=enable_tcp_over_ucx,
            enable_infiniband=enable_infiniband,
            enable_nvlink=enable_nvlink,
            enable_rdmacm=enable_rdmacm,
            net_devices=net_devices,
            cuda_device_index=0,
        )

        if jit_unspill is None:
            self.jit_unspill = dask.config.get("jit-unspill", default=False)
        else:
            self.jit_unspill = jit_unspill

        if self.jit_unspill:
            data = lambda i: (
                ProxifyHostFile,
                {
                    "device_memory_limit": parse_device_memory_limit(
                        device_memory_limit, device_index=i
                    ),
                },
            )
        else:
            data = lambda i: (
                DeviceHostFile,
                {
                    "device_memory_limit": parse_device_memory_limit(
                        device_memory_limit, device_index=i
                    ),
                    "memory_limit": memory_limit,
                    "local_directory": local_directory,
                },
            )

        self.nannies = [
            Nanny(
                scheduler,
                scheduler_file=scheduler_file,
                nthreads=nthreads,
                dashboard=dashboard,
                dashboard_address=dashboard_address,
                http_prefix=dashboard_prefix,
                loop=loop,
                resources=resources,
                memory_limit=memory_limit,
                interface=_get_interface(interface, host, i, net_devices),
                host=host,
                preload=(list(preload) or []) + ["dask_cuda.initialize"],
                preload_argv=(list(preload_argv) or []) + ["--create-cuda-context"],
                security=security,
                env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
                plugins={
                    CPUAffinity(get_cpu_affinity(i)),
                    RMMSetup(rmm_pool_size, rmm_managed_memory),
                },
                name=name if nprocs == 1 or not name else name + "-" + str(i),
                local_directory=local_directory,
                config={
                    "ucx": get_ucx_config(
                        enable_tcp_over_ucx=enable_tcp_over_ucx,
                        enable_infiniband=enable_infiniband,
                        enable_nvlink=enable_nvlink,
                        enable_rdmacm=enable_rdmacm,
                        net_devices=net_devices,
                        cuda_device_index=i,
                    )
                },
                data=data(i),
                **kwargs,
            )
            for i in range(nprocs)
        ]

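    # A minimal sketch (an assumption, not part of the snippet above) of how the
    # nannies built in __init__ might be started and torn down together. The
    # method names are hypothetical, chosen only for illustration.
    async def _start_nannies(self):
        # Awaiting a Nanny starts it; gather starts all of them concurrently.
        await asyncio.gather(*self.nannies)

    async def _close_nannies(self):
        await asyncio.gather(*(n.close() for n in self.nannies))
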