def test_default_process_thread_breakdown():
    """nprocesses_nthreads should split a CPU budget into a sensible
    (n_processes, n_threads) pair; some totals admit more than one
    acceptable factorization."""
    cases = [
        (1, {(1, 1)}),
        (4, {(4, 1)}),
        (5, {(5, 1)}),
        (8, {(4, 2)}),
        (12, {(6, 2), (4, 3)}),
        (20, {(5, 4)}),
        (24, {(6, 4), (8, 3)}),
        (32, {(8, 4)}),
        (40, {(8, 5), (10, 4)}),
        (80, {(10, 8), (16, 5)}),
    ]
    for ncores, allowed in cases:
        assert nprocesses_nthreads(ncores) in allowed
def test_nprocs_auto(loop):
    """A worker launched with --nprocs=auto should register the default
    nprocesses_nthreads() number of worker processes."""
    with popen(["dask-scheduler", "--no-dashboard"]), popen(
        ["dask-worker", "127.0.0.1:8786", "--nprocs=auto"]
    ), Client("tcp://127.0.0.1:8786", loop=loop) as client:
        expected_procs, _ = nprocesses_nthreads()
        client.wait_for_workers(expected_procs, timeout="10 seconds")
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name, pid_file, resources,
         dashboard, bokeh, bokeh_port, scheduler_file, dashboard_prefix,
         tls_ca_file, tls_cert, tls_key, dashboard_address, worker_class,
         preload_nanny, **kwargs):
    """``dask-worker`` CLI entry point.

    Validates the combination of command-line options, constructs ``nprocs``
    worker (or nanny) instances, and runs them on a Tornado IOLoop until they
    finish or a signal requests shutdown.  Exits the process with status 1 on
    invalid option combinations, address-parsing errors, or startup timeout.

    Note (review): ``dashboard_prefix`` is accepted but not referenced in
    this body -- presumably consumed elsewhere or kept for CLI
    compatibility; confirm before removing.
    """
    # Triple the GC thresholds to reduce collection overhead under load; see
    # https://github.com/dask/distributed/issues/1653
    g0, g1, g2 = gc.get_threshold()
    gc.set_threshold(g0 * 3, g1 * 3, g2 * 3)

    # Make worker processes identifiable in process listings.
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    # Back-compat shims for the renamed bokeh/dashboard flags.
    if bokeh_port is not None:
        warnings.warn(
            "The --bokeh-port flag has been renamed to --dashboard-address. "
            "Consider adding ``--dashboard-address :%d`` " % bokeh_port)
        dashboard_address = bokeh_port
    if bokeh is not None:
        warnings.warn(
            "The --bokeh/--no-bokeh flag has been renamed to --dashboard/--no-dashboard. "
        )
        dashboard = bokeh

    # TLS options, keeping only the ones actually supplied on the CLI.
    sec = {
        k: v
        for k, v in [
            ("tls_ca_file", tls_ca_file),
            ("tls_worker_cert", tls_cert),
            ("tls_worker_key", tls_key),
        ]
        if v is not None
    }

    # Resolve the process count: "auto" picks a machine-appropriate
    # process/thread split; a negative value counts back from CPU_COUNT.
    if nprocs == "auto":
        nprocs, nthreads = nprocesses_nthreads()
    else:
        nprocs = int(nprocs)
    if nprocs < 0:
        nprocs = CPU_COUNT + 1 + nprocs
    if nprocs <= 0:
        logger.error(
            "Failed to launch worker. Must specify --nprocs so that there's at least one process."
        )
        sys.exit(1)

    # Reject option combinations that cannot work together.
    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker. You cannot use the --no-nanny argument when nprocs > 1."
        )
        sys.exit(1)
    if contact_address and not listen_address:
        logger.error(
            "Failed to launch worker. "
            "Must specify --listen-address when --contact-address is given")
        sys.exit(1)
    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        sys.exit(1)
    if (worker_port or host) and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when --worker-port or --host is given."
        )
        sys.exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address,
                                                        strict=True)
        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        sys.exit(1)

    # When a nanny supervises the worker, the nanny owns the CLI port.
    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if not nthreads:
        # Split the available CPUs evenly across processes.
        nthreads = CPU_COUNT // nprocs

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            # Best-effort cleanup of the pid file on interpreter exit.
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Parse "NAME=AMOUNT" resource pairs (comma- or space-separated).
    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    # Resolve the dotted-path worker class string to an actual class.
    worker_class = import_term(worker_class)
    if nanny:
        kwargs["worker_class"] = worker_class
        kwargs["preload_nanny"] = preload_nanny

    # Pick the type to instantiate: Nanny (which spawns the worker) or the
    # worker class directly.
    if nanny:
        kwargs.update({
            "worker_port": worker_port,
            "listen_address": listen_address
        })
        t = Nanny
    else:
        if nanny_port:
            kwargs["service_ports"] = {"nanny": nanny_port}
        t = worker_class

    if (not scheduler and not scheduler_file
            and dask.config.get("scheduler-address", None) is None):
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    # Numeric names are kept as ints; anything else stays a string.
    with suppress(TypeError, ValueError):
        name = int(name)

    # One Nanny/Worker instance per process; when launching several under a
    # single name, suffix each with its process index.
    nannies = [
        t(scheduler,
          scheduler_file=scheduler_file,
          nthreads=nthreads,
          loop=loop,
          resources=resources,
          security=sec,
          contact_address=contact_address,
          host=host,
          port=port,
          dashboard=dashboard,
          dashboard_address=dashboard_address,
          name=name if nprocs == 1 or name is None or name == "" else
          str(name) + "-" + str(i),
          **kwargs) for i in range(nprocs)
    ]

    async def close_all():
        # Unregister all workers from scheduler
        if nanny:
            await asyncio.gather(*[n.close(timeout=2) for n in nannies])

    signal_fired = False

    def on_signal(signum):
        # Signal handler: remember that a signal arrived (so the timeout
        # path below stays quiet), then schedule an orderly shutdown.
        nonlocal signal_fired
        signal_fired = True
        if signum != signal.SIGINT:
            logger.info("Exiting on signal %d", signum)
        return asyncio.ensure_future(close_all())

    async def run():
        # Start every instance, then block until they have all finished.
        await asyncio.gather(*nannies)
        await asyncio.gather(*[n.finished() for n in nannies])

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except TimeoutError:
        # We already log the exception in nanny / worker. Don't do it again.
        if not signal_fired:
            logger.info("Timed out starting worker")
        sys.exit(1)
    except KeyboardInterrupt:
        pass
    finally:
        logger.info("End worker")
def __init__(
    self,
    name=None,
    n_workers=None,
    threads_per_worker=None,
    processes=None,
    loop=None,
    start=None,
    host=None,
    ip=None,
    scheduler_port=0,
    silence_logs=logging.WARN,
    dashboard_address=":8787",
    worker_dashboard_address=None,
    diagnostics_port=None,
    services=None,
    worker_services=None,
    service_kwargs=None,
    asynchronous=False,
    security=None,
    protocol=None,
    blocked_handlers=None,
    interface=None,
    worker_class=None,
    scheduler_kwargs=None,
    scheduler_sync_interval=1,
    **worker_kwargs,
):
    """Build a local scheduler-plus-workers cluster specification.

    Normalizes deprecated/aliased options (``ip`` -> ``host``,
    ``diagnostics_port`` -> ``dashboard_address``,
    ``threads_per_worker=0`` -> ``None``), decides between in-process
    workers and nanny-supervised worker processes, selects a transport
    protocol, derives a default (n_workers, threads_per_worker) split, and
    delegates the assembled scheduler/worker specs to ``super().__init__``.

    Note (review): ``start`` is accepted but not used in this body --
    presumably retained for backward compatibility; confirm before removal.
    """
    if ip is not None:
        # In the future we should warn users about this move
        # warnings.warn("The ip keyword has been moved to host")
        host = ip
    if diagnostics_port is not None:
        warnings.warn("diagnostics_port has been deprecated. "
                      "Please use `dashboard_address=` instead")
        dashboard_address = diagnostics_port
    if threads_per_worker == 0:
        warnings.warn(
            "Setting `threads_per_worker` to 0 has been deprecated. "
            "Please set to None or to a specific int.")
        threads_per_worker = None
    if "dashboard" in worker_kwargs:
        warnings.warn(
            "Setting `dashboard` is discouraged. "
            "Please set `dashboard_address` to affect the scheduler (more common) "
            "and `worker_dashboard_address` for the worker (less common).")

    # Default to process-based workers (Nanny) unless a non-Nanny worker
    # class was explicitly requested.
    if processes is None:
        processes = worker_class is None or issubclass(worker_class, Nanny)
    if worker_class is None:
        worker_class = Nanny if processes else Worker

    self.status = None
    self.processes = processes

    if security is None:
        # Falsey values load the default configuration
        security = Security()
    elif security is True:
        # True indicates self-signed temporary credentials should be used
        security = Security.temporary()
    elif not isinstance(security, Security):
        raise TypeError("security must be a Security object")

    # Protocol preference order: explicit scheme in `host` > TLS when
    # encryption is required > in-process transport for a single-process
    # cluster with no fixed scheduler port > plain TCP.
    if protocol is None:
        if host and "://" in host:
            protocol = host.split("://")[0]
        elif security and security.require_encryption:
            protocol = "tls://"
        elif not self.processes and not scheduler_port:
            protocol = "inproc://"
        else:
            protocol = "tcp://"
    if not protocol.endswith("://"):
        protocol = protocol + "://"

    # Bind to localhost by default for network transports.
    if host is None and not protocol.startswith(
            "inproc") and not interface:
        host = "127.0.0.1"

    services = services or {}
    worker_services = worker_services or {}

    # Derive the (n_workers, threads_per_worker) split when unspecified.
    if n_workers is None and threads_per_worker is None:
        if processes:
            n_workers, threads_per_worker = nprocesses_nthreads()
        else:
            n_workers = 1
            threads_per_worker = CPU_COUNT
    if n_workers is None and threads_per_worker is not None:
        n_workers = max(1, CPU_COUNT //
                        threads_per_worker) if processes else 1
    if n_workers and threads_per_worker is None:
        # Overcommit threads per worker, rather than undercommit
        threads_per_worker = max(1, int(math.ceil(CPU_COUNT / n_workers)))
    if n_workers and "memory_limit" not in worker_kwargs:
        # Split the auto-detected memory budget across the workers.
        worker_kwargs["memory_limit"] = parse_memory_limit(
            "auto", 1, n_workers)

    worker_kwargs.update({
        "host": host,
        "nthreads": threads_per_worker,
        "services": worker_services,
        "dashboard_address": worker_dashboard_address,
        "dashboard": worker_dashboard_address is not None,
        "interface": interface,
        "protocol": protocol,
        "security": security,
        "silence_logs": silence_logs,
    })

    # Scheduler spec: explicit kwargs here may be overridden by
    # user-supplied scheduler_kwargs (toolz.merge is right-biased).
    scheduler = {
        "cls": Scheduler,
        "options": toolz.merge(
            dict(
                host=host,
                services=services,
                service_kwargs=service_kwargs,
                security=security,
                port=scheduler_port,
                interface=interface,
                protocol=protocol,
                dashboard=dashboard_address is not None,
                dashboard_address=dashboard_address,
                blocked_handlers=blocked_handlers,
            ),
            scheduler_kwargs or {},
        ),
    }
    worker = {"cls": worker_class, "options": worker_kwargs}
    # Every worker shares the same spec entry, keyed by index.
    workers = {i: worker for i in range(n_workers)}
    super().__init__(
        name=name,
        scheduler=scheduler,
        workers=workers,
        worker=worker,
        loop=loop,
        asynchronous=asynchronous,
        silence_logs=silence_logs,
        security=security,
        scheduler_sync_interval=scheduler_sync_interval,
    )
async def test_nworkers_auto(c, s):
    """A worker launched with --nworkers=auto should register the default
    nprocesses_nthreads() number of worker processes."""
    with popen(["dask-worker", s.address, "--nworkers=auto"]):
        expected_procs, _ = nprocesses_nthreads()
        await c.wait_for_workers(expected_procs)