def start_cluster(ncores, loop, Worker=Worker, scheduler_kwargs={},
                  worker_kwargs={}):
    """Start a validating scheduler on 127.0.0.1 plus one worker per spec.

    Generator-style coroutine: it yields futures and finishes by raising
    ``gen.Return((scheduler, workers))``.

    Each entry of ``ncores`` is indexed as ``(ip, ncores[, extra_kwargs])``;
    when the third item is present it is merged over ``worker_kwargs`` for
    that worker only.

    Raises ``Exception("Cluster creation timeout")`` when the scheduler has
    not registered every worker within 5 seconds.
    """
    scheduler = Scheduler(ip='127.0.0.1', loop=loop, validate=True,
                          **scheduler_kwargs)
    scheduler.start(0)

    workers = []
    for index, spec in enumerate(ncores):
        if len(spec) > 2:
            extra = merge(worker_kwargs, spec[2])
        else:
            extra = worker_kwargs
        workers.append(
            Worker(scheduler.ip, scheduler.port, ncores=spec[1], ip=spec[0],
                   name=index, loop=loop, validate=True, **extra)
        )

    # Every worker shares the rpc object of the first worker.
    for worker in workers:
        worker.rpc = workers[0].rpc

    yield [worker._start() for worker in workers]

    began = time()
    while len(scheduler.ncores) < len(ncores):
        yield gen.sleep(0.01)
        if time() - began > 5:
            raise Exception("Cluster creation timeout")

    raise gen.Return((scheduler, workers))
async def test_security_dict_input(cleanup):
    """Scheduler, Worker and Client accept ``security`` as a plain dict.

    The certificate paths are read from ``tls_config()``; a single task is
    then submitted to confirm the cluster works.
    """
    tls = tls_config()["distributed"]["comm"]["tls"]
    ca_file = tls["ca-file"]
    client_security = {
        "tls_ca_file": ca_file,
        "tls_client_cert": tls["client"]["cert"],
    }
    worker_security = {
        "tls_ca_file": ca_file,
        "tls_worker_cert": tls["worker"]["cert"],
    }
    scheduler_security = {
        "tls_ca_file": ca_file,
        "tls_scheduler_cert": tls["scheduler"]["cert"],
    }

    async with Scheduler(security=scheduler_security) as s, \
            Worker(s.address, security=worker_security) as w, \
            Client(s.address, security=client_security,
                   asynchronous=True) as c:
        result = await c.submit(inc, 1)
        assert result == 2
async def test_large_transfer_with_no_compression():
    """Scatter a 1.5M-element random array over ws:// with compression off."""
    np = pytest.importorskip("numpy")
    with dask.config.set({"distributed.comm.compression": None}):
        async with Scheduler(protocol="ws://") as s, \
                Worker(s.address, protocol="ws://"), \
                Client(s.address, asynchronous=True) as c:
            payload = np.random.random(1_500_000)
            await c.scatter(payload)
async def start_scheduler(gateway, security, exit_on_failure=True):
    """Start a Scheduler carrying the gateway plugin and report its addresses.

    A "gateway" service is always registered; a "dashboard" service is added
    only when ``distributed.dashboard.scheduler`` can be imported. Once the
    scheduler is up, its address, the dashboard address (``""`` when there is
    no dashboard) and the gateway API address are sent via
    ``gateway.send_addresses``.

    If sending fails the error is logged, and the process exits with status 1
    when ``exit_on_failure`` is true. Otherwise the scheduler is returned.
    """
    io_loop = IOLoop.current()
    plugin = GatewaySchedulerPlugin(gateway, io_loop)
    services = {("gateway", 0): (GatewaySchedulerService, {"plugin": plugin})}

    has_dashboard = False
    with ignoring(ImportError):
        from distributed.dashboard.scheduler import BokehScheduler

        services[("dashboard", 0)] = (BokehScheduler, {})
        has_dashboard = True

    scheduler = Scheduler(loop=io_loop, services=services, security=security)
    scheduler.add_plugin(plugin)
    await scheduler

    host = urlparse(scheduler.address).hostname
    api_address = "http://%s:%d" % (host, scheduler.services["gateway"].port)
    dashboard_address = (
        "http://%s:%d" % (host, scheduler.services["dashboard"].port)
        if has_dashboard
        else ""
    )

    try:
        await gateway.send_addresses(
            scheduler.address, dashboard_address, api_address
        )
    except Exception as exc:
        logger.error("Failed to send addresses to gateway", exc_info=exc)
        if exit_on_failure:
            sys.exit(1)

    return scheduler
def create_scheduler(loop, scheduler_file=None, host=None, bokeh=True,
                     bokeh_port=8787, bokeh_prefix=None, scheduler_port=None):
    """Build a Scheduler, start it on the requested address and return it.

    A 'bokeh' service is registered on ``bokeh_port`` only when ``bokeh`` is
    truthy and ``distributed.bokeh.scheduler`` can be imported. The listen
    address comes from ``uri_from_host_port(host, scheduler_port, 8786)``.
    """
    try:
        from distributed.bokeh.scheduler import BokehScheduler
    except ImportError:
        BokehScheduler = None

    services = {}
    if bokeh and BokehScheduler:
        services[('bokeh', bokeh_port)] = partial(BokehScheduler,
                                                  prefix=bokeh_prefix)

    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file)
    scheduler.start(uri_from_host_port(host, scheduler_port, 8786))
    return scheduler
def _start_scheduler(port=6000):
    """Start a Scheduler on the current IOLoop and return it.

    Parameters
    ----------
    port : int, optional
        TCP port the scheduler listens on. Defaults to 6000, the value that
        used to be hard-coded, so existing zero-argument callers behave the
        same.

    Returns
    -------
    The started ``Scheduler`` instance.
    """
    logger.info("Starting scheduler...")
    loop = IOLoop.current()
    s = Scheduler(loop=loop)
    s.start("tcp://:{}".format(port))  # Listen on the requested TCP port
    logger.info("Scheduler started")
    return s
async def test_connection_made_with_extra_conn_args(cleanup, protocol, security):
    """The worker connection args from ``security`` reach the comm request.

    Connects to the scheduler with ``security.get_connection_args("worker")``
    and checks the ``Authorization`` header on the resulting request.
    """
    async with Scheduler(protocol=protocol, security=security) as s:
        comm = await connect(
            s.address, **security.get_connection_args("worker")
        )
        auth_header = comm.sock.request.headers.get("Authorization")
        assert auth_header == "Token abcd"
        await comm.close()
async def start_scheduler(
    gateway,
    security,
    adaptive_period=3,
    heartbeat_period=15,
    idle_timeout=0,
    scheduler_address="tls://:0",
    dashboard_address=":0",
    api_address=":0",
    exit_on_failure=True,
):
    """Create, start and return a Scheduler exposing the gateway service.

    The gateway service is keyed on ``api_address``, falling back to ``0``
    when that is falsy, and receives the gateway handle together with the
    three timing settings. ``exit_on_failure`` is accepted but not used in
    this function body.
    """
    gateway_options = {
        "gateway": gateway,
        "adaptive_period": adaptive_period,
        "heartbeat_period": heartbeat_period,
        "idle_timeout": idle_timeout,
    }
    gateway_key = ("gateway", api_address or 0)

    scheduler = Scheduler(
        host=scheduler_address,
        loop=IOLoop.current(),
        services={gateway_key: (GatewaySchedulerService, gateway_options)},
        security=security,
        dashboard_address=dashboard_address,
    )
    return await scheduler
async def test_security_dict_input():
    """Dict-valued ``security`` yields tls:// addresses and a working cluster.

    The certificate paths are read from ``tls_config()``. Scheduler and
    worker addresses must both start with ``tls://``, and a submitted task
    must return its result.
    """
    tls = tls_config()["distributed"]["comm"]["tls"]
    ca_file = tls["ca-file"]
    client_security = {
        "tls_ca_file": ca_file,
        "tls_client_cert": tls["client"]["cert"],
    }
    worker_security = {
        "tls_ca_file": ca_file,
        "tls_worker_cert": tls["worker"]["cert"],
    }
    scheduler_security = {
        "tls_ca_file": ca_file,
        "tls_scheduler_cert": tls["scheduler"]["cert"],
    }

    async with Scheduler(
        host="localhost",
        dashboard_address=":0",
        security=scheduler_security,
    ) as s:
        assert s.address.startswith("tls://")

        async with Worker(s.address, security=worker_security) as w:
            assert w.address.startswith("tls://")

            async with Client(
                s.address, security=client_security, asynchronous=True
            ) as c:
                result = await c.submit(inc, 1)
                assert result == 2
def test_scheduler_file():
    """A Nanny given only the scheduler file registers with that scheduler.

    Generator-style test: the scheduler (port 8008) and the nanny are started
    by yielding them, both pointing at the same temporary file.
    """
    with tmpfile() as path:
        scheduler = yield Scheduler(scheduler_file=path, port=8008)
        nanny = yield Nanny(scheduler_file=path)

        registered = set(scheduler.workers)
        assert registered == {nanny.worker_address}

        yield nanny.close()
        scheduler.stop()
def g():
    """Run ``f(s, a, b)`` against a scheduler and two local workers.

    Generator-style coroutine. ``f`` and ``b_ip`` are not defined in this
    block and are taken from the surrounding scope. Worker ``a`` has two
    cores on 127.0.0.1, worker ``b`` has one core on ``b_ip``.

    Raises ``Exception("Cluster creation timeout")`` when both workers have
    not registered within 5 seconds. The workers and the scheduler are
    closed, and each worker's local directory removed, whether or not ``f``
    succeeds.
    """
    scheduler = Scheduler(ip='127.0.0.1')
    scheduler.start()
    scheduler.listen(0)

    a = Worker('127.0.0.1', scheduler.port, ncores=2, ip='127.0.0.1')
    yield a._start()
    b = Worker('127.0.0.1', scheduler.port, ncores=1, ip=b_ip)
    yield b._start()

    began = time()
    try:
        while len(scheduler.ncores) < 2:
            yield gen.sleep(0.01)
            if time() - began > 5:
                raise Exception("Cluster creation timeout")

        yield f(scheduler, a, b)
    finally:
        logger.debug("Closing out test cluster")
        for worker in (a, b):
            # Closing is best effort; the directory cleanup still runs.
            with ignoring(TimeoutError, StreamClosedError, OSError):
                yield worker._close()
            if os.path.exists(worker.local_dir):
                shutil.rmtree(worker.local_dir)
        yield scheduler.close()
def f(c, a, b):
    """TextProgressBar reports 'error' for a failing key, on repeated listens.

    Generator-style coroutine. A scheduler is attached to center ``c`` and
    given the one-task graph ``{'x': (div, 1, 0)}``. Two progress bars are
    then created one after the other; each must end with status 'error' and
    a closed stream. ``loop`` is taken from the surrounding scope.
    """
    s = Scheduler((c.ip, c.port), loop=loop)
    s.listen(0)
    yield s.sync_center()
    done = s.start()

    s.update_graph(dsk={'x': (div, 1, 0)}, keys=['x'])

    # The same check runs twice, each time with a fresh progress bar.
    for _ in range(2):
        progress = TextProgressBar(['x'], scheduler=(s.ip, s.port),
                                   start=False, interval=0.01)
        yield progress.listen()

        assert progress.status == 'error'
        assert progress.stream.closed()

    s.close()
    yield done
async def do_start_cluster(self, cluster):
    """Start an in-process TLS scheduler for ``cluster``, yielding state.

    Async generator. It first yields ``{"workdir": ...}`` once the working
    directory is set up, then yields the same dict with ``"started": True``
    after the scheduler has been awaited. The scheduler is stored in
    ``self.schedulers`` under ``cluster.name`` before it is started.
    """
    workdir = self.setup_working_directory(cluster)
    yield {"workdir": workdir}

    security = self.get_security(cluster)
    gateway_client = self.get_gateway_client(cluster)

    gateway_options = {
        "gateway": gateway_client,
        "heartbeat_period": self.cluster_heartbeat_period,
        "adaptive_period": cluster.config.adaptive_period,
        "idle_timeout": cluster.config.idle_timeout,
    }
    scheduler = Scheduler(
        protocol="tls",
        host="127.0.0.1",
        port=0,
        dashboard_address="127.0.0.1:0",
        security=security,
        services={
            ("gateway", ":0"): (GatewaySchedulerService, gateway_options)
        },
    )
    self.schedulers[cluster.name] = scheduler

    await scheduler
    yield {"workdir": workdir, "started": True}
async def test_nanny_port_range(cleanup):
    """Nannies and their workers take ports from the configured ranges.

    Two nannies fit into the range "9867:9868"; a third must fail with
    ``ValueError`` matching "Could not start Nanny". The worker ports
    reported by ``client.run`` must equal ``parse_ports("9869:9870")``.
    """
    async with Scheduler() as s, \
            Client(s.address, asynchronous=True) as client:
        nanny_port = "9867:9868"
        worker_port = "9869:9870"
        port_kwargs = {"port": nanny_port, "worker_port": worker_port}

        async with Nanny(s.address, **port_kwargs) as n1:
            assert n1.port == 9867  # Selects first port in range

            async with Nanny(s.address, **port_kwargs) as n2:
                assert n2.port == 9868  # Selects next port in range

                # No more ports left
                with pytest.raises(ValueError, match="Could not start Nanny"):
                    async with Nanny(s.address, **port_kwargs):
                        pass

                # Ensure Worker ports are in worker_port range
                def get_worker_port(dask_worker):
                    return dask_worker.port

                worker_ports = await client.run(get_worker_port)
                observed = list(worker_ports.values())
                assert observed == parse_ports(worker_port)
def start_cluster(ncores, scheduler_addr, loop, security=None, Worker=Worker,
                  scheduler_kwargs={}, worker_kwargs={}):
    """Start a validating scheduler at ``scheduler_addr`` and its workers.

    Generator-style coroutine: it yields futures and finishes by raising
    ``gen.Return((scheduler, workers))``.

    Each entry of ``ncores`` is indexed as ``(address, ncores[, extra])``:
    item 0 is passed to the worker's ``_start``, item 1 is its core count,
    and an optional item 2 is merged over ``worker_kwargs`` for that worker.

    Raises ``Exception("Cluster creation timeout")`` when the scheduler has
    not registered every worker within 5 seconds.
    """
    scheduler = Scheduler(loop=loop, validate=True, security=security,
                          **scheduler_kwargs)
    scheduler.start(scheduler_addr)

    workers = []
    for index, spec in enumerate(ncores):
        if len(spec) > 2:
            extra = merge(worker_kwargs, spec[2])
        else:
            extra = worker_kwargs
        workers.append(
            Worker(scheduler.address, ncores=spec[1], name=index,
                   security=security, loop=loop, validate=True, **extra)
        )

    # Every worker shares the rpc object of the first worker.
    for worker in workers:
        worker.rpc = workers[0].rpc

    yield [worker._start(spec[0]) for spec, worker in zip(ncores, workers)]

    began = time()
    while len(scheduler.ncores) < len(ncores):
        yield gen.sleep(0.01)
        if time() - began > 5:
            raise Exception("Cluster creation timeout")

    raise gen.Return((scheduler, workers))
async def start_scheduler(
    gateway,
    security,
    adaptive_period=3,
    heartbeat_period=15,
    idle_timeout=0,
    exit_on_failure=True,
):
    """Create, start and return a Scheduler exposing the gateway service.

    A "dashboard" service is added as well when
    ``distributed.dashboard.scheduler`` can be imported. ``exit_on_failure``
    is accepted but not used in this function body.
    """
    io_loop = IOLoop.current()

    gateway_options = {
        "gateway": gateway,
        "adaptive_period": adaptive_period,
        "heartbeat_period": heartbeat_period,
        "idle_timeout": idle_timeout,
    }
    services = {("gateway", 0): (GatewaySchedulerService, gateway_options)}

    with ignoring(ImportError):
        from distributed.dashboard.scheduler import BokehScheduler

        services[("dashboard", 0)] = (BokehScheduler, {})

    scheduler = Scheduler(loop=io_loop, services=services, security=security)
    return await scheduler
async def test_integer_names(cleanup):
    """A ``dask-worker --name 123`` process registers with the int name 123."""
    async with Scheduler(port=0) as s:
        command = ["dask-worker", s.address, "--name", "123"]
        with popen(command):
            # Poll until the worker process has registered.
            while not s.workers:
                await asyncio.sleep(0.01)

            (ws,) = s.workers.values()
            assert ws.name == 123
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show,
         _bokeh, bokeh_whitelist, prefix, use_xheaders, pid_file):
    """Run a scheduler, and optionally the Bokeh web UI, until the loop stops.

    Steps, in order:

    1. If ``pid_file`` is given, write the current PID to it and register an
       ``atexit`` hook that removes the file.
    2. On Linux, raise the soft open-file limit to
       ``max(soft, hard // 2)``.
    3. Start a ``Scheduler`` at ``uri_from_host_port(host, port, 8786)`` with
       an 'http' service, plus a 'bokeh' service when ``_bokeh`` is truthy
       and the module can be imported.
    4. If ``_bokeh`` is truthy, try to start ``BokehWebInterface``; failures
       are logged and do not stop the scheduler.
    5. Run the IOLoop; on exit stop the scheduler and close the Bokeh
       process if one was started.
    """
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows

        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler

            services[('bokeh', bokeh_internal_port)] = BokehScheduler

    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface

            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           tcp_port=scheduler.port,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            # The web UI is optional: log the traceback and carry on.
            # ``warning`` replaces the deprecated ``warn`` alias.
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %r", addr)
async def test_scheduler_startup_nanny(cleanup):
    """A nanny preload script can supply the scheduler address via config.

    The Nanny is created without an explicit address; the preload text sets
    ``scheduler_address`` in the dask config, and the nanny must end up
    pointing at the running scheduler.
    """
    async with Scheduler(port=0) as s:
        # The preload text is Python source, so each statement needs its own
        # line and no leading indentation.
        text = "\n".join(
            [
                "",
                "import dask",
                f'dask.config.set(scheduler_address="{s.address}")',
                "",
            ]
        )
        async with Nanny(preload_nanny=text) as w:
            assert w.scheduler.address == s.address
async def test_web_preload(cleanup, scheduler_preload):
    """A preload given as an HTTP URL runs and is mentioned in the log.

    The scheduler must have ``foo == 1`` after start, and the
    "distributed.preloading" log must contain the URL fragment.
    """
    preload_url = "http://127.0.0.1:12345/preload"
    with captured_logger("distributed.preloading") as log:
        async with Scheduler(host="localhost", preload=[preload_url]) as s:
            assert s.foo == 1

    captured = log.getvalue()
    assert "12345/preload" in captured
async def test_failure_during_worker_initialization(cleanup):
    """A Nanny given an unexpected keyword fails without a restart message.

    Starting ``Nanny(..., foo="bar")`` must raise, and the
    "distributed.nanny" log at WARNING level must not contain
    "Restarting worker".
    """
    with captured_logger(
        logger="distributed.nanny", level=logging.WARNING
    ) as logs:
        async with Scheduler() as s:
            with pytest.raises(Exception):
                async with Nanny(s.address, foo="bar") as n:
                    await n

    captured = logs.getvalue()
    assert "Restarting worker" not in captured
def _start_scheduler(port=6000):
    """Start a Scheduler on the current IOLoop, listening on TCP ``port``.

    Progress is logged through this module's logger. Returns the started
    scheduler.
    """
    logger = logging.getLogger(__name__)
    logger.info("Starting scheduler...")

    scheduler = Scheduler(loop=IOLoop.current())
    listen_address = "tcp://:{}".format(port)
    scheduler.start(listen_address)  # Listen on port

    logger.info("Scheduler started on port {}".format(port))
    return scheduler
def test_services_with_port():
    """A service keyed as ``(name, port)`` is served on exactly that port."""
    scheduler = Scheduler(services={('http', 9999): HTTPScheduler})
    scheduler.start()
    try:
        http_service = scheduler.services['http']
        assert isinstance(http_service, HTTPServer)
        assert http_service.port == 9999
    finally:
        scheduler.close()
def test_services():
    """A service keyed by name only is started on some positive port."""
    scheduler = Scheduler(services={'http': HTTPScheduler})
    scheduler.start()
    try:
        http_service = scheduler.services['http']
        assert isinstance(http_service, HTTPServer)
        assert http_service.port > 0
    finally:
        scheduler.close()
def test_scheduler_file():
    """A Nanny given only the scheduler file registers with that scheduler.

    Generator-style test: the scheduler is started on 8008 and writes its
    details to a temporary file, which the nanny then reads.
    """
    with tmpfile() as path:
        scheduler = Scheduler(scheduler_file=path)
        scheduler.start(8008)

        nanny = Nanny(scheduler_file=path)
        yield nanny._start()

        expected = {nanny.worker_address}
        assert scheduler.workers == expected

        yield nanny._close()
        scheduler.stop()
async def test_worker_preload_text(cleanup):
    """Preload source passed as text runs on both Scheduler and Worker.

    The text defines ``dask_setup``, which sets ``foo = 'setup'`` on the
    object it receives. The scheduler gets the text as a bare string and the
    worker gets it inside a list.
    """
    # The preload text is Python source, so the ``def`` must start at
    # column 0 with its body on an indented line of its own.
    text = "\n".join(
        [
            "",
            "def dask_setup(worker):",
            "    worker.foo = 'setup'",
            "",
        ]
    )
    async with Scheduler(port=0, preload=text) as s:
        assert s.foo == "setup"
        async with Worker(s.address, preload=[text]) as w:
            assert w.foo == "setup"
async def test_worker_preload_text():
    """Preload source passed as text runs on both Scheduler and Worker.

    The text defines ``dask_setup``, which sets ``foo = 'setup'`` on the
    object it receives. The scheduler gets the text as a bare string and the
    worker gets it inside a list.
    """
    # The preload text is Python source, so the ``def`` must start at
    # column 0 with its body on an indented line of its own.
    text = "\n".join(
        [
            "",
            "def dask_setup(worker):",
            "    worker.foo = 'setup'",
            "",
        ]
    )
    async with Scheduler(dashboard_address=":0", preload=text) as s:
        assert s.foo == "setup"
        async with Worker(s.address, preload=[text]) as w:
            assert w.foo == "setup"
async def test_collections(cleanup):
    """Persist a dask array expression on a two-worker ws:// cluster."""
    da = pytest.importorskip("dask.array")
    async with Scheduler(protocol="ws://") as s, \
            Worker(s.address) as a, \
            Worker(s.address) as b, \
            Client(s.address, asynchronous=True) as c:
        x = da.random.random((1000, 1000), chunks=(100, 100))
        x = x + x.T
        await x.persist()
async def test_roundtrip(cleanup):
    """A task submitted over ws:// returns, and addresses use ws://.

    The worker is created without an explicit protocol; its address must
    still start with ``ws://``.
    """
    async with Scheduler(protocol="ws://") as s, \
            Worker(s.address) as w, \
            Client(s.address, asynchronous=True) as c:
        assert c.scheduler.address.startswith("ws://")
        assert w.address.startswith("ws://")

        result = await c.submit(inc, 1)
        assert result == 2
async def test_nanny_closes_cleanly(cleanup):
    """Closing a Nanny stops its worker process with exit code 0.

    The underlying process object is captured before ``close()`` because
    ``n.process`` must be falsy afterwards.
    """
    async with Scheduler() as s:
        nanny = await Nanny(s.address)
        assert nanny.process.pid
        worker_proc = nanny.process.process

        await nanny.close()

        assert not nanny.process
        assert not worker_proc.is_alive()
        assert worker_proc.exitcode == 0