def start_diagnostics_server(self, port=8787, show=False,
                             silence=logging.CRITICAL):
    """ Start Diagnostics Web Server

    This starts a web application to show diagnostics of what is happening
    on the cluster.  This application runs in a separate process and is
    generally available at the following location:
    http://localhost:8787/status/
    """
    try:
        from distributed.bokeh.application import BokehWebInterface
    except ImportError:
        logger.info("To start diagnostics web server please install Bokeh")
        return
    from ..http.scheduler import HTTPScheduler

    assert self.diagnostics is None
    if 'http' not in self.scheduler.services:
        self.scheduler.services['http'] = HTTPScheduler(
            self.scheduler, io_loop=self.scheduler.loop)
        self.scheduler.services['http'].listen(0)
    self.diagnostics = BokehWebInterface(
        tcp_port=self.scheduler.port,
        http_port=self.scheduler.services['http'].port,
        bokeh_port=port, show=show,
        log_level=logging.getLevelName(silence).lower())
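# A minimal usage sketch (not part of the original source): how the
# start_diagnostics_server method above is typically driven from a
# LocalCluster.  The import path and the port number are assumptions made
# for illustration only.
import logging

from distributed.deploy.local import LocalCluster  # import path assumed


def demo_diagnostics_server():
    # Create a small local cluster with the automatic dashboard disabled,
    # then start the diagnostics web server explicitly on a chosen port.
    cluster = LocalCluster(n_workers=2, diagnostics_port=None)
    cluster.start_diagnostics_server(port=8790, show=False,
                                     silence=logging.CRITICAL)
    # The Bokeh dashboard should now answer at http://localhost:8790/status/
    cluster.close()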
def main(center, host, port, http_port, bokeh_port, show, _bokeh,
         bokeh_whitelist):
    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(center, ip=ip,
                          services={('http', http_port): HTTPScheduler},
                          loop=loop)
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host, http_port=http_port,
                    tcp_port=port, bokeh_port=bokeh_port,
                    bokeh_whitelist=bokeh_whitelist, show=show)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show,
         _bokeh, bokeh_whitelist, prefix, use_xheaders, pid_file):
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                    tcp_port=scheduler.port, bokeh_port=bokeh_port,
                    bokeh_whitelist=bokeh_whitelist, show=show,
                    prefix=prefix, use_xheaders=use_xheaders, quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

    logger.info("End scheduler at %r", addr)
def main(host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist,
         prefix, use_xheaders, pid_file):
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(ip=ip, loop=loop,
                          services={('http', http_port): HTTPScheduler})
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host, http_port=http_port,
                    tcp_port=port, bokeh_port=bokeh_port,
                    bokeh_whitelist=bokeh_whitelist, show=show,
                    prefix=prefix, use_xheaders=use_xheaders, quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
def main(host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist,
         prefix, use_xheaders, pid_file):
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    limit = max(soft, hard // 2)
    resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(ip=ip, loop=loop,
                          services={('http', http_port): HTTPScheduler})
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host, http_port=http_port,
                    tcp_port=port, bokeh_port=bokeh_port,
                    bokeh_whitelist=bokeh_whitelist, show=show,
                    prefix=prefix, use_xheaders=use_xheaders, quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
def main(host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist,
         prefix, use_xheaders):
    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(ip=ip, loop=loop,
                          services={('http', http_port): HTTPScheduler})
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host, http_port=http_port,
                    tcp_port=port, bokeh_port=bokeh_port,
                    bokeh_whitelist=bokeh_whitelist, show=show,
                    prefix=prefix, use_xheaders=use_xheaders, quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
def test_BokehWebInterface(loop):
    with LocalCluster(2, loop=loop, scheduler_port=0,
                      services={('http', 0): HTTPScheduler},
                      diagnostics_port=None) as c:
        with pytest.raises(Exception):
            response = requests.get('http://127.0.0.1:8787/status/')
        with BokehWebInterface(scheduler_address=c.scheduler.address,
                               http_port=c.scheduler.services['http'].port,
                               bokeh_port=8787) as w:
            start = time()
            while True:
                with ignoring(Exception):
                    response = requests.get('http://127.0.0.1:8787/status/')
                    if response.ok:
                        break
                assert time() < start + 5
                sleep(0.01)
        with pytest.raises(Exception):
            response = requests.get('http://127.0.0.1:8787/status/')
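# The test above polls the /status/ endpoint until it responds or a five
# second deadline passes.  Below is a small standalone sketch of that same
# retry pattern; the helper name and defaults are mine, not part of the
# test suite.
import time

import requests


def wait_for_http(url, timeout=5.0, interval=0.01):
    """Poll ``url`` until it answers with an OK status or ``timeout`` expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if requests.get(url).ok:
                return True
        except Exception:
            pass  # server not up yet; keep polling
        time.sleep(interval)
    return False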
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port
    silence_logs: logging level
        Level of logs to print out to stdout.  ``logging.CRITICAL`` by
        default.  Use a falsey value like False or None for no change.
    kwargs: dict
        Extra worker arguments, will be passed to the Worker constructor.

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> c = Client(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self, n_workers=None, threads_per_worker=None, nanny=True,
                 loop=None, start=True, scheduler_port=8786,
                 silence_logs=logging.CRITICAL, diagnostics_port=8787,
                 services={}, worker_services={}, **kwargs):
        self.status = None
        self.nanny = nanny
        self.silence_logs = silence_logs
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            # Overcommit threads per worker, rather than undercommit
            threads_per_worker = max(1, int(math.ceil(_ncores / n_workers)))

        self.loop = loop or IOLoop()
        if start and not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop, ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = partial(self.loop.add_callback, self._start_worker)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker, nanny=nanny,
                          services=worker_services, **kwargs)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
                self.scheduler_address, len(self.workers),
                sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=None, **kwargs):
        if nanny is None:
            nanny = self.nanny
        if nanny:
            W = Nanny
            kwargs['quiet'] = True
        else:
            W = Worker
        w = W(self.scheduler.ip, self.scheduler.port, loop=self.loop,
              silence_logs=self.silence_logs, **kwargs)
        yield w._start(port)

        self.workers.append(w)

        while w.worker_address not in self.scheduler.worker_info:
            yield gen.sleep(0.01)

        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop, self._start_worker, port, ncores=ncores,
                    **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self, port=8787, show=False,
                                 silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is
        happening on the cluster.  This application runs in a separate
        process and is generally available at the following location:
        http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return
        from ..http.scheduler import HTTPScheduler

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(
                self.scheduler, io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    @gen.coroutine
    def scale_up(self, n, **kwargs):
        """ Bring the total count of workers up to ``n``

        This function/coroutine should bring the total number of workers up
        to the number ``n``.

        This can be implemented either as a function or as a Tornado
        coroutine.
        """
        yield [self._start_worker(**kwargs)
               for i in range(n - len(self.workers))]

    @gen.coroutine
    def scale_down(self, workers):
        """ Remove ``workers`` from the cluster

        Given a list of worker addresses this function should remove those
        workers from the cluster.  This may require tracking which jobs are
        associated to which worker address.

        This can be implemented either as a function or as a Tornado
        coroutine.
        """
        workers = set(workers)
        yield [self._stop_worker(w)
               for w in self.workers
               if w.worker_address in workers]
        while workers & set(self.workers):
            yield gen.sleep(0.01)

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
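# A hedged usage sketch for the LocalCluster class above (not part of the
# original source).  The Client import and the keyword values are
# assumptions for illustration; the context-manager form relies on the
# __enter__/__exit__ methods defined above.
from distributed import Client
from distributed.deploy.local import LocalCluster  # import path assumed


def demo_local_cluster():
    with LocalCluster(n_workers=2, threads_per_worker=1,
                      scheduler_port=0, diagnostics_port=None) as cluster:
        client = Client(cluster.scheduler_address)
        print(cluster)  # LocalCluster("127.0.0.1:...", workers=2, ncores=2)
        extra = cluster.start_worker(ncores=1)   # add a third worker
        cluster.stop_worker(extra)               # and remove it again
        client.close()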
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port
    silence_logs: logging level
        Level of logs to print out to stdout.  ``logging.CRITICAL`` by
        default.  Use a falsey value like False or None for no change.
    kwargs: dict
        Extra worker arguments, will be passed to the Worker constructor.

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> c = Client(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self, n_workers=None, threads_per_worker=None, nanny=True,
                 loop=None, start=True, scheduler_port=8786,
                 silence_logs=logging.CRITICAL, diagnostics_port=8787,
                 services={}, worker_services={}, **kwargs):
        self.status = None
        self.nanny = nanny
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            # Overcommit threads per worker, rather than undercommit
            threads_per_worker = max(1, int(math.ceil(_ncores / n_workers)))

        self.loop = loop or IOLoop()
        if start and not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop, ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = partial(self.loop.add_callback, self._start_worker)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker, nanny=nanny,
                          services=worker_services, **kwargs)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
                self.scheduler_address, len(self.workers),
                sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=None, **kwargs):
        if nanny is None:
            nanny = self.nanny
        if nanny:
            W = Nanny
            kwargs['quiet'] = True
        else:
            W = Worker
        w = W(self.scheduler.ip, self.scheduler.port, loop=self.loop, **kwargs)
        yield w._start(port)

        self.workers.append(w)

        while w.worker_address not in self.scheduler.worker_info:
            yield gen.sleep(0.01)

        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop, self._start_worker, port, ncores=ncores,
                    **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self, port=8787, show=False,
                                 silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is
        happening on the cluster.  This application runs in a separate
        process and is generally available at the following location:
        http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return
        from ..http.scheduler import HTTPScheduler

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(self.scheduler,
                    io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    @gen.coroutine
    def scale_up(self, n, **kwargs):
        """ Bring the total count of workers up to ``n``

        This function/coroutine should bring the total number of workers up
        to the number ``n``.

        This can be implemented either as a function or as a Tornado
        coroutine.
        """
        yield [self._start_worker(**kwargs)
               for i in range(n - len(self.workers))]

    @gen.coroutine
    def scale_down(self, workers):
        """ Remove ``workers`` from the cluster

        Given a list of worker addresses this function should remove those
        workers from the cluster.  This may require tracking which jobs are
        associated to which worker address.

        This can be implemented either as a function or as a Tornado
        coroutine.
        """
        workers = set(workers)
        yield [self._stop_worker(w)
               for w in self.workers
               if w.worker_address in workers]
        while workers & set(self.workers):
            yield gen.sleep(0.01)

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> e = Executor(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self, n_workers=None, threads_per_worker=None, nanny=True,
                 loop=None, start=True, scheduler_port=8786,
                 silence_logs=logging.CRITICAL, diagnostics_port=8787,
                 services={'http': HTTPScheduler}, **kwargs):
        self.status = None
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            threads_per_worker = max(1, _ncores // n_workers)

        self.loop = loop or IOLoop()
        if not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop, ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = lambda *args, **kwargs: self.loop.add_callback(
                    self._start_worker, *args, **kwargs)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker, nanny=nanny)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
                self.scheduler_address, len(self.workers),
                sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=True, **kwargs):
        if nanny:
            W = Nanny
        else:
            W = Worker
        w = W(self.scheduler.ip, self.scheduler.port, loop=self.loop, **kwargs)
        yield w._start(port)

        self.workers.append(w)
        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop, self._start_worker, port, ncores=ncores,
                    **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self, port=8787, show=False,
                                 silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is
        happening on the cluster.  This application runs in a separate
        process and is generally available at the following location:
        http://localhost:8787/status/
        """
        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(self.scheduler,
                    io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        from distributed.bokeh.application import BokehWebInterface
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show,
         _bokeh, bokeh_whitelist, prefix, use_xheaders, pid_file,
         scheduler_file, interface):
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        if bokeh_port == 0:            # This is a hack and not robust
            bokeh_port = open_port()   # This port may be taken by the OS
        try:                           # before we successfully pass it to Bokeh
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                    scheduler_address=scheduler.address,
                    bokeh_port=bokeh_port, bokeh_whitelist=bokeh_whitelist,
                    show=show, prefix=prefix, use_xheaders=use_xheaders,
                    quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

    logger.info("End scheduler at %r", addr)
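# The block above falls back to open_port() when bokeh_port is 0, and the
# inline comments flag this as racy.  Below is a sketch of what such a
# helper usually looks like, using only the standard library; this is an
# illustration and not necessarily the distributed.utils implementation.
import socket


def open_port(host=''):
    """Return a port number that was free at the moment of the call.

    The port may be claimed by another process before it is actually used,
    which is exactly the race the original comment warns about.
    """
    s = socket.socket()
    s.bind((host, 0))          # port 0 asks the OS for any free port
    port = s.getsockname()[1]
    s.close()
    return port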
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> e = Executor(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self, n_workers=None, threads_per_worker=None, nanny=True,
                 loop=None, start=True, scheduler_port=8786,
                 silence_logs=logging.CRITICAL, diagnostics_port=8787,
                 services={'http': HTTPScheduler}, **kwargs):
        self.status = None
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            threads_per_worker = max(1, _ncores // n_workers)

        self.loop = loop or IOLoop()
        if not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop, ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = lambda *args, **kwargs: self.loop.add_callback(
                    self._start_worker, *args, **kwargs)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker, nanny=nanny)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
                self.scheduler_address, len(self.workers),
                sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=True, **kwargs):
        if nanny:
            W = Nanny
        else:
            W = Worker
        w = W(self.scheduler.ip, self.scheduler.port, loop=self.loop, **kwargs)
        yield w._start(port)

        self.workers.append(w)
        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop, self._start_worker, port, ncores=ncores,
                    **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self, port=8787, show=False,
                                 silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is
        happening on the cluster.  This application runs in a separate
        process and is generally available at the following location:
        http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(self.scheduler,
                    io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
def main(host, port, http_port, bokeh_port, bokeh_external_port,
         bokeh_internal_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface,
         local_directory, preload, prefix, tls_ca_file, tls_cert, tls_key):

    if bokeh_internal_port:
        print("The --bokeh-internal-port keyword has been removed.\n"
              "The internal bokeh server is now the default bokeh server.\n"
              "Use --bokeh-port %d instead" % bokeh_internal_port)
        sys.exit(1)

    if prefix:
        print("The --prefix keyword has moved to --bokeh-prefix")
        sys.exit(1)

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key,
                   )

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = partial(BokehScheduler,
                                                      prefix=bokeh_prefix)
    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)

    preload_modules(preload, parameter=scheduler, file_dir=local_directory)

    bokeh_proc = None
    if _bokeh and bokeh_external_port is not None:
        if bokeh_external_port == 0:           # This is a hack and not robust
            bokeh_external_port = open_port()  # This port may be taken by the OS
        try:                                   # before we successfully pass it to Bokeh
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                    scheduler_address=scheduler.address,
                    bokeh_port=bokeh_external_port,
                    bokeh_whitelist=bokeh_whitelist, show=show,
                    prefix=bokeh_prefix, use_xheaders=use_xheaders,
                    quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()
        if local_directory_created:
            shutil.rmtree(local_directory)

    logger.info("End scheduler at %r", addr)
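# The scheduler above is given a Security object built from scheduler-side
# TLS credentials.  A hedged sketch of the matching client-side setup; the
# keyword names are assumed to mirror distributed.security.Security and the
# certificate file paths are placeholders, not real files.
from distributed import Client
from distributed.security import Security


def connect_with_tls(scheduler_address):
    sec = Security(tls_ca_file='ca.pem',               # same CA as the scheduler
                   tls_client_cert='client-cert.pem',  # placeholder paths
                   tls_client_key='client-key.pem',
                   require_encryption=True)
    # Connect over TLS; the address scheme is expected to be tls://host:port
    return Client(scheduler_address, security=sec)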
def main(host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist,
         prefix, use_xheaders, pid_file):
    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    if sys.platform.startswith("linux"):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    given_host = host
    host = host or get_ip()
    if ":" in host and port == 8786:
        host, port = host.rsplit(":", 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    logger.info("-" * 47)

    services = {("http", http_port): HTTPScheduler}
    if _bokeh:
        from distributed.bokeh.scheduler import BokehScheduler
        services[("bokeh", 8788)] = BokehScheduler
    scheduler = Scheduler(ip=ip, loop=loop, services=services)
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(
                host=host,
                http_port=http_port,
                tcp_port=port,
                bokeh_port=bokeh_port,
                bokeh_whitelist=bokeh_whitelist,
                show=show,
                prefix=prefix,
                use_xheaders=use_xheaders,
                quiet=False,
            )
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info("-" * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)