Exemple #1
0
    def start_diagnostics_server(self,
                                 port=8787,
                                 show=False,
                                 silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is happening
        on the cluster.  This application runs in a separate process and is
        generally available at the following location:

            http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return
        from ..http.scheduler import HTTPScheduler

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(
                self.scheduler, io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
            tcp_port=self.scheduler.port,
            http_port=self.scheduler.services['http'].port,
            bokeh_port=port,
            show=show,
            log_level=logging.getLevelName(silence).lower())
def main(center, host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist):
    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(center, ip=ip,
                          services={('http', http_port): HTTPScheduler},
                          loop=loop)
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host, http_port=http_port,
                    tcp_port=port, bokeh_port=bokeh_port,
                    bokeh_whitelist=bokeh_whitelist, show=show)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
Exemple #3
0
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, prefix, use_xheaders, pid_file):

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           tcp_port=scheduler.port,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %r", addr)
Exemple #4
0
def main(host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist,
         prefix, use_xheaders, pid_file):

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(ip=ip,
                          loop=loop,
                          services={('http', http_port): HTTPScheduler})
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host,
                                           http_port=http_port,
                                           tcp_port=port,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
Exemple #5
0
    def start_diagnostics_server(self, port=8787, show=False,
            silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is happening
        on the cluster.  This application runs in a separate process and is
        generally available at the following location:

            http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return
        from ..http.scheduler import HTTPScheduler

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(self.scheduler,
                    io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())
def main(host, port, http_port, bokeh_port, show, _bokeh,
         bokeh_whitelist, prefix, use_xheaders, pid_file):

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    limit = max(soft, hard // 2)
    resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(ip=ip, loop=loop,
                          services={('http', http_port): HTTPScheduler})
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host, http_port=http_port,
                    tcp_port=port, bokeh_port=bokeh_port,
                    bokeh_whitelist=bokeh_whitelist, show=show, prefix=prefix,
                    use_xheaders=use_xheaders, quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
Exemple #7
0
def main(host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist,
         prefix, use_xheaders):
    given_host = host
    host = host or get_ip()
    if ':' in host and port == 8786:
        host, port = host.rsplit(':', 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    scheduler = Scheduler(ip=ip,
                          loop=loop,
                          services={('http', http_port): HTTPScheduler})
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(host=host,
                                           http_port=http_port,
                                           tcp_port=port,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    loop.start()
    loop.close()
    scheduler.stop()
    if bokeh_proc:
        bokeh_proc.close()

    logger.info("End scheduler at %s:%d", ip, port)
def test_BokehWebInterface(loop):
    with LocalCluster(2, loop=loop, scheduler_port=0,
                      services={('http', 0): HTTPScheduler},
                      diagnostics_port=None) as c:
        with pytest.raises(Exception):
            response = requests.get('http://127.0.0.1:8787/status/')
        with BokehWebInterface(
                scheduler_address=c.scheduler.address,
                http_port=c.scheduler.services['http'].port,
                bokeh_port=8787) as w:
            start = time()
            while True:
                with ignoring(Exception):
                    response = requests.get('http://127.0.0.1:8787/status/')
                    if response.ok:
                        break
                assert time() < start + 5
                sleep(0.01)
    with pytest.raises(Exception):
        response = requests.get('http://127.0.0.1:8787/status/')
Exemple #9
0
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port
    silence_logs: logging level
        Level of logs to print out to stdout.  ``logging.CRITICAL`` by default.
        Use a falsey value like False or None for no change.
    kwargs: dict
        Extra worker arguments, will be passed to the Worker constructor.

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> c = Client(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self,
                 n_workers=None,
                 threads_per_worker=None,
                 nanny=True,
                 loop=None,
                 start=True,
                 scheduler_port=8786,
                 silence_logs=logging.CRITICAL,
                 diagnostics_port=8787,
                 services={},
                 worker_services={},
                 **kwargs):
        self.status = None
        self.nanny = nanny
        self.silence_logs = silence_logs
        if silence_logs:
            for l in [
                    'distributed.scheduler', 'distributed.worker',
                    'distributed.core', 'distributed.nanny'
            ]:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            # Overcommit threads per worker, rather than undercommit
            threads_per_worker = max(1, int(math.ceil(_ncores / n_workers)))

        self.loop = loop or IOLoop()
        if start and not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop,
                                   ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = partial(self.loop.add_callback, self._start_worker)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker,
                          nanny=nanny,
                          services=worker_services,
                          **kwargs)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
            self.scheduler_address, len(
                self.workers), sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=None, **kwargs):
        if nanny is None:
            nanny = self.nanny
        if nanny:
            W = Nanny
            kwargs['quiet'] = True
        else:
            W = Worker
        w = W(self.scheduler.ip,
              self.scheduler.port,
              loop=self.loop,
              silence_logs=self.silence_logs,
              **kwargs)
        yield w._start(port)

        self.workers.append(w)

        while w.worker_address not in self.scheduler.worker_info:
            yield gen.sleep(0.01)

        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop,
                    self._start_worker,
                    port,
                    ncores=ncores,
                    **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self,
                                 port=8787,
                                 show=False,
                                 silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is happening
        on the cluster.  This application runs in a separate process and is
        generally available at the following location:

            http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return
        from ..http.scheduler import HTTPScheduler

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(
                self.scheduler, io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
            tcp_port=self.scheduler.port,
            http_port=self.scheduler.services['http'].port,
            bokeh_port=port,
            show=show,
            log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    @gen.coroutine
    def scale_up(self, n, **kwargs):
        """ Bring the total count of workers up to ``n``

        This function/coroutine should bring the total number of workers up to
        the number ``n``.

        This can be implemented either as a function or as a Tornado coroutine.
        """
        yield [
            self._start_worker(**kwargs) for i in range(n - len(self.workers))
        ]

    @gen.coroutine
    def scale_down(self, workers):
        """ Remove ``workers`` from the cluster

        Given a list of worker addresses this function should remove those
        workers from the cluster.  This may require tracking which jobs are
        associated to which worker address.

        This can be implemented either as a function or as a Tornado coroutine.
        """
        workers = set(workers)
        yield [
            self._stop_worker(w) for w in self.workers
            if w.worker_address in workers
        ]
        while workers & set(self.workers):
            yield gen.sleep(0.01)

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
Exemple #10
0
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port
    silence_logs: logging level
        Level of logs to print out to stdout.  ``logging.CRITICAL`` by default.
        Use a falsey value like False or None for no change.
    kwargs: dict
        Extra worker arguments, will be passed to the Worker constructor.

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> c = Client(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self, n_workers=None, threads_per_worker=None, nanny=True,
            loop=None, start=True, scheduler_port=8786,
            silence_logs=logging.CRITICAL, diagnostics_port=8787,
            services={}, worker_services={}, **kwargs):
        self.status = None
        self.nanny = nanny
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            # Overcommit threads per worker, rather than undercommit
            threads_per_worker = max(1, int(math.ceil(_ncores / n_workers)))

        self.loop = loop or IOLoop()
        if start and not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop, ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = partial(self.loop.add_callback, self._start_worker)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker, nanny=nanny,
                    services=worker_services, **kwargs)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
                self.scheduler_address,
                len(self.workers),
                sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=None, **kwargs):
        if nanny is None:
            nanny = self.nanny
        if nanny:
            W = Nanny
            kwargs['quiet'] = True
        else:
            W = Worker
        w = W(self.scheduler.ip, self.scheduler.port, loop=self.loop, **kwargs)
        yield w._start(port)

        self.workers.append(w)

        while w.worker_address not in self.scheduler.worker_info:
            yield gen.sleep(0.01)

        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop, self._start_worker, port, ncores=ncores, **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self, port=8787, show=False,
            silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is happening
        on the cluster.  This application runs in a separate process and is
        generally available at the following location:

            http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return
        from ..http.scheduler import HTTPScheduler

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(self.scheduler,
                    io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    @gen.coroutine
    def scale_up(self, n, **kwargs):
        """ Bring the total count of workers up to ``n``

        This function/coroutine should bring the total number of workers up to
        the number ``n``.

        This can be implemented either as a function or as a Tornado coroutine.
        """
        yield [self._start_worker(**kwargs)
                for i in range(n - len(self.workers))]

    @gen.coroutine
    def scale_down(self, workers):
        """ Remove ``workers`` from the cluster

        Given a list of worker addresses this function should remove those
        workers from the cluster.  This may require tracking which jobs are
        associated to which worker address.

        This can be implemented either as a function or as a Tornado coroutine.
        """
        workers = set(workers)
        yield [self._stop_worker(w)
                for w in self.workers
                if w.worker_address in workers]
        while workers & set(self.workers):
            yield gen.sleep(0.01)

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
Exemple #11
0
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> e = Executor(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self, n_workers=None, threads_per_worker=None, nanny=True,
            loop=None, start=True, scheduler_port=8786,
            silence_logs=logging.CRITICAL, diagnostics_port=8787,
            services={'http': HTTPScheduler}, **kwargs):
        self.status = None
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            threads_per_worker = max(1, _ncores // n_workers)

        self.loop = loop or IOLoop()
        if not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop, ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = lambda *args, **kwargs: self.loop.add_callback(self._start_worker, *args, **kwargs)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker, nanny=nanny)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
                self.scheduler_address,
                len(self.workers),
                sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=True, **kwargs):
        if nanny:
            W = Nanny
        else:
            W = Worker
        w = W(self.scheduler.ip, self.scheduler.port, loop=self.loop, **kwargs)
        yield w._start(port)
        self.workers.append(w)
        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop, self._start_worker, port, ncores=ncores, **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self, port=8787, show=False,
            silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is happening
        on the cluster.  This application runs in a separate process and is
        generally available at the following location:

            http://localhost:8787/status/
        """
        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(self.scheduler,
                    io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        from distributed.bokeh.application import BokehWebInterface
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
Exemple #12
0
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, prefix, use_xheaders, pid_file, scheduler_file,
         interface):

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        if bokeh_port == 0:  # This is a hack and not robust
            bokeh_port = open_port()  # This port may be taken by the OS
        try:  # before we successfully pass it to Bokeh
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           scheduler_address=scheduler.address,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %r", addr)
Exemple #13
0
class LocalCluster(object):
    """ Create local Scheduler and Workers

    This creates a "cluster" of a scheduler and workers running on the local
    machine.

    Parameters
    ----------
    n_workers: int
        Number of workers to start
    threads_per_worker: int
        Number of threads per each worker
    nanny: boolean
        If true start the workers in separate processes managed by a nanny.
        If False keep the workers in the main calling process
    scheduler_port: int
        Port of the scheduler.  8786 by default, use 0 to choose a random port

    Examples
    --------
    >>> c = LocalCluster()  # Create a local cluster with as many workers as cores  # doctest: +SKIP
    >>> c  # doctest: +SKIP
    LocalCluster("127.0.0.1:8786", workers=8, ncores=8)

    >>> e = Executor(c)  # connect to local cluster  # doctest: +SKIP

    Add a new worker to the cluster
    >>> w = c.start_worker(ncores=2)  # doctest: +SKIP

    Shut down the extra worker
    >>> c.remove_worker(w)  # doctest: +SKIP

    Start a diagnostic web server and open a new browser tab
    >>> c.start_diagnostics_server(show=True)  # doctest: +SKIP
    """
    def __init__(self, n_workers=None, threads_per_worker=None, nanny=True,
            loop=None, start=True, scheduler_port=8786,
            silence_logs=logging.CRITICAL, diagnostics_port=8787,
            services={'http': HTTPScheduler}, **kwargs):
        self.status = None
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if nanny:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            threads_per_worker = max(1, _ncores // n_workers)

        self.loop = loop or IOLoop()
        if not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        self.scheduler = Scheduler(loop=self.loop, ip='127.0.0.1',
                                   services=services)
        self.scheduler.start(scheduler_port)
        self.workers = []

        if start:
            _start_worker = self.start_worker
        else:
            _start_worker = lambda *args, **kwargs: self.loop.add_callback(self._start_worker, *args, **kwargs)
        for i in range(n_workers):
            _start_worker(ncores=threads_per_worker, nanny=nanny)
        self.status = 'running'

        self.diagnostics = None
        if diagnostics_port is not None:
            self.start_diagnostics_server(diagnostics_port,
                                          silence=silence_logs)

    def __str__(self):
        return 'LocalCluster("%s", workers=%d, ncores=%d)' % (
                self.scheduler_address,
                len(self.workers),
                sum(w.ncores for w in self.workers))

    __repr__ = __str__

    @gen.coroutine
    def _start_worker(self, port=0, nanny=True, **kwargs):
        if nanny:
            W = Nanny
        else:
            W = Worker
        w = W(self.scheduler.ip, self.scheduler.port, loop=self.loop, **kwargs)
        yield w._start(port)
        self.workers.append(w)
        raise gen.Return(w)

    def start_worker(self, port=0, ncores=0, **kwargs):
        """ Add a new worker to the running cluster

        Parameters
        ----------
        port: int (optional)
            Port on which to serve the worker, defaults to 0 or random
        ncores: int (optional)
            Number of threads to use.  Defaults to number of logical cores
        nanny: boolean
            If true start worker in separate process managed by a nanny

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> c.start_worker(ncores=2)  # doctest: +SKIP

        Returns
        -------
        The created Worker or Nanny object.  Can be discarded.
        """
        return sync(self.loop, self._start_worker, port, ncores=ncores, **kwargs)

    @gen.coroutine
    def _stop_worker(self, w):
        yield w._close()
        self.workers.remove(w)

    def stop_worker(self, w):
        """ Stop a running worker

        Examples
        --------
        >>> c = LocalCluster()  # doctest: +SKIP
        >>> w = c.start_worker(ncores=2)  # doctest: +SKIP
        >>> c.stop_worker(w)  # doctest: +SKIP
        """
        sync(self.loop, self._stop_worker, w)

    def start_diagnostics_server(self, port=8787, show=False,
            silence=logging.CRITICAL):
        """ Start Diagnostics Web Server

        This starts a web application to show diagnostics of what is happening
        on the cluster.  This application runs in a separate process and is
        generally available at the following location:

            http://localhost:8787/status/
        """
        try:
            from distributed.bokeh.application import BokehWebInterface
        except ImportError:
            logger.info("To start diagnostics web server please install Bokeh")
            return

        assert self.diagnostics is None
        if 'http' not in self.scheduler.services:
            self.scheduler.services['http'] = HTTPScheduler(self.scheduler,
                    io_loop=self.scheduler.loop)
            self.scheduler.services['http'].listen(0)
        self.diagnostics = BokehWebInterface(
                tcp_port=self.scheduler.port,
                http_port=self.scheduler.services['http'].port,
                bokeh_port=port, show=show,
                log_level=logging.getLevelName(silence).lower())

    @gen.coroutine
    def _close(self):
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield All([w._close() for w in self.workers])
        with ignoring(gen.TimeoutError, StreamClosedError, OSError):
            yield self.scheduler.close(fast=True)
        del self.workers[:]
        if self.diagnostics:
            self.diagnostics.close()

    def close(self):
        """ Close the cluster """
        if self.status == 'running':
            self.status = 'closed'
            if self.loop._running:
                sync(self.loop, self._close)
            if hasattr(self, '_thread'):
                sync(self.loop, self.loop.stop)
                self._thread.join(timeout=1)
                self.loop.close()
                del self._thread

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def scheduler_address(self):
        return self.scheduler.address
Exemple #14
0
def main(host, port, http_port, bokeh_port, bokeh_external_port,
         bokeh_internal_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, prefix, tls_ca_file, tls_cert, tls_key):

    if bokeh_internal_port:
        print("The --bokeh-internal-port keyword has been removed.\n"
              "The internal bokeh server is now the default bokeh server.\n"
              "Use --bokeh-port %d instead" % bokeh_internal_port)
        sys.exit(1)

    if prefix:
        print("The --prefix keyword has moved to --bokeh-prefix")
        sys.exit(1)

    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_scheduler_cert=tls_cert,
        tls_scheduler_key=tls_key,
    )

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = partial(BokehScheduler,
                                                      prefix=bokeh_prefix)
    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)
    preload_modules(preload, parameter=scheduler, file_dir=local_directory)

    bokeh_proc = None
    if _bokeh and bokeh_external_port is not None:
        if bokeh_external_port == 0:  # This is a hack and not robust
            bokeh_external_port = open_port(
            )  # This port may be taken by the OS
        try:  # before we successfully pass it to Bokeh
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           scheduler_address=scheduler.address,
                                           bokeh_port=bokeh_external_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=bokeh_prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
Exemple #15
0
def main(host, port, http_port, bokeh_port, show, _bokeh, bokeh_whitelist, prefix, use_xheaders, pid_file):

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith("linux"):
        import resource  # module fails importing on Windows

        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    given_host = host
    host = host or get_ip()
    if ":" in host and port == 8786:
        host, port = host.rsplit(":", 1)
        port = int(port)
    ip = socket.gethostbyname(host)
    loop = IOLoop.current()
    logger.info("-" * 47)

    services = {("http", http_port): HTTPScheduler}
    if _bokeh:
        from distributed.bokeh.scheduler import BokehScheduler

        services[("bokeh", 8788)] = BokehScheduler
    scheduler = Scheduler(ip=ip, loop=loop, services=services)
    scheduler.start(port)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface

            bokeh_proc = BokehWebInterface(
                host=host,
                http_port=http_port,
                tcp_port=port,
                bokeh_port=bokeh_port,
                bokeh_whitelist=bokeh_whitelist,
                show=show,
                prefix=prefix,
                use_xheaders=use_xheaders,
                quiet=False,
            )
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    logger.info("-" * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %s:%d", ip, port)