Exemple #1
0
def main(scheduler_file, interface, nthreads, local_directory, memory_limit,
         scheduler, bokeh_port, bokeh_prefix, nanny, bokeh_worker_port):
    if interface:
        host = get_ip_interface(interface)
    else:
        host = None

    if rank == 0 and scheduler:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
        except ImportError:
            services = {}
        else:
            services = {
                ('bokeh', bokeh_port): partial(BokehScheduler,
                                               prefix=bokeh_prefix)
            }
        scheduler = Scheduler(scheduler_file=scheduler_file,
                              loop=loop,
                              services=services)
        addr = uri_from_host_port(host, None, 8786)
        scheduler.start(addr)
        try:
            loop.start()
            loop.close()
        finally:
            scheduler.stop()
    else:
        W = Nanny if nanny else Worker
        worker = W(scheduler_file=scheduler_file,
                   loop=loop,
                   name=rank if scheduler else None,
                   ncores=nthreads,
                   local_dir=local_directory,
                   services={('bokeh', bokeh_worker_port): BokehWorker},
                   memory_limit=memory_limit)
        addr = uri_from_host_port(host, None, 0)

        @gen.coroutine
        def run():
            yield worker._start(addr)
            while worker.status != 'closed':
                yield gen.sleep(0.2)

        try:
            loop.run_sync(run)
            loop.close()
        finally:
            pass

        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        loop.run_sync(close)
Exemple #2
0
def main(scheduler_file, interface, nthreads, local_directory, memory_limit,
         scheduler, bokeh_port, bokeh_prefix, nanny, bokeh_worker_port):
    if interface:
        host = get_ip_interface(interface)
    else:
        host = None

    if rank == 0 and scheduler:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
        except ImportError:
            services = {}
        else:
            services = {('bokeh',  bokeh_port): partial(BokehScheduler,
                                                        prefix=bokeh_prefix)}
        scheduler = Scheduler(scheduler_file=scheduler_file,
                              loop=loop,
                              services=services)
        addr = uri_from_host_port(host, None, 8786)
        scheduler.start(addr)
        try:
            loop.start()
            loop.close()
        finally:
            scheduler.stop()
    else:
        W = Nanny if nanny else Worker
        worker = W(scheduler_file=scheduler_file,
                   loop=loop,
                   name=rank if scheduler else None,
                   ncores=nthreads,
                   local_dir=local_directory,
                   services={('bokeh', bokeh_worker_port): BokehWorker},
                   memory_limit=memory_limit)
        addr = uri_from_host_port(host, None, 0)

        @gen.coroutine
        def run():
            yield worker._start(addr)
            while worker.status != 'closed':
                yield gen.sleep(0.2)

        try:
            loop.run_sync(run)
            loop.close()
        finally:
            pass

        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        loop.run_sync(close)
Exemple #3
0
def create_scheduler(loop,
                     scheduler_file=None,
                     host=None,
                     bokeh=True,
                     bokeh_port=8787,
                     bokeh_prefix=None,
                     scheduler_port=None):
    try:
        from distributed.bokeh.scheduler import BokehScheduler
    except ImportError:
        BokehScheduler = None

    if bokeh and BokehScheduler:
        services = {
            ('bokeh', bokeh_port): partial(BokehScheduler, prefix=bokeh_prefix)
        }
    else:
        services = {}

    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file)
    addr = uri_from_host_port(host, scheduler_port, 8786)
    scheduler.start(addr)
    return scheduler
Exemple #4
0
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, prefix, use_xheaders, pid_file):

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           tcp_port=scheduler.port,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %r", addr)
Exemple #5
0
def main(scheduler_file, interface, nthreads, local_directory, memory_limit,
         scheduler):
    if interface:
        host = get_ip_interface(interface)
    else:
        host = None

    if rank == 0 and scheduler:
        scheduler = Scheduler(scheduler_file=scheduler_file,
                              loop=loop,
                              services={('bokeh', 8787): BokehScheduler})
        addr = uri_from_host_port(host, None, 8786)
        scheduler.start(addr)
        try:
            loop.start()
            loop.close()
        finally:
            scheduler.stop()
    else:
        worker = Worker(scheduler_file=scheduler_file,
                        loop=loop,
                        name=rank if scheduler else None,
                        ncores=nthreads,
                        local_dir=local_directory,
                        services={'bokeh': BokehWorker},
                        memory_limit=memory_limit)
        addr = uri_from_host_port(host, None, 0)

        @gen.coroutine
        def run():
            yield worker._start(addr)
            while worker.status != 'closed':
                yield gen.sleep(0.2)

        try:
            loop.run_sync(run)
            loop.close()
        finally:
            pass

        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        loop.run_sync(close)
Exemple #6
0
def create_and_run_worker(loop,
                          host=None,
                          rank=0,
                          scheduler_file=None,
                          nanny=False,
                          local_directory='',
                          nthreads=0,
                          memory_limit='auto',
                          bokeh=True,
                          bokeh_port=8789,
                          bokeh_prefix=None):
    try:
        from distributed.bokeh.worker import BokehWorker
    except ImportError:
        BokehWorker = None

    if bokeh and BokehWorker:
        services = {
            ('bokeh', bokeh_port): partial(BokehWorker, prefix=bokeh_prefix)
        }
    else:
        services = {}

    W = Nanny if nanny else Worker
    worker = W(scheduler_file=scheduler_file,
               loop=loop,
               name='mpi-rank-%d' % rank,
               ncores=nthreads,
               local_dir=local_directory,
               services=services,
               memory_limit=memory_limit)
    addr = uri_from_host_port(host, None, 0)

    @gen.coroutine
    def run_until_closed():
        yield worker._start(addr)
        while worker.status != 'closed':
            yield gen.sleep(0.2)

    loop = worker.loop
    try:
        loop.run_sync(run_until_closed)
    finally:

        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        loop.run_sync(close)
        loop.close()
Exemple #7
0
def scheduler():  # pragma: nocover
    app_client = skein.ApplicationClient.from_current()

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    addr = uri_from_host_port('', None, 0)

    loop = IOLoop.current()

    services = {}
    bokeh = False
    with ignoring(ImportError):
        from distributed.bokeh.scheduler import BokehScheduler
        services[('bokeh', 0)] = (BokehScheduler, {})
        bokeh = True

    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    install_signal_handlers(loop)

    # Set dask.dashboard before dask.scheduler since the YarnCluster object
    # waits on dask.scheduler only
    if bokeh:
        bokeh_port = scheduler.services['bokeh'].port
        bokeh_host = urlparse(scheduler.address).hostname
        bokeh_address = 'http://%s:%d' % (bokeh_host, bokeh_port)

        app_client.kv['dask.dashboard'] = bokeh_address.encode()

    app_client.kv['dask.scheduler'] = scheduler.address.encode()

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
Exemple #8
0
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, preload_argv, tls_ca_file, tls_cert, tls_key):

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_scheduler_cert=tls_cert,
        tls_scheduler_key=tls_key,
    )

    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler, {
                'prefix': bokeh_prefix
            })
    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)
    if not preload:
        preload = dask.config.get('distributed.scheduler.preload')
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv')
    preload_modules(preload,
                    parameter=scheduler,
                    file_dir=local_directory,
                    argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)

    install_signal_handlers(loop)

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
        use_xheaders, pid_file, scheduler_file, interface,
        local_directory, preload, preload_argv, tls_ca_file, tls_cert, tls_key):

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key,
                   )

    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler,
                                               {'prefix': bokeh_prefix})
        except ImportError as error:
            if str(error).startswith('No module named'):
                logger.info('Web dashboard not loaded.  Unable to import bokeh')
            else:
                logger.info('Unable to import bokeh: %s' % str(error))

    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)
    if not preload:
        preload = dask.config.get('distributed.scheduler.preload')
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv')
    preload_modules(preload, parameter=scheduler, file_dir=local_directory, argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)

    install_signal_handlers(loop)

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
def main(
    scheduler,
    host,
    worker_port,
    listen_address,
    contact_address,
    nanny_port,
    nthreads,
    nprocs,
    nanny,
    name,
    memory_limit,
    pid_file,
    reconnect,
    resources,
    bokeh,
    bokeh_port,
    local_directory,
    scheduler_file,
    interface,
    death_timeout,
    preload,
    preload_argv,
    bokeh_prefix,
    tls_ca_file,
    tls_cert,
    tls_key,
    dashboard_address,
):
    g0, g1, g2 = gc.get_threshold(
    )  # https://github.com/dask/distributed/issues/1653
    gc.set_threshold(g0 * 3, g1 * 3, g2 * 3)

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    if bokeh_port is not None:
        warnings.warn(
            "The --bokeh-port flag has been renamed to --dashboard-address. "
            "Consider adding ``--dashboard-address :%d`` " % bokeh_port)
        dashboard_address = bokeh_port

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_worker_cert=tls_cert,
                   tls_worker_key=tls_key)

    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker.  You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker.  You cannot use the --no-nanny argument when nprocs > 1."
        )
        exit(1)

    if contact_address and not listen_address:
        logger.error(
            "Failed to launch worker. "
            "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when --worker-port or --host is given."
        )
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address,
                                                        strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if not nthreads:
        nthreads = _ncores // nprocs

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {}

    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {"prefix": bokeh_prefix})
            else:
                result = BokehWorker
            services[("bokeh", dashboard_address)] = result

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs = {"worker_port": worker_port, "listen_address": listen_address}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs["service_ports"] = {"nanny": nanny_port}
        t = Worker

    if (not scheduler and not scheduler_file
            and dask.config.get("scheduler-address", None) is None):
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    if death_timeout is not None:
        death_timeout = parse_timedelta(death_timeout, "s")

    nannies = [
        t(scheduler,
          scheduler_file=scheduler_file,
          ncores=nthreads,
          services=services,
          loop=loop,
          resources=resources,
          memory_limit=memory_limit,
          reconnect=reconnect,
          local_dir=local_directory,
          death_timeout=death_timeout,
          preload=preload,
          preload_argv=preload_argv,
          security=sec,
          contact_address=contact_address,
          name=name if nprocs == 1 or not name else name + "-" + str(i),
          **kwargs) for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n.close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield [n._start(addr) for n in nannies]
        while all(n.status != "closed" for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
Exemple #11
0
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, bokeh_prefix, use_xheaders, pid_file, scheduler_file,
         interface, local_directory, preload, prefix, tls_ca_file, tls_cert,
         tls_key):

    if bokeh_internal_port:
        print("The --bokeh-internal-port keyword has been removed.\n"
              "The internal bokeh server is now the default bokeh server.\n"
              "Use --bokeh-port %d instead" % bokeh_internal_port)
        sys.exit(1)

    if prefix:
        print("The --prefix keyword has moved to --bokeh-prefix")
        sys.exit(1)

    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_scheduler_cert=tls_cert,
        tls_scheduler_key=tls_key,
    )

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = partial(BokehScheduler,
                                                      prefix=bokeh_prefix)
    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)
    preload_modules(preload, parameter=scheduler, file_dir=local_directory)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
Exemple #12
0
def main(
    scheduler,
    host,
    nthreads,
    name,
    memory_limit,
    pid_file,
    reconnect,
    resources,
    bokeh,
    bokeh_port,
    local_directory,
    scheduler_file,
    interface,
    death_timeout,
    preload,
    preload_argv,
    bokeh_prefix,
    tls_ca_file,
    tls_cert,
    tls_key,
):
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(
        tls_ca_file=tls_ca_file, tls_worker_cert=tls_cert, tls_worker_key=tls_key
    )

    try:
        nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
    except KeyError:
        nprocs = get_n_gpus()

    if not nthreads:
        nthreads = min(1, _ncores // nprocs )

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {}

    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {"prefix": bokeh_prefix})
            else:
                result = BokehWorker
            services[("bokeh", bokeh_port)] = result

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    kwargs = {"worker_port": None, "listen_address": None}
    t = Nanny

    if not scheduler and not scheduler_file and "scheduler-address" not in config:
        raise ValueError(
            "Need to provide scheduler address like\n"
            "dask-worker SCHEDULER_ADDRESS:8786"
        )

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host:
        addr = uri_from_host_port(host, 0, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    if death_timeout is not None:
        death_timeout = parse_timedelta(death_timeout, "s")

    nannies = [
        t(
            scheduler,
            scheduler_file=scheduler_file,
            ncores=nthreads,
            services=services,
            loop=loop,
            resources=resources,
            memory_limit=memory_limit,
            reconnect=reconnect,
            local_dir=local_directory,
            death_timeout=death_timeout,
            preload=preload,
            preload_argv=preload_argv,
            security=sec,
            contact_address=None,
            env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
            name=name if nprocs == 1 or not name else name + "-" + str(i),
            **kwargs
        )
        for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield [n._start(addr) for n in nannies]
        while all(n.status != "closed" for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
Exemple #13
0
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, preload_argv, tls_ca_file, tls_cert, tls_key):
    logger = SchedulerLogger.getLogger()
    enable_proctitle_on_current()
    enable_proctitle_on_children()
    log_metrics = EdasEnv.getBool("log.metrics", False)
    logger.info(f"Log Metrics: {log_metrics}")
    plugins = [EDASSchedulerPlugin(logger)] if log_metrics else []

    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_scheduler_cert=tls_cert,
        tls_scheduler_key=tls_key,
    )

    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler, {
                'prefix': bokeh_prefix
            })
        except ImportError as error:
            if str(error).startswith('No module named'):
                logger.info(
                    'Web dashboard not loaded.  Unable to import bokeh')
            else:
                logger.info('Unable to import bokeh: %s' % str(error))

    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file,
                          security=sec)

    for plugin in plugins:
        logger.info(f"@SP: Adding scheduler plugin: {plugin}")
        scheduler.add_plugin(plugin)
    scheduler.start(addr)
    comm = Comm(scheduler)
    comm.start()
    if not preload:
        preload = dask.config.get('distributed.scheduler.preload', {})
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv',
                                       {})
    preload_modules(preload,
                    parameter=scheduler,
                    file_dir=local_directory,
                    argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)
    install_signal_handlers(loop)

    def shutdown_scheduler():
        comm.terminate()
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)
        logger.info("End scheduler at %r", addr)

    def close_loop():
        loop.stop()
        loop.close()
        shutdown_scheduler()

    atexit.register(close_loop)

    try:
        loop.start()
        loop.close()
    finally:
        shutdown_scheduler()
def main(host, port, http_port, bokeh_port, bokeh_external_port,
         bokeh_internal_port, show, _bokeh, bokeh_whitelist, prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload):

    if bokeh_internal_port:
        print("The --bokeh-internal-port keyword has been removed.\n"
              "The internal bokeh server is now the default bokeh server.\n"
              "Use --bokeh-port %d instead" % bokeh_internal_port)
        sys.exit(1)

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file)
    scheduler.start(addr)
    preload_modules(preload, parameter=scheduler, file_dir=local_directory)

    bokeh_proc = None
    if _bokeh and bokeh_external_port is not None:
        if bokeh_external_port == 0:  # This is a hack and not robust
            bokeh_port = open_port()  # This port may be taken by the OS
        try:  # before we successfully pass it to Bokeh
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           scheduler_address=scheduler.address,
                                           bokeh_port=bokeh_external_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
Exemple #15
0
def main(
    host,
    port,
    bokeh_port,
    show,
    _bokeh,
    bokeh_whitelist,
    bokeh_prefix,
    use_xheaders,
    pid_file,
    scheduler_file,
    interface,
    local_directory,
    preload,
    preload_argv,
    tls_ca_file,
    tls_cert,
    tls_key,
    dashboard_address,
):

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    if bokeh_port is not None:
        warnings.warn(
            "The --bokeh-port flag has been renamed to --dashboard-address. "
            "Consider adding ``--dashboard-address :%d`` " % bokeh_port)
        dashboard_address = bokeh_port

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key)

    if not host and (tls_ca_file or tls_cert or tls_key):
        host = "tls://"

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix="scheduler-")
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith("linux"):
        import resource  # module fails importing on Windows

        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info("-" * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler

            services[("bokeh", dashboard_address)] = (
                BokehScheduler,
                {
                    "prefix": bokeh_prefix
                },
            )
        except ImportError as error:
            if str(error).startswith("No module named"):
                logger.info(
                    "Web dashboard not loaded.  Unable to import bokeh")
            else:
                logger.info("Unable to import bokeh: %s" % str(error))

    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)
    if not preload:
        preload = dask.config.get("distributed.scheduler.preload")
    if not preload_argv:
        preload_argv = dask.config.get("distributed.scheduler.preload-argv")
    preload_modules(preload,
                    parameter=scheduler,
                    file_dir=local_directory,
                    argv=preload_argv)

    logger.info("Local Directory: %26s", local_directory)
    logger.info("-" * 47)

    install_signal_handlers(loop)

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
Exemple #16
0
def main(scheduler, host, worker_port, http_port, nanny_port, nthreads, nprocs,
         nanny, name, memory_limit, pid_file, reconnect, resources, bokeh,
         bokeh_port, local_directory, scheduler_file, interface, death_timeout,
         preload, bokeh_prefix, tls_ca_file, tls_cert, tls_key):
    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_worker_cert=tls_cert,
        tls_worker_key=tls_key,
    )

    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker.  You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and name:
        logger.error(
            "Failed to launch worker.  You cannot use the --name argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker.  You cannot use the --no-nanny argument when nprocs > 1."
        )
        exit(1)

    if not nthreads:
        nthreads = _ncores // nprocs

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {('http', http_port): HTTPWorker}

    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs = {'worker_port': worker_port}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    if scheduler_file:
        while not os.path.exists(scheduler_file):
            sleep(0.01)
        for i in range(10):
            try:
                with open(scheduler_file) as f:
                    cfg = json.load(f)
                scheduler = cfg['address']
                break
            except (ValueError, KeyError):  # race with scheduler on file
                sleep(0.01)

    if not scheduler:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    nannies = [
        t(scheduler,
          ncores=nthreads,
          services=services,
          name=name,
          loop=loop,
          resources=resources,
          memory_limit=memory_limit,
          reconnect=reconnect,
          local_dir=local_directory,
          death_timeout=death_timeout,
          preload=preload,
          security=sec,
          **kwargs) for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        try:
            if nanny:
                yield [n._close(timeout=2) for n in nannies]
        finally:
            loop.stop()

    def handle_signal(signum, frame):
        logger.info("Exiting on signal %d", signum)
        if loop._running:
            loop.add_callback_from_signal(loop.stop)
        else:
            exit(0)

    # NOTE: We can't use the generic install_signal_handlers() function from
    # distributed.cli.utils because we're handling the signal differently.
    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    for n in nannies:
        n.start(addr)

    @gen.coroutine
    def run():
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")

    # Clean exit: unregister all workers from scheduler
    loop.run_sync(close_all)
Exemple #17
0
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, prefix, use_xheaders, pid_file, scheduler_file,
         interface):

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        if bokeh_port == 0:  # This is a hack and not robust
            bokeh_port = open_port()  # This port may be taken by the OS
        try:  # before we successfully pass it to Bokeh
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           scheduler_address=scheduler.address,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show,
                                           prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception as e:
            logger.warn("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %r", addr)
Exemple #18
0
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name,
         memory_limit, pid_file, reconnect, resources, bokeh,
         bokeh_port, local_directory, scheduler_file, interface,
         death_timeout, preload, preload_argv, bokeh_prefix, tls_ca_file,
         tls_cert, tls_key):
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_worker_cert=tls_cert,
                   tls_worker_key=tls_key,
                   )

    if nprocs > 1 and worker_port != 0:
        logger.error("Failed to launch worker.  You cannot use the --port argument when nprocs > 1.")
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error("Failed to launch worker.  You cannot use the --no-nanny argument when nprocs > 1.")
        exit(1)

    if contact_address and not listen_address:
        logger.error("Failed to launch worker. "
                     "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when --worker-port or --host is given.")
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address, strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if not nthreads:
        nthreads = _ncores // nprocs

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    services = {}

    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs = {'worker_port': worker_port, 'listen_address': listen_address}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    if not scheduler and not scheduler_file and 'scheduler-address' not in config:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    if death_timeout is not None:
        death_timeout = parse_timedelta(death_timeout, 's')

    nannies = [t(scheduler, scheduler_file=scheduler_file, ncores=nthreads,
                 services=services, loop=loop, resources=resources,
                 memory_limit=memory_limit, reconnect=reconnect,
                 local_dir=local_directory, death_timeout=death_timeout,
                 preload=preload, preload_argv=preload_argv,
                 security=sec, contact_address=contact_address,
                 name=name if nprocs == 1 or not name else name + '-' + str(i),
                 **kwargs)
               for i in range(nprocs)]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield [n._start(addr) for n in nannies]
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name, memory_limit, pid_file,
         reconnect, resources, bokeh, bokeh_port, local_directory,
         scheduler_file, interface, death_timeout, preload, bokeh_prefix,
         tls_ca_file, tls_cert, tls_key):
    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_worker_cert=tls_cert,
        tls_worker_key=tls_key,
    )

    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker.  You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and name:
        logger.error(
            "Failed to launch worker.  You cannot use the --name argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker.  You cannot use the --no-nanny argument when nprocs > 1."
        )
        exit(1)

    if contact_address and not listen_address:
        logger.error(
            "Failed to launch worker. "
            "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when --worker-port or --host is given."
        )
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address,
                                                        strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if not nthreads:
        nthreads = _ncores // nprocs

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {}

    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs = {'worker_port': worker_port, 'listen_address': listen_address}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    if not scheduler and not scheduler_file:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    nannies = [
        t(scheduler,
          scheduler_file=scheduler_file,
          ncores=nthreads,
          services=services,
          name=name,
          loop=loop,
          resources=resources,
          memory_limit=memory_limit,
          reconnect=reconnect,
          local_dir=local_directory,
          death_timeout=death_timeout,
          preload=preload,
          security=sec,
          contact_address=contact_address,
          **kwargs) for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield [n.start(addr) for n in nannies]
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
Exemple #20
0
def main(scheduler, host, worker_port, http_port, nanny_port, nthreads, nprocs,
         nanny, name, memory_limit, pid_file, reconnect, resources, bokeh,
         bokeh_port, local_directory, scheduler_file, interface, death_timeout,
         preload, bokeh_prefix, tls_ca_file, tls_cert, tls_key):
    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_worker_cert=tls_cert,
        tls_worker_key=tls_key,
    )

    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker.  You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and name:
        logger.error(
            "Failed to launch worker.  You cannot use the --name argument when nprocs > 1."
        )
        exit(1)

    if not nthreads:
        nthreads = _ncores // nprocs

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {('http', http_port): HTTPWorker}

    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs = {'worker_port': worker_port}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    if scheduler_file:
        while not os.path.exists(scheduler_file):
            sleep(0.01)
        for i in range(10):
            try:
                with open(scheduler_file) as f:
                    cfg = json.load(f)
                scheduler = cfg['address']
                break
            except (ValueError, KeyError):  # race with scheduler on file
                sleep(0.01)

    if not scheduler:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    nannies = [
        t(scheduler,
          ncores=nthreads,
          services=services,
          name=name,
          loop=loop,
          resources=resources,
          memory_limit=memory_limit,
          reconnect=reconnect,
          local_dir=local_directory,
          death_timeout=death_timeout,
          preload=preload,
          security=sec,
          **kwargs) for i in range(nprocs)
    ]

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    for n in nannies:
        n.start(addr)
        if t is Nanny:
            global_nannies.append(n)

    @gen.coroutine
    def run():
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
        loop.close()

    # Clean exit: unregister all workers from scheduler

    loop2 = IOLoop()

    @gen.coroutine
    def f():
        if nanny:
            w = nannies[0]
            with w.rpc(w.scheduler.address) as scheduler:
                yield gen.with_timeout(timeout=timedelta(seconds=2),
                                       future=All([
                                           scheduler.unregister(
                                               address=n.worker_address,
                                               close=True) for n in nannies
                                           if n.process and n.worker_address
                                       ]),
                                       io_loop=loop2)

    loop2.run_sync(f)
    loop2.close()

    if nanny:
        for n in nannies:
            if isalive(n.process):
                n.process.terminate()

    if nanny:
        start = time()
        while (any(isalive(n.process) for n in nannies)
               and time() < start + 1):
            sleep(0.1)

    for nanny in nannies:
        nanny.stop()