def run_worker_fork(q, scheduler_addr, ncores, nanny_port,
                    worker_ip, worker_port, local_dir, **kwargs):
    """
    Create a worker in a forked child and serve it until it closes.

    A fresh ``IOLoop`` is installed as the current loop, a ``Worker`` is
    built and started on it, and the start-up outcome is pushed onto ``q``.

    Parameters
    ----------
    q : queue-like
        Receives exactly one item once start-up has been attempted: the
        exception raised by ``worker._start`` on failure, otherwise a dict
        with the keys ``'address'`` and ``'dir'``.
    scheduler_addr
        Passed as the first positional argument to ``Worker``.
    ncores
        Forwarded to ``Worker`` as ``ncores``.
    nanny_port
        Published to the worker as ``service_ports={'nanny': nanny_port}``.
    worker_ip
        Accepted for interface compatibility; not used by this function.
    worker_port
        Passed to ``worker._start``.
    local_dir
        Forwarded to ``Worker`` as ``local_dir``.
    **kwargs
        Any further keyword arguments for ``Worker``.
    """
    from distributed import Worker  # pragma: no cover
    from tornado.ioloop import IOLoop  # pragma: no cover

    # Optional hook: only present in newer Dask versions.
    try:
        from dask.multiprocessing import (
            initialize_worker_process as init_process,
        )
    except ImportError:  # old Dask version
        init_process = None
    if init_process is not None:
        init_process()

    # Drop any previously installed loop instance and make a new one current.
    IOLoop.clear_instance()  # pragma: no cover
    io_loop = IOLoop()  # pragma: no cover
    io_loop.make_current()  # pragma: no cover

    nanny_service = {'nanny': nanny_port}
    worker = Worker(scheduler_addr, ncores=ncores,
                    service_ports=nanny_service, local_dir=local_dir,
                    **kwargs)  # pragma: no cover

    @gen.coroutine  # pragma: no cover
    def serve():
        # Phase 1: start the worker and tell the other end of ``q`` how
        # that went.
        try:  # pragma: no cover
            yield worker._start(worker_port)  # pragma: no cover
        except Exception as exc:  # pragma: no cover
            logger.exception(exc)  # pragma: no cover
            q.put(exc)  # pragma: no cover
        else:
            assert worker.port  # pragma: no cover
            outcome = {'address': worker.address, 'dir': worker.local_dir}
            q.put(outcome)  # pragma: no cover

        # Phase 2: block until the worker has shut down.
        yield worker.wait_until_closed()

        logger.info("Worker closed")

    try:
        io_loop.run_sync(serve)
    except TimeoutError:
        logger.info("Worker timed out")
    except KeyboardInterrupt:
        pass
    finally:
        # Always tear the loop down, whatever ended the run.
        io_loop.stop()
        io_loop.close(all_fds=True)
def _run(
    cls,
    worker_kwargs,
    worker_start_args,
    silence_logs,
    init_result_q,
    child_stop_q,
    uid,
    env,
    config,
    Worker,
):  # pragma: no cover
    """
    Build a worker, run it on a private IOLoop and report start-up outcome.

    Parameters
    ----------
    worker_kwargs : dict
        Keyword arguments used to instantiate ``Worker``.
    worker_start_args
        Accepted for interface compatibility; not used by this function.
    silence_logs
        If truthy, passed to ``logger.setLevel``.
    init_result_q : queue-like
        Receives one dict describing the start-up outcome: either
        ``{"uid", "exception"}`` or ``{"address", "dir", "uid"}``. The
        queue is closed right after the message is put.
    child_stop_q : queue-like
        Polled by a daemon thread for a ``{"op": "stop", ...}`` message;
        the remaining keys are forwarded to ``do_stop`` as keyword
        arguments.
    uid
        Echoed back in every message put on ``init_result_q``.
    env : mapping
        Merged into ``os.environ``.
    config
        Passed to ``dask.config.set``.
    Worker : callable
        Class (or factory) used to create the worker.
    """
    os.environ.update(env)
    dask.config.set(config)
    try:
        from dask.multiprocessing import initialize_worker_process
    except ImportError:  # old Dask version
        pass
    else:
        initialize_worker_process()

    if silence_logs:
        logger.setLevel(silence_logs)

    IOLoop.clear_instance()
    loop = IOLoop()
    loop.make_current()
    worker = Worker(**worker_kwargs)

    async def do_stop(timeout=5, executor_wait=True):
        """Close the worker, then stop the loop even if closing failed."""
        try:
            await worker.close(
                report=False,
                nanny=False,
                executor_wait=executor_wait,
                timeout=timeout,
            )
        finally:
            loop.stop()

    def watch_stop_q():
        """
        Wait for an incoming stop message and then stop the
        worker cleanly.
        """
        while True:
            try:
                msg = child_stop_q.get(timeout=1000)
            except Empty:
                continue
            child_stop_q.close()
            # Pop *outside* the assert: assert statements are removed under
            # ``python -O``, which would leave "op" in ``msg`` and make
            # ``do_stop(**msg)`` fail with an unexpected keyword argument.
            op = msg.pop("op")
            assert op == "stop", op
            # Runs in a plain thread, so hand over to the loop thread-safely.
            loop.add_callback(do_stop, **msg)
            break

    t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch")
    t.daemon = True
    t.start()

    async def run():
        """
        Try to start worker and inform parent of outcome.
        """
        try:
            await worker
        except Exception as e:
            logger.exception("Failed to start worker")
            init_result_q.put({"uid": uid, "exception": e})
            init_result_q.close()
            return

        # Read the address explicitly rather than inside an assert, so the
        # ValueError guard still works when asserts are stripped (-O).
        try:
            address = worker.address
        except ValueError:
            # NOTE(review): presumably raised when the worker has no
            # address (yet); nothing is reported in that case.
            return
        assert address

        init_result_q.put(
            {
                "address": address,
                "dir": worker.local_directory,
                "uid": uid,
            }
        )
        init_result_q.close()
        await worker.finished()
        logger.info("Worker closed")

    try:
        loop.run_sync(run)
    except TimeoutError:
        # Loop was stopped before wait_until_closed() returned, ignore
        pass
    except KeyboardInterrupt:
        pass
def _run(cls, worker_args, worker_kwargs, worker_start_args,
         silence_logs, init_result_q, child_stop_q, uid, Worker):  # pragma: no cover
    """
    Build a worker, run it on a private IOLoop and report start-up outcome.

    Parameters
    ----------
    worker_args, worker_kwargs
        Positional and keyword arguments used to instantiate ``Worker``.
    worker_start_args
        Positional arguments passed to ``worker._start``.
    silence_logs
        If truthy, passed to ``logger.setLevel``.
    init_result_q : queue-like
        Receives one dict describing the start-up outcome: either
        ``{'uid', 'exception'}`` or ``{'address', 'dir', 'uid'}``. The
        queue is closed right after the message is put.
    child_stop_q : queue-like
        Polled by a daemon thread for a ``{'op': 'stop', ...}`` message;
        the remaining keys are forwarded to ``do_stop`` as keyword
        arguments.
    uid
        Echoed back in every message put on ``init_result_q``.
    Worker : callable
        Class (or factory) used to create the worker.
    """
    try:
        from dask.multiprocessing import initialize_worker_process
    except ImportError:  # old Dask version
        pass
    else:
        initialize_worker_process()

    if silence_logs:
        logger.setLevel(silence_logs)

    IOLoop.clear_instance()
    loop = IOLoop()
    loop.make_current()
    worker = Worker(*worker_args, **worker_kwargs)

    @gen.coroutine
    def do_stop(timeout=5, executor_wait=True):
        """Close the worker, then stop the loop even if closing failed."""
        try:
            yield worker._close(report=False, nanny=False,
                                executor_wait=executor_wait,
                                timeout=timeout)
        finally:
            loop.stop()

    def watch_stop_q():
        """
        Wait for an incoming stop message and then stop the
        worker cleanly.
        """
        while True:
            try:
                msg = child_stop_q.get(timeout=1000)
            except Empty:
                continue
            child_stop_q.close()
            # Pop *outside* the assert: assert statements are removed under
            # ``python -O``, which would leave 'op' in ``msg`` and make
            # ``do_stop(**msg)`` fail with an unexpected keyword argument.
            op = msg.pop('op')
            assert op == 'stop', op
            # Runs in a plain thread, so hand over to the loop thread-safely.
            loop.add_callback(do_stop, **msg)
            break

    t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch")
    t.daemon = True
    t.start()

    @gen.coroutine
    def run():
        """
        Try to start worker and inform parent of outcome.
        """
        try:
            yield worker._start(*worker_start_args)
        except Exception as e:
            logger.exception("Failed to start worker")
            init_result_q.put({'uid': uid, 'exception': e})
            init_result_q.close()
        else:
            assert worker.address
            init_result_q.put({'address': worker.address,
                               'dir': worker.local_dir,
                               'uid': uid})
            init_result_q.close()
            yield worker.wait_until_closed()
            logger.info("Worker closed")

    try:
        loop.run_sync(run)
    except TimeoutError:
        # Loop was stopped before wait_until_closed() returned, ignore
        pass
    except KeyboardInterrupt:
        pass
def _run(
    cls,
    worker_kwargs,
    worker_start_args,
    silence_logs,
    init_result_q,
    child_stop_q,
    uid,
    env,
    config,
    Worker,
):  # pragma: no cover
    """
    Build a worker, run it on a private IOLoop and report start-up outcome.

    Any exception raised while setting up (environment, config, loop,
    worker construction) is reported on ``init_result_q`` instead of
    propagating.

    Parameters
    ----------
    worker_kwargs : dict
        Keyword arguments used to instantiate ``Worker``.
    worker_start_args
        Accepted for interface compatibility; not used by this function.
    silence_logs
        If truthy, passed to ``logger.setLevel``.
    init_result_q : queue-like
        Receives one dict describing the outcome: either
        ``{"uid", "exception"}`` or ``{"address", "dir", "uid"}``. The
        queue is closed right after the message is put.
    child_stop_q : queue-like
        Read once by a daemon thread for a ``{"op": "stop", ...}``
        message; the remaining keys are forwarded to ``do_stop`` as
        keyword arguments.
    uid
        Echoed back in every message put on ``init_result_q``.
    env : mapping
        Merged into ``os.environ``.
    config
        Passed to ``dask.config.set``.
    Worker : callable
        Class (or factory) used to create the worker.
    """
    try:
        os.environ.update(env)
        dask.config.set(config)
        try:
            from dask.multiprocessing import initialize_worker_process
        except ImportError:  # old Dask version
            pass
        else:
            initialize_worker_process()

        if silence_logs:
            logger.setLevel(silence_logs)

        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()
        worker = Worker(**worker_kwargs)

        async def do_stop(timeout=5, executor_wait=True):
            """Close the worker, then stop the loop even if closing failed."""
            try:
                await worker.close(
                    report=True,
                    nanny=False,
                    safe=True,  # TODO: Graceful or not?
                    executor_wait=executor_wait,
                    timeout=timeout,
                )
            finally:
                loop.stop()

        def watch_stop_q():
            """
            Wait for an incoming stop message and then stop the
            worker cleanly.
            """
            msg = child_stop_q.get()
            child_stop_q.close()
            # Pop *outside* the assert: assert statements are removed under
            # ``python -O``, which would leave "op" in ``msg`` and make
            # ``do_stop(**msg)`` fail with an unexpected keyword argument.
            op = msg.pop("op")
            assert op == "stop", op
            # Runs in a plain thread, so hand over to the loop thread-safely.
            loop.add_callback(do_stop, **msg)

        t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch")
        t.daemon = True
        t.start()

        async def run():
            """
            Try to start worker and inform parent of outcome.
            """
            try:
                await worker
            except Exception as e:
                logger.exception("Failed to start worker")
                init_result_q.put({"uid": uid, "exception": e})
                init_result_q.close()
                # If we hit an exception here we need to wait for at least
                # one interval for the outside to pick up this message.
                # Otherwise we arrive in a race condition where the process
                # cleanup wipes the queue before the exception can be
                # properly handled. See also
                # WorkerProcess._wait_until_connected (the 2 is for good
                # measure)
                sync_sleep(cls._init_msg_interval * 2)
                return

            # Read the address explicitly rather than inside an assert, so
            # the ValueError guard still works when asserts are stripped.
            try:
                address = worker.address
            except ValueError:
                # NOTE(review): presumably raised when the worker has no
                # address (yet); nothing is reported in that case.
                return
            assert address

            init_result_q.put(
                {
                    "address": address,
                    "dir": worker.local_directory,
                    "uid": uid,
                }
            )
            init_result_q.close()
            await worker.finished()
            logger.info("Worker closed")

    except Exception as e:
        logger.exception("Failed to initialize Worker")
        init_result_q.put({"uid": uid, "exception": e})
        init_result_q.close()
        # If we hit an exception here we need to wait for at least one
        # interval for the outside to pick up this message. Otherwise we
        # arrive in a race condition where the process cleanup wipes the
        # queue before the exception can be properly handled. See also
        # WorkerProcess._wait_until_connected (the 2 is for good measure)
        sync_sleep(cls._init_msg_interval * 2)
    else:
        try:
            loop.run_sync(run)
        except (TimeoutError, gen.TimeoutError):
            # Loop was stopped before wait_until_closed() returned, ignore
            pass
        except KeyboardInterrupt:
            # At this point the loop is not running thus we have to run
            # do_stop() explicitly.
            loop.run_sync(do_stop)
def _run(cls, worker_args, worker_kwargs, worker_start_args,
         silence_logs, init_result_q, child_stop_q, uid, env, Worker):  # pragma: no cover
    """
    Build a worker, run it on a private IOLoop and report start-up outcome.

    Parameters
    ----------
    worker_args, worker_kwargs
        Positional and keyword arguments used to instantiate ``Worker``.
    worker_start_args
        Positional arguments passed to ``worker._start``.
    silence_logs
        If truthy, passed to ``logger.setLevel``.
    init_result_q : queue-like
        Receives one dict describing the start-up outcome: either
        ``{'uid', 'exception'}`` or ``{'address', 'dir', 'uid'}``. The
        queue is closed right after the message is put.
    child_stop_q : queue-like
        Polled by a daemon thread for a ``{'op': 'stop', ...}`` message;
        the remaining keys are forwarded to ``do_stop`` as keyword
        arguments.
    uid
        Echoed back in every message put on ``init_result_q``.
    env : mapping
        Merged into ``os.environ``.
    Worker : callable
        Class (or factory) used to create the worker.
    """
    os.environ.update(env)
    try:
        from dask.multiprocessing import initialize_worker_process
    except ImportError:  # old Dask version
        pass
    else:
        initialize_worker_process()

    if silence_logs:
        logger.setLevel(silence_logs)

    IOLoop.clear_instance()
    loop = IOLoop()
    loop.make_current()
    worker = Worker(*worker_args, **worker_kwargs)

    @gen.coroutine
    def do_stop(timeout=5, executor_wait=True):
        """Close the worker, then stop the loop even if closing failed."""
        try:
            yield worker._close(report=False, nanny=False,
                                executor_wait=executor_wait,
                                timeout=timeout)
        finally:
            loop.stop()

    def watch_stop_q():
        """
        Wait for an incoming stop message and then stop the
        worker cleanly.
        """
        while True:
            try:
                msg = child_stop_q.get(timeout=1000)
            except Empty:
                continue
            child_stop_q.close()
            # Pop *outside* the assert: assert statements are removed under
            # ``python -O``, which would leave 'op' in ``msg`` and make
            # ``do_stop(**msg)`` fail with an unexpected keyword argument.
            op = msg.pop('op')
            assert op == 'stop', op
            # Runs in a plain thread, so hand over to the loop thread-safely.
            loop.add_callback(do_stop, **msg)
            break

    t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch")
    t.daemon = True
    t.start()

    @gen.coroutine
    def run():
        """
        Try to start worker and inform parent of outcome.
        """
        try:
            yield worker._start(*worker_start_args)
        except Exception as e:
            logger.exception("Failed to start worker")
            init_result_q.put({'uid': uid, 'exception': e})
            init_result_q.close()
        else:
            assert worker.address
            init_result_q.put({'address': worker.address,
                               'dir': worker.local_dir,
                               'uid': uid})
            init_result_q.close()
            yield worker.wait_until_closed()
            logger.info("Worker closed")

    try:
        loop.run_sync(run)
    except TimeoutError:
        # Loop was stopped before wait_until_closed() returned, ignore
        pass
    except KeyboardInterrupt:
        pass