Exemple #1
0
def run_worker_fork(q, scheduler_addr, ncores, nanny_port, worker_ip,
                    worker_port, local_dir, **kwargs):
    """
    Create a worker in a forked child and run it until it closes.

    The outcome of the start attempt is reported through ``q``: either the
    exception raised by ``worker._start`` or a dict with the keys
    ``'address'`` and ``'dir'``.

    Parameters
    ----------
    q : object with a ``put`` method
        Receives the start-up result (exception instance or info dict).
    scheduler_addr
        Passed positionally as the first argument of ``Worker``.
    ncores
        Forwarded to ``Worker`` as ``ncores``.
    nanny_port
        Advertised to the worker as ``service_ports={'nanny': nanny_port}``.
    worker_ip
        Not used by this function; kept so existing callers keep working.
    worker_port
        Passed to ``worker._start``.
    local_dir
        Forwarded to ``Worker`` as ``local_dir``.
    **kwargs
        Forwarded unchanged to ``Worker``.
    """
    from distributed import Worker  # pragma: no cover
    from tornado.ioloop import IOLoop  # pragma: no cover

    try:
        from dask.multiprocessing import initialize_worker_process
    except ImportError:  # old Dask version
        pass
    else:
        initialize_worker_process()

    # Discard any loop instance registered before this point and install a
    # fresh one as the current loop.
    IOLoop.clear_instance()  # pragma: no cover
    loop = IOLoop()  # pragma: no cover
    loop.make_current()  # pragma: no cover
    worker = Worker(scheduler_addr,
                    ncores=ncores,
                    service_ports={'nanny': nanny_port},
                    local_dir=local_dir,
                    **kwargs)  # pragma: no cover

    @gen.coroutine  # pragma: no cover
    def run():
        try:  # pragma: no cover
            yield worker._start(worker_port)  # pragma: no cover
        except Exception as e:  # pragma: no cover
            logger.exception(e)  # pragma: no cover
            q.put(e)  # pragma: no cover
        else:
            assert worker.port  # pragma: no cover
            q.put({
                'address': worker.address,
                'dir': worker.local_dir
            })  # pragma: no cover

            # Only wait for closure when the worker actually started; a
            # worker that failed to start would otherwise be waited on
            # after the failure has already been reported.
            yield worker.wait_until_closed()

            logger.info("Worker closed")

    try:
        loop.run_sync(run)
    except TimeoutError:
        logger.info("Worker timed out")
    except KeyboardInterrupt:
        pass
    finally:
        loop.stop()
        loop.close(all_fds=True)
Exemple #2
0
    def _run(
        cls,
        worker_kwargs,
        worker_start_args,
        silence_logs,
        init_result_q,
        child_stop_q,
        uid,
        env,
        config,
        Worker,
    ):  # pragma: no cover
        """
        Start a worker on a fresh IOLoop and run it until it finishes.

        Parameters
        ----------
        worker_kwargs : dict
            Keyword arguments used to build the worker, ``Worker(**worker_kwargs)``.
        worker_start_args
            Not used by this implementation; kept for call compatibility.
        silence_logs
            If truthy, passed to ``logger.setLevel``.
        init_result_q
            Queue that receives one dict describing the start-up outcome:
            ``{"uid", "exception"}`` on failure, or
            ``{"address", "dir", "uid"}`` on success. Closed after the put.
        child_stop_q
            Queue polled from a daemon thread for a ``{"op": "stop", ...}``
            message; the remaining keys are forwarded as keyword arguments
            to the stop coroutine.
        uid
            Echoed back in every message put on ``init_result_q``.
        env : mapping
            Merged into ``os.environ``.
        config
            Passed to ``dask.config.set``.
        Worker
            Callable used to construct the worker object.
        """
        os.environ.update(env)
        dask.config.set(config)
        try:
            from dask.multiprocessing import initialize_worker_process
        except ImportError:  # old Dask version
            pass
        else:
            initialize_worker_process()

        if silence_logs:
            logger.setLevel(silence_logs)

        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()
        worker = Worker(**worker_kwargs)

        async def do_stop(timeout=5, executor_wait=True):
            # Always stop the loop, even if closing the worker fails.
            try:
                await worker.close(
                    report=False,
                    nanny=False,
                    executor_wait=executor_wait,
                    timeout=timeout,
                )
            finally:
                loop.stop()

        def watch_stop_q():
            """
            Wait for an incoming stop message and then stop the
            worker cleanly.
            """
            while True:
                try:
                    msg = child_stop_q.get(timeout=1000)
                except Empty:
                    pass
                else:
                    child_stop_q.close()
                    # Pop outside the assert: asserts are stripped under
                    # ``python -O``, which would leave "op" in ``msg`` and
                    # make ``do_stop(**msg)`` fail on an unexpected keyword.
                    op = msg.pop("op")
                    assert op == "stop", op
                    loop.add_callback(do_stop, **msg)
                    break

        t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch")
        t.daemon = True
        t.start()

        async def run():
            """
            Try to start worker and inform parent of outcome.
            """
            try:
                await worker
            except Exception as e:
                logger.exception("Failed to start worker")
                init_result_q.put({"uid": uid, "exception": e})
                init_result_q.close()
            else:
                try:
                    # Read the address outside any assert so the lookup, and
                    # the ValueError it may raise, still happens inside this
                    # guarded region when asserts are disabled.
                    address = worker.address
                except ValueError:
                    pass
                else:
                    assert address
                    init_result_q.put(
                        {
                            "address": address,
                            "dir": worker.local_directory,
                            "uid": uid,
                        }
                    )
                    init_result_q.close()
                    await worker.finished()
                    logger.info("Worker closed")

        try:
            loop.run_sync(run)
        except TimeoutError:
            # Loop was stopped before worker.finished() returned, ignore
            pass
        except KeyboardInterrupt:
            pass
Exemple #3
0
    def _run(cls, worker_args, worker_kwargs, worker_start_args, silence_logs,
             init_result_q, child_stop_q, uid, Worker):  # pragma: no cover
        """
        Start a worker on a fresh IOLoop and run it until it is closed.

        Parameters
        ----------
        worker_args, worker_kwargs
            Positional and keyword arguments used to build the worker,
            ``Worker(*worker_args, **worker_kwargs)``.
        worker_start_args
            Positional arguments passed to ``worker._start``.
        silence_logs
            If truthy, passed to ``logger.setLevel``.
        init_result_q
            Queue that receives one dict describing the start-up outcome:
            ``{'uid', 'exception'}`` on failure, or
            ``{'address', 'dir', 'uid'}`` on success. Closed after the put.
        child_stop_q
            Queue polled from a daemon thread for a ``{'op': 'stop', ...}``
            message; the remaining keys are forwarded as keyword arguments
            to the stop coroutine.
        uid
            Echoed back in every message put on ``init_result_q``.
        Worker
            Callable used to construct the worker object.
        """
        try:
            from dask.multiprocessing import initialize_worker_process
        except ImportError:  # old Dask version
            pass
        else:
            initialize_worker_process()

        if silence_logs:
            logger.setLevel(silence_logs)

        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()
        worker = Worker(*worker_args, **worker_kwargs)

        @gen.coroutine
        def do_stop(timeout=5, executor_wait=True):
            # Always stop the loop, even if closing the worker fails.
            try:
                yield worker._close(report=False,
                                    nanny=False,
                                    executor_wait=executor_wait,
                                    timeout=timeout)
            finally:
                loop.stop()

        def watch_stop_q():
            """
            Wait for an incoming stop message and then stop the
            worker cleanly.
            """
            while True:
                try:
                    msg = child_stop_q.get(timeout=1000)
                except Empty:
                    pass
                else:
                    child_stop_q.close()
                    # Pop outside the assert: asserts are stripped under
                    # ``python -O``, which would leave 'op' in ``msg`` and
                    # make ``do_stop(**msg)`` fail on an unexpected keyword.
                    op = msg.pop('op')
                    assert op == 'stop', op
                    loop.add_callback(do_stop, **msg)
                    break

        t = threading.Thread(target=watch_stop_q,
                             name="Nanny stop queue watch")
        t.daemon = True
        t.start()

        @gen.coroutine
        def run():
            """
            Try to start worker and inform parent of outcome.
            """
            try:
                yield worker._start(*worker_start_args)
            except Exception as e:
                logger.exception("Failed to start worker")
                init_result_q.put({'uid': uid, 'exception': e})
                init_result_q.close()
            else:
                assert worker.address
                init_result_q.put({
                    'address': worker.address,
                    'dir': worker.local_dir,
                    'uid': uid
                })
                init_result_q.close()
                yield worker.wait_until_closed()
                logger.info("Worker closed")

        try:
            loop.run_sync(run)
        except TimeoutError:
            # Loop was stopped before wait_until_closed() returned, ignore
            pass
        except KeyboardInterrupt:
            pass
Exemple #4
0
    def _run(
        cls,
        worker_kwargs,
        worker_start_args,
        silence_logs,
        init_result_q,
        child_stop_q,
        uid,
        env,
        config,
        Worker,
    ):  # pragma: no cover
        """
        Start a worker on a fresh IOLoop and run it until it finishes.

        Any exception raised while setting up (environment, config, loop,
        worker construction) or while starting the worker is reported on
        ``init_result_q`` as ``{"uid", "exception"}``. After such a report
        the function sleeps for ``cls._init_msg_interval * 2`` before
        returning.

        Parameters
        ----------
        worker_kwargs : dict
            Keyword arguments used to build the worker, ``Worker(**worker_kwargs)``.
        worker_start_args
            Not used by this implementation; kept for call compatibility.
        silence_logs
            If truthy, passed to ``logger.setLevel``.
        init_result_q
            Queue that receives one dict describing the start-up outcome:
            ``{"uid", "exception"}`` on failure, or
            ``{"address", "dir", "uid"}`` on success. Closed after the put.
        child_stop_q
            Queue read from a daemon thread for a ``{"op": "stop", ...}``
            message; the remaining keys are forwarded as keyword arguments
            to the stop coroutine.
        uid
            Echoed back in every message put on ``init_result_q``.
        env : mapping
            Merged into ``os.environ``.
        config
            Passed to ``dask.config.set``.
        Worker
            Callable used to construct the worker object.
        """
        try:
            os.environ.update(env)
            dask.config.set(config)
            try:
                from dask.multiprocessing import initialize_worker_process
            except ImportError:  # old Dask version
                pass
            else:
                initialize_worker_process()

            if silence_logs:
                logger.setLevel(silence_logs)

            IOLoop.clear_instance()
            loop = IOLoop()
            loop.make_current()
            worker = Worker(**worker_kwargs)

            async def do_stop(timeout=5, executor_wait=True):
                # Always stop the loop, even if closing the worker fails.
                try:
                    await worker.close(
                        report=True,
                        nanny=False,
                        safe=True,  # TODO: Graceful or not?
                        executor_wait=executor_wait,
                        timeout=timeout,
                    )
                finally:
                    loop.stop()

            def watch_stop_q():
                """
                Wait for an incoming stop message and then stop the
                worker cleanly.
                """
                msg = child_stop_q.get()
                child_stop_q.close()
                # Pop outside the assert: asserts are stripped under
                # ``python -O``, which would leave "op" in ``msg`` and make
                # ``do_stop(**msg)`` fail on an unexpected keyword.
                op = msg.pop("op")
                assert op == "stop", op
                loop.add_callback(do_stop, **msg)

            t = threading.Thread(target=watch_stop_q,
                                 name="Nanny stop queue watch")
            t.daemon = True
            t.start()

            async def run():
                """
                Try to start worker and inform parent of outcome.
                """
                try:
                    await worker
                except Exception as e:
                    logger.exception("Failed to start worker")
                    init_result_q.put({"uid": uid, "exception": e})
                    init_result_q.close()
                    # If we hit an exception here we need to wait for at
                    # least one interval for the outside to pick up this
                    # message. Otherwise we arrive in a race condition where
                    # the process cleanup wipes the queue before the
                    # exception can be properly handled. See also
                    # WorkerProcess._wait_until_connected (the 2 is for good
                    # measure)
                    sync_sleep(cls._init_msg_interval * 2)
                else:
                    try:
                        # Read the address outside any assert so the lookup,
                        # and the ValueError it may raise, still happens
                        # inside this guarded region when asserts are
                        # disabled.
                        address = worker.address
                    except ValueError:
                        pass
                    else:
                        assert address
                        init_result_q.put({
                            "address": address,
                            "dir": worker.local_directory,
                            "uid": uid,
                        })
                        init_result_q.close()
                        await worker.finished()
                        logger.info("Worker closed")

        except Exception as e:
            logger.exception("Failed to initialize Worker")
            init_result_q.put({"uid": uid, "exception": e})
            init_result_q.close()
            # If we hit an exception here we need to wait for at least one
            # interval for the outside to pick up this message. Otherwise we
            # arrive in a race condition where the process cleanup wipes the
            # queue before the exception can be properly handled. See also
            # WorkerProcess._wait_until_connected (the 2 is for good measure)
            sync_sleep(cls._init_msg_interval * 2)
        else:
            try:
                loop.run_sync(run)
            except (TimeoutError, gen.TimeoutError):
                # Loop was stopped before worker.finished() returned, ignore
                pass
            except KeyboardInterrupt:
                # At this point the loop is not running thus we have to run
                # do_stop() explicitly.
                loop.run_sync(do_stop)
Exemple #5
0
    def _run(cls, worker_args, worker_kwargs, worker_start_args,
             silence_logs, init_result_q, child_stop_q, uid, env, Worker):  # pragma: no cover
        """
        Start a worker on a fresh IOLoop and run it until it is closed.

        Parameters
        ----------
        worker_args, worker_kwargs
            Positional and keyword arguments used to build the worker,
            ``Worker(*worker_args, **worker_kwargs)``.
        worker_start_args
            Positional arguments passed to ``worker._start``.
        silence_logs
            If truthy, passed to ``logger.setLevel``.
        init_result_q
            Queue that receives one dict describing the start-up outcome:
            ``{'uid', 'exception'}`` on failure, or
            ``{'address', 'dir', 'uid'}`` on success. Closed after the put.
        child_stop_q
            Queue polled from a daemon thread for a ``{'op': 'stop', ...}``
            message; the remaining keys are forwarded as keyword arguments
            to the stop coroutine.
        uid
            Echoed back in every message put on ``init_result_q``.
        env : mapping
            Merged into ``os.environ``.
        Worker
            Callable used to construct the worker object.
        """
        os.environ.update(env)
        try:
            from dask.multiprocessing import initialize_worker_process
        except ImportError:   # old Dask version
            pass
        else:
            initialize_worker_process()

        if silence_logs:
            logger.setLevel(silence_logs)

        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()
        worker = Worker(*worker_args, **worker_kwargs)

        @gen.coroutine
        def do_stop(timeout=5, executor_wait=True):
            # Always stop the loop, even if closing the worker fails.
            try:
                yield worker._close(report=False,
                                    nanny=False,
                                    executor_wait=executor_wait,
                                    timeout=timeout)
            finally:
                loop.stop()

        def watch_stop_q():
            """
            Wait for an incoming stop message and then stop the
            worker cleanly.
            """
            while True:
                try:
                    msg = child_stop_q.get(timeout=1000)
                except Empty:
                    pass
                else:
                    child_stop_q.close()
                    # Pop outside the assert: asserts are stripped under
                    # ``python -O``, which would leave 'op' in ``msg`` and
                    # make ``do_stop(**msg)`` fail on an unexpected keyword.
                    op = msg.pop('op')
                    assert op == 'stop', op
                    loop.add_callback(do_stop, **msg)
                    break

        t = threading.Thread(target=watch_stop_q, name="Nanny stop queue watch")
        t.daemon = True
        t.start()

        @gen.coroutine
        def run():
            """
            Try to start worker and inform parent of outcome.
            """
            try:
                yield worker._start(*worker_start_args)
            except Exception as e:
                logger.exception("Failed to start worker")
                init_result_q.put({'uid': uid, 'exception': e})
                init_result_q.close()
            else:
                assert worker.address
                init_result_q.put({'address': worker.address,
                                   'dir': worker.local_dir,
                                   'uid': uid})
                init_result_q.close()
                yield worker.wait_until_closed()
                logger.info("Worker closed")

        try:
            loop.run_sync(run)
        except TimeoutError:
            # Loop was stopped before wait_until_closed() returned, ignore
            pass
        except KeyboardInterrupt:
            pass