Exemple #1
0
def test_exitcode():
    q = mp_context.Queue()

    proc = AsyncProcess(target=exit, kwargs={'q': q})
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    assert proc.is_alive()
    assert proc.exitcode is None

    q.put(5)
    yield proc.join(timeout=3.0)
    assert not proc.is_alive()
    assert proc.exitcode == 5
def test_exitcode():
    q = mp_context.Queue()

    proc = AsyncProcess(target=exit, kwargs={'q': q})
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    assert proc.is_alive()
    assert proc.exitcode is None

    q.put(5)
    yield proc.join(timeout=3.0)
    assert not proc.is_alive()
    assert proc.exitcode == 5
Exemple #3
0
def test_terminate():
    proc = AsyncProcess(target=wait)
    proc.daemon = True
    yield proc.start()
    yield proc.terminate()

    yield proc.join(timeout=3.0)
    assert not proc.is_alive()
    assert proc.exitcode in (-signal.SIGTERM, 255)
def test_terminate():
    proc = AsyncProcess(target=wait)
    proc.daemon = True
    yield proc.start()
    yield proc.terminate()

    yield proc.join(timeout=3.0)
    assert not proc.is_alive()
    assert proc.exitcode in (-signal.SIGTERM, 255)
async def test_exit_callback():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    evt = Event()

    # FIXME: this breaks if changed to async def...
    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        evt.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    await proc.start()
    await asyncio.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    to_child.put(None)
    await evt.wait(timedelta(seconds=3))
    assert evt.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    await proc.start()
    await asyncio.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    await proc.terminate()
    await evt.wait(timedelta(seconds=3))
    assert evt.is_set()
Exemple #6
0
def test_signal():
    proc = AsyncProcess(target=exit_with_signal, args=(signal.SIGINT, ))
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    # Can be 255 with forkserver, see https://bugs.python.org/issue30589
    assert proc.exitcode in (-signal.SIGINT, 255)

    proc = AsyncProcess(target=wait)
    yield proc.start()
    os.kill(proc.pid, signal.SIGTERM)
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    assert proc.exitcode in (-signal.SIGTERM, 255)
def test_signal():
    proc = AsyncProcess(target=exit_with_signal, args=(signal.SIGINT,))
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    # Can be 255 with forkserver, see https://bugs.python.org/issue30589
    assert proc.exitcode in (-signal.SIGINT, 255)

    proc = AsyncProcess(target=wait)
    yield proc.start()
    os.kill(proc.pid, signal.SIGTERM)
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    assert proc.exitcode in (-signal.SIGTERM, 255)
def test_exit_callback():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    evt = Event()

    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        evt.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    to_child.put(None)
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    yield proc.terminate()
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
async def test_num_fds():
    # Warm up
    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    await proc.start()
    await proc.join()

    p = psutil.Process()
    before = p.num_fds()

    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    await proc.start()
    await proc.join()
    assert not proc.is_alive()
    assert proc.exitcode == 0

    while p.num_fds() > before:
        await asyncio.sleep(0.01)
Exemple #10
0
def test_num_fds():
    psutil = pytest.importorskip('psutil')

    # Warm up
    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()

    p = psutil.Process()
    before = p.num_fds()

    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()
    assert not proc.is_alive()
    assert proc.exitcode == 0

    start = time()
    while p.num_fds() > before:
        yield gen.sleep(0.1)
        print("fds:", before, p.num_fds())
        assert time() < start + 10
def test_num_fds():
    psutil = pytest.importorskip('psutil')

    # Warm up
    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()

    p = psutil.Process()
    before = p.num_fds()

    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()
    assert not proc.is_alive()
    assert proc.exitcode == 0

    start = time()
    while p.num_fds() > before:
        yield gen.sleep(0.1)
        print("fds:", before, p.num_fds())
        assert time() < start + 10
Exemple #12
0
def test_simple():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    del proc
    gc.collect()
    if wr1() is not None:
        # Help diagnosing
        from types import FrameType
        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i, f.f_code.co_name,
                      f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
        dt = time() - t1
        assert dt < 2.0
Exemple #13
0
class WorkerProcess:
    running: asyncio.Event
    stopped: asyncio.Event

    # The interval how often to check the msg queue for init
    _init_msg_interval = 0.05

    def __init__(
        self,
        worker_kwargs,
        worker_start_args,
        silence_logs,
        on_exit,
        worker,
        env,
        config,
    ):
        self.status = Status.init
        self.silence_logs = silence_logs
        self.worker_kwargs = worker_kwargs
        self.worker_start_args = worker_start_args
        self.on_exit = on_exit
        self.process = None
        self.Worker = worker
        self.env = env
        self.config = config

        # Initialized when worker is ready
        self.worker_dir = None
        self.worker_address = None

    async def start(self) -> Status:
        """
        Ensure the worker process is started.
        """
        enable_proctitle_on_children()
        if self.status == Status.running:
            return self.status
        if self.status == Status.starting:
            await self.running.wait()
            return self.status

        self.init_result_q = init_q = mp_context.Queue()
        self.child_stop_q = mp_context.Queue()
        uid = uuid.uuid4().hex

        self.process = AsyncProcess(
            target=self._run,
            name="Dask Worker process (from Nanny)",
            kwargs=dict(
                worker_kwargs=self.worker_kwargs,
                worker_start_args=self.worker_start_args,
                silence_logs=self.silence_logs,
                init_result_q=self.init_result_q,
                child_stop_q=self.child_stop_q,
                uid=uid,
                Worker=self.Worker,
                env=self.env,
                config=self.config,
            ),
        )
        self.process.daemon = dask.config.get("distributed.worker.daemon",
                                              default=True)
        self.process.set_exit_callback(self._on_exit)
        self.running = asyncio.Event()
        self.stopped = asyncio.Event()
        self.status = Status.starting

        try:
            await self.process.start()
        except OSError:
            logger.exception("Nanny failed to start process", exc_info=True)
            self.process.terminate()
            self.status = Status.failed
            return self.status
        try:
            msg = await self._wait_until_connected(uid)
        except Exception:
            self.status = Status.failed
            self.process.terminate()
            raise
        if not msg:
            return self.status
        self.worker_address = msg["address"]
        self.worker_dir = msg["dir"]
        assert self.worker_address
        self.status = Status.running
        self.running.set()

        init_q.close()

        return self.status

    def _on_exit(self, proc):
        if proc is not self.process:
            # Ignore exit of old process instance
            return
        self.mark_stopped()

    def _death_message(self, pid, exitcode):
        assert exitcode is not None
        if exitcode == 255:
            return "Worker process %d was killed by unknown signal" % (pid, )
        elif exitcode >= 0:
            return "Worker process %d exited with status %d" % (pid, exitcode)
        else:
            return "Worker process %d was killed by signal %d" % (pid,
                                                                  -exitcode)

    def is_alive(self):
        return self.process is not None and self.process.is_alive()

    @property
    def pid(self):
        return self.process.pid if self.process and self.process.is_alive(
        ) else None

    def mark_stopped(self):
        if self.status != Status.stopped:
            r = self.process.exitcode
            assert r is not None
            if r != 0:
                msg = self._death_message(self.process.pid, r)
                logger.info(msg)
            self.status = Status.stopped
            self.stopped.set()
            # Release resources
            self.process.close()
            self.init_result_q = None
            self.child_stop_q = None
            self.process = None
            # Best effort to clean up worker directory
            if self.worker_dir and os.path.exists(self.worker_dir):
                shutil.rmtree(self.worker_dir, ignore_errors=True)
            self.worker_dir = None
            # User hook
            if self.on_exit is not None:
                self.on_exit(r)

    async def kill(self, timeout: float = 2, executor_wait: bool = True):
        """
        Ensure the worker process is stopped, waiting at most
        *timeout* seconds before terminating it abruptly.
        """
        deadline = time() + timeout

        if self.status == Status.stopped:
            return
        if self.status == Status.stopping:
            await self.stopped.wait()
            return
        assert self.status in (Status.starting, Status.running)
        self.status = Status.stopping

        process = self.process
        self.child_stop_q.put({
            "op": "stop",
            "timeout": max(0, deadline - time()) * 0.8,
            "executor_wait": executor_wait,
        })
        await asyncio.sleep(0)  # otherwise we get broken pipe errors
        self.child_stop_q.close()

        while process.is_alive() and time() < deadline:
            await asyncio.sleep(0.05)

        if process.is_alive():
            logger.warning(
                f"Worker process still alive after {timeout} seconds, killing")
            try:
                await process.terminate()
            except Exception as e:
                logger.error("Failed to kill worker process: %s", e)

    async def _wait_until_connected(self, uid):
        while True:
            if self.status != Status.starting:
                return
            # This is a multiprocessing queue and we'd block the event loop if
            # we simply called get
            try:
                msg = self.init_result_q.get_nowait()
            except Empty:
                await asyncio.sleep(self._init_msg_interval)
                continue

            if msg["uid"] != uid:  # ensure that we didn't cross queues
                continue

            if "exception" in msg:
                logger.error("Failed while trying to start worker process: %s",
                             msg["exception"])
                raise msg["exception"]
            else:
                return msg

    @classmethod
    def _run(
        cls,
        worker_kwargs,
        worker_start_args,
        silence_logs,
        init_result_q,
        child_stop_q,
        uid,
        env,
        config,
        Worker,
    ):  # pragma: no cover
        try:
            os.environ.update(env)
            dask.config.set(config)
            try:
                from dask.multiprocessing import initialize_worker_process
            except ImportError:  # old Dask version
                pass
            else:
                initialize_worker_process()

            if silence_logs:
                logger.setLevel(silence_logs)

            IOLoop.clear_instance()
            loop = IOLoop()
            loop.make_current()
            worker = Worker(**worker_kwargs)

            async def do_stop(timeout=5, executor_wait=True):
                try:
                    await worker.close(
                        report=True,
                        nanny=False,
                        safe=True,  # TODO: Graceful or not?
                        executor_wait=executor_wait,
                        timeout=timeout,
                    )
                finally:
                    loop.stop()

            def watch_stop_q():
                """
                Wait for an incoming stop message and then stop the
                worker cleanly.
                """
                msg = child_stop_q.get()
                child_stop_q.close()
                assert msg.pop("op") == "stop"
                loop.add_callback(do_stop, **msg)

            t = threading.Thread(target=watch_stop_q,
                                 name="Nanny stop queue watch")
            t.daemon = True
            t.start()

            async def run():
                """
                Try to start worker and inform parent of outcome.
                """
                try:
                    await worker
                except Exception as e:
                    logger.exception("Failed to start worker")
                    init_result_q.put({"uid": uid, "exception": e})
                    init_result_q.close()
                    # If we hit an exception here we need to wait for a least
                    # one interval for the outside to pick up this message.
                    # Otherwise we arrive in a race condition where the process
                    # cleanup wipes the queue before the exception can be
                    # properly handled. See also
                    # WorkerProcess._wait_until_connected (the 2 is for good
                    # measure)
                    sync_sleep(cls._init_msg_interval * 2)
                else:
                    try:
                        assert worker.address
                    except ValueError:
                        pass
                    else:
                        init_result_q.put({
                            "address": worker.address,
                            "dir": worker.local_directory,
                            "uid": uid,
                        })
                        init_result_q.close()
                        await worker.finished()
                        logger.info("Worker closed")

        except Exception as e:
            logger.exception("Failed to initialize Worker")
            init_result_q.put({"uid": uid, "exception": e})
            init_result_q.close()
            # If we hit an exception here we need to wait for a least one
            # interval for the outside to pick up this message. Otherwise we
            # arrive in a race condition where the process cleanup wipes the
            # queue before the exception can be properly handled. See also
            # WorkerProcess._wait_until_connected (the 2 is for good measure)
            sync_sleep(cls._init_msg_interval * 2)
        else:
            try:
                loop.run_sync(run)
            except (TimeoutError, gen.TimeoutError):
                # Loop was stopped before wait_until_closed() returned, ignore
                pass
            except KeyboardInterrupt:
                # At this point the loop is not running thus we have to run
                # do_stop() explicitly.
                loop.run_sync(do_stop)
def test_simple():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    del proc
    gc.collect()
    start = time()
    while wr1() is not None and time() < start + 1:
        # Perhaps the GIL switched before _watch_process() exit,
        # help it a little
        sleep(0.001)
        gc.collect()
    if wr1() is not None:
        # Help diagnosing
        from types import FrameType
        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i,
                      f.f_code.co_name, f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
        dt = time() - t1
        assert dt < 2.0