def test_child_main_thread():
    """
    The main thread in the child should be called "MainThread".

    The child runs ``threads_info`` with a queue as its only argument; this
    test reads two values back from that queue after the child has exited.
    """
    results = mp_context.Queue()
    child = AsyncProcess(target=threads_info, args=(results,))

    yield child.start()
    yield child.join()

    # Values are consumed in this order: thread count, then main thread name.
    thread_count = results.get()
    main_thread_name = results.get()

    assert thread_count <= 3
    assert main_thread_name == "MainThread"

    # Close the queue and both of its underlying pipe ends explicitly.
    results.close()
    results._reader.close()
    results._writer.close()
def parent_process_coroutine():
    """Start a worker child, wait for it to come up, then hard-exit.

    ``child_pipe`` is not defined here; it is taken from the enclosing scope.
    This function never returns normally: it ends with ``os._exit(255)``.
    """
    started = mp_context.Event()
    child = AsyncProcess(target=_worker_process, args=(started, child_pipe))

    yield child.start()

    # Wait for the child process to have started.
    started.wait()

    # Leave right away, skipping every kind of process teardown (atexit
    # handlers and 'finally:' blocks included), as if killed by SIGKILL.
    # This should cause worker_process to also exit.
    os._exit(255)
def test_signal():
    """Check the exit code reported for children that end because of a signal.

    255 is accepted next to the negated signal number: it can be reported
    with forkserver, see https://bugs.python.org/issue30589
    """
    # Case 1: the child is given SIGINT as argument to exit_with_signal.
    child = AsyncProcess(target=exit_with_signal, args=(signal.SIGINT,))
    child.daemon = True
    assert not child.is_alive()
    assert child.exitcode is None

    yield child.start()
    yield child.join(timeout=3.0)

    assert not child.is_alive()
    sigint_codes = (-signal.SIGINT, 255)
    assert child.exitcode in sigint_codes

    # Case 2: the parent sends SIGTERM to a started child.
    child = AsyncProcess(target=wait)
    yield child.start()
    os.kill(child.pid, signal.SIGTERM)
    yield child.join(timeout=3.0)

    assert not child.is_alive()
    sigterm_codes = (-signal.SIGTERM, 255)
    assert child.exitcode in sigterm_codes
def test_terminate():
    """terminate() stops a started child; the exit code reflects SIGTERM (or 255)."""
    child = AsyncProcess(target=wait)
    child.daemon = True

    yield child.start()
    yield child.terminate()
    yield child.join(timeout=3.0)

    assert not child.is_alive()
    accepted_codes = (-signal.SIGTERM, 255)
    assert child.exitcode in accepted_codes
def test_exitcode():
    """exitcode is None before start and while alive, then holds the exit status."""
    codes = mp_context.Queue()
    child = AsyncProcess(target=exit, kwargs={"q": codes})
    child.daemon = True

    # Not started yet
    assert not child.is_alive()
    assert child.exitcode is None

    yield child.start()

    # Started and still alive
    assert child.is_alive()
    assert child.exitcode is None

    # The test expects the child to exit with the value placed on the queue.
    codes.put(5)
    yield child.join(timeout=3.0)

    assert not child.is_alive()
    assert child.exitcode == 5
def test_num_fds():
    """Running an AsyncProcess to completion must not leave extra open fds.

    Skipped when psutil is not importable.
    """
    psutil = pytest.importorskip("psutil")

    # Warm up
    child = AsyncProcess(target=exit_now)
    child.daemon = True
    yield child.start()
    yield child.join()

    this_process = psutil.Process()
    baseline = this_process.num_fds()

    # Same variable is rebound on purpose, as in the warm-up above.
    child = AsyncProcess(target=exit_now)
    child.daemon = True
    yield child.start()
    yield child.join()
    assert not child.is_alive()
    assert child.exitcode == 0

    # Poll until the fd count is back at (or below) the baseline; fail if
    # that takes 10 seconds or more.
    began = time()
    while this_process.num_fds() > baseline:
        yield gen.sleep(0.1)
        print("fds:", baseline, this_process.num_fds())
        assert time() < began + 10
def test_close():
    """After close(), start(), terminate() and join() raise ValueError."""
    # Closed before ever being started: start() must fail.
    child = AsyncProcess(target=exit_now)
    child.close()
    with pytest.raises(ValueError):
        yield child.start()

    # Closed right after start: terminate() must fail.
    child = AsyncProcess(target=exit_now)
    yield child.start()
    child.close()
    with pytest.raises(ValueError):
        yield child.terminate()

    # Closed after a completed join: a further join() must fail.
    child = AsyncProcess(target=exit_now)
    yield child.start()
    yield child.join()
    child.close()
    with pytest.raises(ValueError):
        yield child.join()

    # close() is called a second time on the same object here.
    child.close()
def test_signal():
    """Check the exit code reported for children that end because of a signal.

    255 is accepted next to the negated signal number: it can be reported
    with forkserver, see https://bugs.python.org/issue30589
    """
    # Case 1: the child is given SIGINT as argument to exit_with_signal.
    child = AsyncProcess(target=exit_with_signal, args=(signal.SIGINT,))
    child.daemon = True
    assert not child.is_alive()
    assert child.exitcode is None

    yield child.start()
    yield child.join(timeout=3.0)

    assert not child.is_alive()
    sigint_codes = (-signal.SIGINT, 255)
    assert child.exitcode in sigint_codes

    # Case 2: the parent sends SIGTERM to a started child.
    child = AsyncProcess(target=wait)
    yield child.start()
    os.kill(child.pid, signal.SIGTERM)
    yield child.join(timeout=3.0)

    assert not child.is_alive()
    sigterm_codes = (-signal.SIGTERM, 255)
    assert child.exitcode in sigterm_codes
def test_simple():
    """
    Walk an AsyncProcess through its basic lifecycle.

    Covers: attribute values before start, join() before start, join() with
    a short timeout while the child is alive, changing ``daemon`` after
    start, normal exit, a repeated join(), and finally that both the
    AsyncProcess and its ``_process`` attribute get garbage-collected.
    """
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    # Weak references used at the end to check that nothing keeps the
    # AsyncProcess (wr1) or its underlying process object (wr2) alive.
    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # join() with a short timeout must come back while the child still runs
    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    # The value sent on to_child is expected back on from_child
    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    # Drop the only local strong reference, then give the object up to one
    # second to disappear.
    del proc
    gc.collect()
    start = time()
    while wr1() is not None and time() < start + 1:
        # Perhaps the GIL switched before _watch_process() exit,
        # help it a little
        sleep(0.001)
        gc.collect()
    if wr1() is not None:
        # Help diagnosing
        from types import FrameType

        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            # Release the temporary strong reference before reporting
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i, f.f_code.co_name, f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")

    # The underlying process object must go away too, within 2 seconds
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
        dt = time() - t1
        assert dt < 2.0
def test_terminate_after_stop():
    """Call terminate() on a child whose target is ``sleep(0)``, 0.1 s after start."""
    child = AsyncProcess(target=sleep, args=(0,))
    yield child.start()

    # Pause before terminating; the target only sleeps for 0 seconds.
    yield gen.sleep(0.1)

    yield child.terminate()
async def test_terminate_after_stop():
    """Call terminate() on a child whose target is ``sleep(0)``, 0.1 s after start."""
    child = AsyncProcess(target=sleep, args=(0,))
    await child.start()

    # Pause before terminating; the target only sleeps for 0 seconds.
    await asyncio.sleep(0.1)

    await child.terminate()
class Scheduler(ProcessInterface):
    """Run a scheduler (``_Scheduler``) inside a child ``AsyncProcess``.

    Parent and child communicate over three multiprocessing queues that are
    created in :meth:`start`:

    * ``init_result_q``: child -> parent, outcome of start-up (either
      ``{"address": ...}`` or ``{"exception": ...}``).
    * ``child_info_stop_q``: parent -> child, ``{"op": "info"}`` and
      ``{"op": "stop", ...}`` requests.
    * ``parent_info_q``: child -> parent, replies to ``"info"`` requests.
    """

    def __init__(self, env=None, *args, **kwargs):
        """
        Parameters
        ----------
        env : dict, optional
            Applied with ``os.environ.update`` in the child before the
            scheduler is created. Defaults to an empty dict.
        *args
            Stored on ``self.args``; not used elsewhere in this class.
        **kwargs
            Forwarded as ``proc_cls(**kwargs)`` in the child.
        """
        super().__init__()

        self.args = args
        self.kwargs = kwargs
        self.proc_cls = _Scheduler
        self.process = None
        self.env = env or {}

    def __repr__(self):
        """Describe the scheduler, asking the child for worker/thread counts.

        Falls back to a short status-only string when the queues do not
        exist (before ``start()`` / after ``mark_stopped()``), when they are
        already closed, or when the child does not answer in time, so that
        ``repr()`` always gets a string back.
        """
        fallback = "<Scheduler: status=%s>" % getattr(self, "status", None)

        request_q = getattr(self, "child_info_stop_q", None)
        reply_q = getattr(self, "parent_info_q", None)
        if request_q is None or reply_q is None:
            return fallback

        try:
            request_q.put({"op": "info"})
            # NOTE(review): 3000 s is a very long time to block in __repr__;
            # possibly meant as milliseconds -- confirm before changing.
            msg = reply_q.get(timeout=3000)
        except (Empty, ValueError):
            # Empty: no reply within the timeout.
            # ValueError: multiprocessing queues raise it once closed.
            return fallback

        assert msg.pop("op") == "info"
        return "<Scheduler: '%s' processes: %d cores: %d>" % (
            self.address,
            msg.pop("workers"),
            msg.pop("total_nthreads"),
        )

    async def _wait_until_started(self):
        """Poll ``init_result_q`` until the child reports its start-up outcome.

        Returns
        -------
        dict or None
            The child's message on success; ``None`` if ``self.status``
            stops being ``"starting"`` while waiting.

        Raises
        ------
        Exception
            The exception object the child sent back when start-up failed.
        """
        delay = 0.05
        while True:
            if self.status != "starting":
                return
            try:
                msg = self.init_result_q.get_nowait()
            except Empty:
                await gen.sleep(delay)
                continue

            if "exception" in msg:
                logger.error(
                    "Failed while trying to start scheduler process: %s",
                    msg["exception"],
                )
                await self.process.join()
                # Raise the transported exception itself; raising the dict
                # would fail with "exceptions must derive from BaseException".
                raise msg["exception"]
            else:
                return msg

    async def start(self):
        """Spawn the scheduler process and wait until it reports its address.

        Returns ``self.status`` early when already running/starting or when
        start-up was interrupted. Returns ``None`` if the process could not
        be started (``OSError``) and after a successful start.
        """
        if self.status == "running":
            return self.status
        if self.status == "starting":
            await self.running.wait()
            return self.status

        self.init_result_q = init_q = mp_context.Queue()
        self.child_info_stop_q = mp_context.Queue()
        self.parent_info_q = mp_context.Queue()

        self.process = AsyncProcess(
            target=self._run,
            name="Dask CUDA Scheduler process",
            kwargs=dict(
                proc_cls=self.proc_cls,
                kwargs=self.kwargs,
                silence_logs=False,
                init_result_q=self.init_result_q,
                child_info_stop_q=self.child_info_stop_q,
                parent_info_q=self.parent_info_q,
                env=self.env,
            ),
        )
        # self.process.daemon = dask.config.get("distributed.worker.daemon", default=True)
        self.process.set_exit_callback(self._on_exit)
        self.running = Event()
        self.stopped = Event()
        self.status = "starting"
        try:
            await self.process.start()
        except OSError:
            logger.exception("Failed to start CUDA Scheduler process", exc_info=True)
            # NOTE(review): terminate() is not awaited here, unlike in
            # close() -- confirm whether that is intended.
            self.process.terminate()
            return

        msg = await self._wait_until_started()
        if not msg:
            return self.status
        self.address = msg["address"]
        assert self.address
        self.status = "running"
        self.running.set()

        init_q.close()

        await super().start()

    def _on_exit(self, proc):
        """Exit callback: mark as stopped, but only for the current process."""
        if proc is not self.process:
            # Callback from a process this object no longer tracks
            return
        self.mark_stopped()

    def _death_message(self, pid, exitcode):
        """Build a human-readable description of how process ``pid`` ended."""
        assert exitcode is not None
        if exitcode == 255:
            return "Scheduler process %d was killed by unknown signal" % (
                pid,
            )
        elif exitcode >= 0:
            return "Scheduler process %d exited with status %d" % (pid, exitcode)
        else:
            # Negative exit codes are reported as "killed by signal -exitcode"
            return "Scheduler process %d was killed by signal %d" % (pid, -exitcode)

    def mark_stopped(self):
        """Record that the process has exited and drop process/queue references.

        Does nothing when the status is already ``"stopped"``.
        """
        if self.status != "stopped":
            r = self.process.exitcode
            assert r is not None
            if r != 0:
                msg = self._death_message(self.process.pid, r)
                logger.info(msg)
            self.status = "stopped"
            self.stopped.set()
            # Release resources
            self.process.close()
            self.init_result_q = None
            self.child_info_stop_q = None
            self.parent_info_q = None
            self.process = None

    async def close(self):
        """Ask the child to stop; terminate it if still alive after 2 seconds.

        Shutdown is best-effort: an error while stopping the child is logged
        and the object is still marked ``"closed"``.
        """
        timeout = 2
        loop = IOLoop.current()
        deadline = loop.time() + timeout
        if self.status == "closing":
            await self.finished()
            assert self.status == "closed"

        if self.status == "closed":
            return

        try:
            if self.process is not None:
                #await self.kill()
                process = self.process
                # Give the child 80% of the remaining time to stop cleanly
                self.child_info_stop_q.put({
                    "op": "stop",
                    "timeout": max(0, deadline - loop.time()) * 0.8,
                })
                self.child_info_stop_q.close()
                self.parent_info_q.close()

                while process.is_alive() and loop.time() < deadline:
                    await gen.sleep(0.05)

                if process.is_alive():
                    logger.warning(
                        "Scheduler process still alive after %d seconds, killing",
                        timeout)
                    try:
                        await process.terminate()
                    except Exception as e:
                        logger.error("Failed to kill scheduler process: %s", e)
        except Exception:
            # Still best-effort, but leave a trace instead of hiding the error
            logger.exception("Error while stopping scheduler process")
        self.process = None
        self.status = "closed"
        await super().close()

    @classmethod
    def _run(
        cls,
        silence_logs,
        init_result_q,
        child_info_stop_q,
        parent_info_q,
        proc_cls,
        kwargs,
        env,
    ):  # pragma: no cover
        """Child-process entry point: create the scheduler and run its loop.

        Parameters
        ----------
        silence_logs
            When truthy, passed to ``logger.setLevel``.
        init_result_q
            Queue that receives ``{"address": ...}`` or ``{"exception": ...}``.
        child_info_stop_q
            Queue on which ``"info"`` / ``"stop"`` requests arrive.
        parent_info_q
            Queue that receives replies to ``"info"`` requests.
        proc_cls
            Called as ``proc_cls(**kwargs)`` to build the scheduler.
        kwargs : dict
            Keyword arguments for ``proc_cls``.
        env : dict
            Merged into ``os.environ`` first.
        """
        os.environ.update(env)

        if silence_logs:
            logger.setLevel(silence_logs)

        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()
        scheduler = proc_cls(**kwargs)

        async def do_stop(timeout=5):
            # ``timeout`` is accepted because the rest of the stop message is
            # passed in as keyword arguments; it is not used in the body.
            try:
                await scheduler.close(comm=None, fast=False, close_workers=False)
            finally:
                loop.stop()

        def watch_stop_q():
            """
            Wait for an incoming stop message and then stop the
            scheduler cleanly. "info" requests are answered on
            parent_info_q while waiting.
            """
            while True:
                try:
                    msg = child_info_stop_q.get(timeout=1000)
                except Empty:
                    pass
                else:
                    op = msg.pop("op")
                    assert op == "stop" or op == "info"
                    if op == "stop":
                        child_info_stop_q.close()
                        loop.add_callback(do_stop, **msg)
                        break
                    elif op == "info":
                        parent_info_q.put({
                            "op": "info",
                            "workers": len(scheduler.workers),
                            "total_nthreads": scheduler.total_nthreads,
                        })

        t = threading.Thread(target=watch_stop_q, name="Scheduler stop queue watch")
        t.daemon = True
        t.start()

        async def run():
            """
            Try to start scheduler and inform parent of outcome.
            """
            try:
                await scheduler.start()
            except Exception as e:
                logger.exception("Failed to start scheduler")
                init_result_q.put({"exception": e})
                init_result_q.close()
            else:
                try:
                    assert scheduler.address
                except ValueError:
                    pass
                else:
                    init_result_q.put({"address": scheduler.address})
                    init_result_q.close()
                    await scheduler.finished()
                    logger.info("Scheduler closed")

        try:
            loop.run_sync(run)
        except TimeoutError:
            # Loop was stopped before wait_until_closed() returned, ignore
            pass
        except KeyboardInterrupt:
            pass
def test_terminate_after_stop():
    """Call terminate() on a child whose target is ``sleep(0)``, 0.1 s after start."""
    child = AsyncProcess(target=sleep, args=(0,))
    yield child.start()

    # Pause before terminating; the target only sleeps for 0 seconds.
    yield gen.sleep(0.1)

    yield child.terminate()
def test_exit_callback():
    """The exit callback runs both on normal exit and after terminate()."""
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    stopped = Event()
    wait_limit = timedelta(seconds=3)

    @gen.coroutine
    def on_stop(_proc):
        # ``child`` is looked up when the callback runs, so it refers to
        # whichever process the test is currently exercising.
        assert _proc is child
        yield gen.moment
        stopped.set()

    # Normal process exit
    child = AsyncProcess(target=feed, args=(to_child, from_child))
    stopped.clear()
    child.set_exit_callback(on_stop)
    child.daemon = True

    yield child.start()
    yield gen.sleep(0.05)
    assert child.is_alive()
    assert not stopped.is_set()

    to_child.put(None)
    yield stopped.wait(wait_limit)
    assert stopped.is_set()
    assert not child.is_alive()

    # Process terminated
    child = AsyncProcess(target=wait)
    stopped.clear()
    child.set_exit_callback(on_stop)
    child.daemon = True

    yield child.start()
    yield gen.sleep(0.05)
    assert child.is_alive()
    assert not stopped.is_set()

    yield child.terminate()
    yield stopped.wait(wait_limit)
    assert stopped.is_set()
def test_close():
    """After close(), start(), terminate() and join() raise ValueError."""
    # Closed before ever being started: start() must fail.
    child = AsyncProcess(target=exit_now)
    child.close()
    with pytest.raises(ValueError):
        yield child.start()

    # Closed right after start: terminate() must fail.
    child = AsyncProcess(target=exit_now)
    yield child.start()
    child.close()
    with pytest.raises(ValueError):
        yield child.terminate()

    # Closed after a completed join: a further join() must fail.
    child = AsyncProcess(target=exit_now)
    yield child.start()
    yield child.join()
    child.close()
    with pytest.raises(ValueError):
        yield child.join()

    # close() is called a second time on the same object here.
    child.close()
def test_exit_callback():
    """The exit callback runs both on normal exit and after terminate()."""
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    stopped = Event()
    wait_limit = timedelta(seconds=3)

    @gen.coroutine
    def on_stop(_proc):
        # ``child`` is looked up when the callback runs, so it refers to
        # whichever process the test is currently exercising.
        assert _proc is child
        yield gen.moment
        stopped.set()

    # Normal process exit
    child = AsyncProcess(target=feed, args=(to_child, from_child))
    stopped.clear()
    child.set_exit_callback(on_stop)
    child.daemon = True

    yield child.start()
    yield gen.sleep(0.05)
    assert child.is_alive()
    assert not stopped.is_set()

    to_child.put(None)
    yield stopped.wait(wait_limit)
    assert stopped.is_set()
    assert not child.is_alive()

    # Process terminated
    child = AsyncProcess(target=wait)
    stopped.clear()
    child.set_exit_callback(on_stop)
    child.daemon = True

    yield child.start()
    yield gen.sleep(0.05)
    assert child.is_alive()
    assert not stopped.is_set()

    yield child.terminate()
    yield stopped.wait(wait_limit)
    assert stopped.is_set()
def test_num_fds():
    """Running an AsyncProcess to completion must not leave extra open fds.

    Skipped when psutil is not importable.
    """
    psutil = pytest.importorskip("psutil")

    # Warm up
    child = AsyncProcess(target=exit_now)
    child.daemon = True
    yield child.start()
    yield child.join()

    this_process = psutil.Process()
    baseline = this_process.num_fds()

    # Same variable is rebound on purpose, as in the warm-up above.
    child = AsyncProcess(target=exit_now)
    child.daemon = True
    yield child.start()
    yield child.join()
    assert not child.is_alive()
    assert child.exitcode == 0

    # Poll until the fd count is back at (or below) the baseline; fail if
    # that takes 10 seconds or more.
    began = time()
    while this_process.num_fds() > baseline:
        yield gen.sleep(0.1)
        print("fds:", baseline, this_process.num_fds())
        assert time() < began + 10
def test_simple():
    """
    Walk an AsyncProcess through its basic lifecycle.

    Covers: attribute values before start, join() before start, join() with
    a short timeout while the child is alive, changing ``daemon`` after
    start, normal exit, a repeated join(), and finally that both the
    AsyncProcess and its ``_process`` attribute get garbage-collected.
    """
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    # Weak references used at the end to check that nothing keeps the
    # AsyncProcess (wr1) or its underlying process object (wr2) alive.
    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # join() with a short timeout must come back while the child still runs
    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    # The value sent on to_child is expected back on from_child
    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    # Drop the only local strong reference, then give the object up to one
    # second to disappear.
    del proc
    gc.collect()
    start = time()
    while wr1() is not None and time() < start + 1:
        # Perhaps the GIL switched before _watch_process() exit,
        # help it a little
        sleep(0.001)
        gc.collect()
    if wr1() is not None:
        # Help diagnosing
        from types import FrameType

        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            # Release the temporary strong reference before reporting
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i, f.f_code.co_name, f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")

    # The underlying process object must go away too, within 2 seconds
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
        dt = time() - t1
        assert dt < 2.0