def _test_workspace_concurrency(tmpdir, timeout, max_procs):
    """WorkSpace concurrency test.

    We merely check that no exception or deadlock happens.
    """
    base_dir = str(tmpdir)
    err_q = mp_context.Queue()
    purged_q = mp_context.Queue()
    stop_evt = mp_context.Event()

    ws = WorkSpace(base_dir)
    # Purging must only happen inside the child processes.
    ws._purge_leftovers = lambda: None

    # Spawn a bunch of children that will all try to purge concurrently.
    nprocs = 2 if sys.platform == "win32" else max_procs
    workers = [
        mp_context.Process(
            target=_workspace_concurrency,
            args=(base_dir, purged_q, err_q, stop_evt),
        )
        for _ in range(nprocs)
    ]
    for worker in workers:
        worker.start()

    n_created = 0
    n_purged = 0
    try:
        deadline = time() + timeout
        while time() < deadline:
            # Create a batch of locked work dirs and simulate forgetting
            # them; the children should race to purge them.
            for _ in range(50):
                work_dir = ws.new_work_dir(prefix="workspace-concurrency-")
                work_dir._finalizer.detach()
                n_created += 1
            sleep(1e-2)
    finally:
        stop_evt.set()
        for worker in workers:
            worker.join()

    # Re-raise the first error reported by any child, if there was one.
    try:
        child_err = err_q.get_nowait()
    except Empty:
        pass
    else:
        raise child_err

    # Drain the purge counters reported by the children.
    try:
        while True:
            n_purged += purged_q.get_nowait()
    except Empty:
        pass

    # We attempted to purge most directories at some point
    assert n_purged >= 0.5 * n_created > 0
    return n_created, n_purged
def _test_workspace_concurrency(tmpdir, timeout, max_procs):
    """Stress WorkSpace purging from several processes at once and return
    ``(n_created, n_purged)`` so the caller can assert on the counts."""
    base_dir = str(tmpdir)
    err_q = mp_context.Queue()
    purged_q = mp_context.Queue()
    stop_evt = mp_context.Event()

    ws = WorkSpace(base_dir)
    # Disable purging in this (parent) process; only children may purge.
    ws._purge_leftovers = lambda: None

    n_workers = 2 if sys.platform == 'win32' else max_procs
    children = []
    for _ in range(n_workers):
        child = mp_context.Process(
            target=_workspace_concurrency,
            args=(base_dir, purged_q, err_q, stop_evt),
        )
        children.append(child)
        child.start()

    n_created = 0
    n_purged = 0
    try:
        start_t = time()
        while time() - start_t < timeout:
            # Add a bunch of locks, and simulate forgetting them
            for _ in range(50):
                work_dir = ws.new_work_dir(prefix='workspace-concurrency-')
                work_dir._finalizer.detach()
                n_created += 1
            sleep(1e-2)
    finally:
        stop_evt.set()
        for child in children:
            child.join()

    # Any errors?  Re-raise the first one a child reported.
    try:
        child_err = err_q.get_nowait()
    except Empty:
        pass
    else:
        raise child_err

    # Sum the purge counts reported by the children.
    try:
        while True:
            n_purged += purged_q.get_nowait()
    except Empty:
        pass

    return n_created, n_purged
async def start(self):
    """Start the CUDA scheduler child process and wait for it to come up.

    Idempotent: if the process is already ``"running"`` the current status
    is returned immediately; if another coroutine is mid-start
    (``"starting"``) we wait on the ``running`` event instead of spawning
    a second process.
    """
    if self.status == "running":
        return self.status
    if self.status == "starting":
        await self.running.wait()
        return self.status
    # Queues used to exchange startup info and stop signals with the child.
    self.init_result_q = init_q = mp_context.Queue()
    self.child_info_stop_q = mp_context.Queue()
    self.parent_info_q = mp_context.Queue()
    self.process = AsyncProcess(
        target=self._run,
        name="Dask CUDA Scheduler process",
        kwargs=dict(
            proc_cls=self.proc_cls,
            kwargs=self.kwargs,
            silence_logs=False,
            init_result_q=self.init_result_q,
            child_info_stop_q=self.child_info_stop_q,
            parent_info_q=self.parent_info_q,
            env=self.env,
        ),
    )
    # self.process.daemon = dask.config.get("distributed.worker.daemon", default=True)
    self.process.set_exit_callback(self._on_exit)
    self.running = Event()
    self.stopped = Event()
    self.status = "starting"
    try:
        await self.process.start()
    except OSError:
        logger.exception("Failed to start CUDA Scheduler process", exc_info=True)
        self.process.terminate()
        # NOTE(review): bare return yields None here, unlike the other exits
        # which return self.status — confirm callers tolerate None.
        return
    # Block until the child reports its listening address (or fails).
    msg = await self._wait_until_started()
    if not msg:
        return self.status
    self.address = msg["address"]
    assert self.address
    self.status = "running"
    self.running.set()
    # The init queue is one-shot; close it once the handshake is done.
    init_q.close()
    await super().start()
async def test_exit_callback():
    """The exit callback fires exactly after the child process has ended,
    both for a normal exit and for a forced termination."""
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    callback_fired = Event()

    # FIXME: this breaks if changed to async def...
    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        callback_fired.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    callback_fired.clear()
    proc.daemon = True
    proc.set_exit_callback(on_stop)
    await proc.start()
    await asyncio.sleep(0.05)
    assert proc.is_alive()
    assert not callback_fired.is_set()
    # Telling the feeder to stop makes the child exit normally.
    to_child.put(None)
    await callback_fired.wait(timedelta(seconds=3))
    assert callback_fired.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    callback_fired.clear()
    proc.daemon = True
    proc.set_exit_callback(on_stop)
    await proc.start()
    await asyncio.sleep(0.05)
    assert proc.is_alive()
    assert not callback_fired.is_set()
    await proc.terminate()
    await callback_fired.wait(timedelta(seconds=3))
    assert callback_fired.is_set()
def test_child_main_thread():
    """
    The main thread in the child should be called "MainThread".
    """
    info_q = mp_context.Queue()
    child = AsyncProcess(target=threads_info, args=(info_q,))
    yield child.start()
    yield child.join()
    # The child reports its thread count, then its main thread's name.
    thread_count = info_q.get()
    main_thread_name = info_q.get()
    assert thread_count == 1
    assert main_thread_name == "MainThread"
async def test_child_main_thread():
    """
    The main thread in the child should be called "MainThread".
    """
    info_q = mp_context.Queue()
    child = AsyncProcess(target=threads_info, args=(info_q,))
    await child.start()
    await child.join()
    # The child reports its thread count, then its main thread's name.
    thread_count = info_q.get()
    main_thread_name = info_q.get()
    # A few auxiliary threads (e.g. queue feeder threads) may exist.
    assert thread_count <= 3
    assert main_thread_name == "MainThread"
    # Explicitly release the queue and its pipe ends to avoid fd leaks.
    info_q.close()
    info_q._reader.close()
    info_q._writer.close()
def test_exitcode():
    """``exitcode`` is None until the child ends, then reflects its status."""
    code_q = mp_context.Queue()
    child = AsyncProcess(target=exit, kwargs={'q': code_q})
    child.daemon = True

    # Before start(): not alive, no exit code yet.
    assert not child.is_alive()
    assert child.exitcode is None

    yield child.start()
    assert child.is_alive()
    assert child.exitcode is None

    # Ask the child to exit with status 5.
    code_q.put(5)
    yield child.join(timeout=3.0)
    assert not child.is_alive()
    assert child.exitcode == 5
def test_simple():
    """End-to-end lifecycle test of AsyncProcess: attribute state before and
    after start(), join() timeout semantics, and garbage collection of the
    wrapper and underlying process once dereferenced."""
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    # Fresh, unstarted process: no pid, no exitcode, not a daemon by default.
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    # Weak refs to detect leaks of the wrapper and the wrapped process.
    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # join() with a tiny timeout must return promptly without reaping.
    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    # The feed() target echoes one item back, then exits.
    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    del proc
    gc.collect()
    if wr1() is not None:
        # Help diagnosing: dump referrers (especially frames) keeping it alive.
        from types import FrameType
        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i, f.f_code.co_name, f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")

    # The wrapped process may need a couple of GC passes to go away.
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
    dt = time() - t1
    assert dt < 2.0
async def start(self) -> Status:
    """
    Ensure the worker process is started.

    Idempotent: returns immediately if already running, and waits on the
    ``running`` event (rather than spawning a second process) if another
    coroutine is mid-start.  Returns the resulting :class:`Status`.
    """
    enable_proctitle_on_children()
    if self.status == Status.running:
        return self.status
    if self.status == Status.starting:
        await self.running.wait()
        return self.status
    # Queues for the startup handshake and for signalling the child to stop.
    self.init_result_q = init_q = mp_context.Queue()
    self.child_stop_q = mp_context.Queue()
    # Random token used to match handshake messages with this particular
    # incarnation of the child process.
    uid = uuid.uuid4().hex
    self.process = AsyncProcess(
        target=self._run,
        name="Dask Worker process (from Nanny)",
        kwargs=dict(
            worker_kwargs=self.worker_kwargs,
            worker_start_args=self.worker_start_args,
            silence_logs=self.silence_logs,
            init_result_q=self.init_result_q,
            child_stop_q=self.child_stop_q,
            uid=uid,
            Worker=self.Worker,
            env=self.env,
            config=self.config,
        ),
    )
    self.process.daemon = dask.config.get("distributed.worker.daemon", default=True)
    self.process.set_exit_callback(self._on_exit)
    self.running = asyncio.Event()
    self.stopped = asyncio.Event()
    self.status = Status.starting
    try:
        await self.process.start()
    except OSError:
        # Could not even fork/spawn the child: mark failed and report it.
        logger.exception("Nanny failed to start process", exc_info=True)
        self.process.terminate()
        self.status = Status.failed
        return self.status
    try:
        msg = await self._wait_until_connected(uid)
    except Exception:
        # Handshake failed: don't leave an orphaned child behind.
        self.status = Status.failed
        self.process.terminate()
        raise
    if not msg:
        return self.status
    self.worker_address = msg["address"]
    self.worker_dir = msg["dir"]
    assert self.worker_address
    self.status = Status.running
    self.running.set()
    # The init queue is one-shot; close it once the handshake is done.
    init_q.close()
    return self.status
def test_workspace_concurrency(tmpdir):
    """WorkSpace concurrency test. We merely check that no exception or
    deadlock happens.
    """
    base_dir = str(tmpdir)
    err_q = mp_context.Queue()
    purged_q = mp_context.Queue()
    stop_evt = mp_context.Event()

    ws = WorkSpace(base_dir)
    # Purging must only happen inside the child processes.
    ws._purge_leftovers = lambda: None

    # Windows (or at least Windows GitHub CI) has been observed to be
    # exceptionally slow.  Don't stress it too much.
    max_procs = 2 if WINDOWS else 16

    # Run a bunch of child processes that will try to purge concurrently;
    # the barrier makes sure they all start racing at the same moment.
    barrier = mp_context.Barrier(parties=max_procs + 1)
    children = []
    for _ in range(max_procs):
        child = mp_context.Process(
            target=_workspace_concurrency,
            args=(base_dir, purged_q, err_q, stop_evt, barrier),
        )
        children.append(child)
        child.start()
    barrier.wait()

    n_created = 0
    n_purged = 0
    start_t = time()
    try:
        # On Linux, you will typically end with n_created > 10.000
        # On Windows, it can take 60 seconds to create 50 locks!
        while time() - start_t < 10:
            # Add a bunch of locks and simulate forgetting them.
            # The concurrent processes should try to purge them.
            for _ in range(100):
                work_dir = ws.new_work_dir(prefix="workspace-concurrency-")
                work_dir._finalizer.detach()
                n_created += 1
    finally:
        stop_evt.set()
        for child in children:
            child.join()

    # Any errors?  Re-raise the first one a child reported.
    try:
        child_err = err_q.get_nowait()
    except queue.Empty:
        pass
    else:
        raise child_err

    # Sum the purge counts reported by the children.
    try:
        while True:
            n_purged += purged_q.get_nowait()
    except queue.Empty:
        pass

    # We attempted to purge most directories at some point
    assert n_purged >= 0.5 * n_created > 0