def coro():
    """Start a test cluster, run ``func`` against it, then tear it down.

    This is a generator-style coroutine: it yields the objects returned by
    ``start_cluster``, ``Client``, ``func`` and ``end_cluster`` and finishes
    with ``raise gen.Return(result)``.  Every name that is not assigned here
    (``ncores``, ``scheduler``, ``loop``, ``security``, ``Worker``,
    ``scheduler_kwargs``, ``worker_kwargs``, ``workers``, ``client``,
    ``func``, ``old_globals`` ...) is resolved from the surrounding scope.

    ``func`` is called with ``(s, *workers)``, or with ``(c, s, *workers)``
    when ``client`` is truthy.

    Returns (via ``gen.Return``) whatever ``func`` produced.  Exceptions
    raised while connecting the client or running ``func`` propagate after
    the cleanup in ``finally`` has run.
    """
    s, ws = yield start_cluster(
        ncores, scheduler, loop, security=security,
        Worker=Worker, scheduler_kwargs=scheduler_kwargs,
        worker_kwargs=worker_kwargs)
    # Update the shared list in place so other holders of ``workers`` see
    # the started workers too.
    workers[:] = ws
    args = [s] + workers

    c = None
    try:
        if client:
            # Connect inside the ``try``: if the connection fails, the
            # cluster started above is still shut down and ``_globals`` is
            # still restored instead of being leaked.
            c = yield Client(s.address, loop=loop, security=security,
                             asynchronous=True)
            args = [c] + args
        result = yield func(*args)
        # NOTE: a per-worker ``assert not w._comms`` check is currently
        # disabled here.
    finally:
        # Close only a client that was actually created.
        if c is not None:
            yield c._close()
        yield end_cluster(s, workers)
        # Restore the global settings captured in ``old_globals``.
        _globals.clear()
        _globals.update(old_globals)

    raise gen.Return(result)
def coro():
    """Start a test cluster (with retries), run ``func``, then tear it down.

    This is a generator-style coroutine: it yields the objects returned by
    ``start_cluster``, ``Client``, ``func`` and ``end_cluster`` and finishes
    with ``raise gen.Return(result)``.  Every name that is not assigned here
    (``ncores``, ``scheduler``, ``loop``, ``security``, ``Worker``,
    ``scheduler_kwargs``, ``worker_kwargs``, ``workers``, ``client``,
    ``func``, ``old_globals``, ``logger`` ...) is resolved from the
    surrounding scope.

    Cluster start-up is attempted up to five times; each failure is logged.
    If the last attempt fails as well, that exception is re-raised.

    ``func`` is called with ``(s, *workers)``, or with ``(c, s, *workers)``
    when ``client`` is truthy.  After ``func`` completes, the scheduler
    state is validated when ``s.validate`` is truthy.

    Returns (via ``gen.Return``) whatever ``func`` produced.
    """
    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            s, ws = yield start_cluster(
                ncores, scheduler, loop, security=security,
                Worker=Worker, scheduler_kwargs=scheduler_kwargs,
                worker_kwargs=worker_kwargs)
        except Exception:
            logger.error("Failed to start gen_cluster, retryng")
            if attempt == max_attempts - 1:
                # Out of retries: surface the real start-up failure rather
                # than falling through to an UnboundLocalError on ``ws``.
                raise
        else:
            break

    # Update the shared list in place so other holders of ``workers`` see
    # the started workers too.
    workers[:] = ws
    args = [s] + workers

    c = None
    try:
        if client:
            # Connect inside the ``try``: if the connection fails, the
            # cluster started above is still shut down and ``_globals`` is
            # still restored instead of being leaked.
            c = yield Client(s.address, loop=loop, security=security,
                             asynchronous=True)
            args = [c] + args
        result = yield func(*args)
        if s.validate:
            s.validate_state()
    finally:
        # Close only a client that was actually created.
        if c is not None:
            yield c._close()
        yield end_cluster(s, workers)
        # Restore the global settings captured in ``old_globals``.
        _globals.clear()
        _globals.update(old_globals)

    raise gen.Return(result)
def coro():
    """Start a test cluster (with retries), run ``func``, then tear it down.

    This is a generator-style coroutine: it yields the objects returned by
    ``start_cluster``, ``Client``, ``func``/``gen.with_timeout``,
    ``end_cluster`` and ``cleanup_global_workers`` and finishes with
    ``raise gen.Return(result)``.  Every name that is not assigned here
    (``ncores``, ``scheduler``, ``loop``, ``security``, ``Worker``,
    ``scheduler_kwargs``, ``worker_kwargs``, ``workers``, ``client``,
    ``func``, ``timeout``, ``old_globals``, ``logger`` ...) is resolved from
    the surrounding scope.

    Cluster start-up is attempted up to five times; each failure is logged
    with its traceback.  If the last attempt fails as well, that exception
    is re-raised.

    ``func`` is called with ``(s, *workers)``, or with ``(c, s, *workers)``
    when ``client`` is truthy.  When ``timeout`` is truthy the call is
    wrapped in ``gen.with_timeout`` using ``timeout`` seconds.  After
    ``func`` completes, the scheduler state is validated when ``s.validate``
    is truthy.

    Returns (via ``gen.Return``) whatever ``func`` produced.
    """
    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            s, ws = yield start_cluster(
                ncores, scheduler, loop, security=security,
                Worker=Worker, scheduler_kwargs=scheduler_kwargs,
                worker_kwargs=worker_kwargs)
        except Exception:
            logger.error(
                "Failed to start gen_cluster, retryng", exc_info=True)
            if attempt == max_attempts - 1:
                # Out of retries: surface the real start-up failure rather
                # than falling through to an UnboundLocalError on ``ws``.
                raise
        else:
            break

    # Update the shared list in place so other holders of ``workers`` see
    # the started workers too.
    workers[:] = ws
    args = [s] + workers

    c = None
    try:
        if client:
            # Connect inside the ``try``: if the connection fails, the
            # cluster started above is still shut down and ``_globals`` is
            # still restored instead of being leaked.
            c = yield Client(s.address, loop=loop, security=security,
                             asynchronous=True)
            args = [c] + args
        future = func(*args)
        if timeout:
            future = gen.with_timeout(
                timedelta(seconds=timeout), future)
        result = yield future
        if s.validate:
            s.validate_state()
    finally:
        # Close only a client that was actually created; request a fast
        # close when the scheduler already reports itself closed.
        if c is not None:
            yield c._close(fast=s.status == 'closed')
        yield end_cluster(s, workers)
        # Give the global-worker cleanup at most one second.
        yield gen.with_timeout(timedelta(seconds=1),
                               cleanup_global_workers())
        # Restore the global settings captured in ``old_globals``.
        _globals.clear()
        _globals.update(old_globals)

    raise gen.Return(result)
def cluster(nworkers=2, nanny=False, worker_kwargs={}, active_rpc_timeout=1,
            scheduler_kwargs={}):
    """Launch a scheduler and workers in separate processes for a test.

    This is a generator that yields exactly once, so it is presumably
    wrapped as a context manager by a decorator outside this view.

    Parameters
    ----------
    nworkers : int
        Number of worker processes to launch.
    nanny : bool
        When true the worker processes run ``run_nanny``; otherwise they
        run ``run_worker``.
    worker_kwargs : dict
        Merged over the defaults ``ncores=1``, a fresh ``local_dir`` and
        ``memory_limit=TOTAL_MEMORY``; the result is passed as keyword
        arguments to each worker process target.
    active_rpc_timeout : number
        Forwarded to ``check_active_rpc``.
    scheduler_kwargs : dict
        Passed as keyword arguments to ``run_scheduler``.

    Yields
    ------
    tuple
        ``({'address': saddr}, workers)`` where ``workers`` is a list of
        ``{'address': ..., 'proc': weakref.ref(process)}`` dicts, one per
        worker.

    Raises
    ------
    Exception
        "Timeout on cluster creation" if, more than 5 seconds after the
        addresses were collected, the scheduler's ``ncores`` reply still
        does not contain ``nworkers`` entries.
    AssertionError
        If any launched process object is still referenced after teardown
        (``ws`` is a ``WeakSet`` of those processes).
    """
    # Weak references to every process we spawn; checked by the final assert.
    ws = weakref.WeakSet()
    # Snapshot of the global settings, restored during teardown.
    old_globals = _globals.copy()

    # Apply the levels listed in ``logging_levels`` to the named loggers.
    for name, level in logging_levels.items():
        logging.getLogger(name).setLevel(level)

    with pristine_loop() as loop:
        with check_active_rpc(loop, active_rpc_timeout):
            if nanny:
                _run_worker = run_nanny
            else:
                _run_worker = run_worker

            # The scheduler queue will receive the scheduler's address
            scheduler_q = mp_context.Queue()

            # Launch scheduler
            # NOTE(review): the meaning of the ``nworkers + 1`` argument is
            # defined by ``run_scheduler`` -- confirm there.
            scheduler = mp_context.Process(target=run_scheduler,
                                           args=(scheduler_q, nworkers + 1),
                                           kwargs=scheduler_kwargs)
            ws.add(scheduler)
            scheduler.daemon = True
            scheduler.start()

            # Launch workers
            workers = []
            for i in range(nworkers):
                q = mp_context.Queue()
                # Unique per-worker directory name, relative to the current
                # working directory.
                fn = '_test_worker-%s' % uuid.uuid4()
                kwargs = merge(
                    {
                        'ncores': 1,
                        'local_dir': fn,
                        'memory_limit': TOTAL_MEMORY
                    }, worker_kwargs)
                proc = mp_context.Process(target=_run_worker,
                                          args=(q, scheduler_q),
                                          kwargs=kwargs)
                ws.add(proc)
                workers.append({'proc': proc, 'queue': q, 'dir': fn})

            # Start every worker first, then block on each worker's queue
            # for the value recorded as its address.
            for worker in workers:
                worker['proc'].start()
            for worker in workers:
                worker['address'] = worker['queue'].get()

            saddr = scheduler_q.get()

            start = time()
            try:
                # Poll the scheduler until its ``ncores`` reply has one
                # entry per worker, giving up after 5 seconds.
                with rpc(saddr) as s:
                    while True:
                        ncores = loop.run_sync(s.ncores)
                        if len(ncores) == nworkers:
                            break
                        if time() - start > 5:
                            raise Exception("Timeout on cluster creation")

                # avoid sending processes down to function
                yield {
                    'address': saddr
                }, [{
                    'address': w['address'],
                    'proc': weakref.ref(w['proc'])
                } for w in workers]
            finally:
                logger.debug("Closing out test cluster")

                # Disconnect workers and scheduler, each with a 0.5 s timeout.
                loop.run_sync(lambda: disconnect_all(
                    [w['address'] for w in workers], timeout=0.5))
                loop.run_sync(lambda: disconnect(saddr, timeout=0.5))

                # Terminate the scheduler and close its queue, including
                # the queue's private reader/writer handles.
                scheduler.terminate()
                scheduler_q.close()
                scheduler_q._reader.close()
                scheduler_q._writer.close()

                # Same for every worker process and its queue.
                for w in workers:
                    w['proc'].terminate()
                    w['queue'].close()
                    w['queue']._reader.close()
                    w['queue']._writer.close()

                # Wait up to 2 seconds for each process to exit.
                scheduler.join(2)
                # Presumably dropped so that the WeakSet entry can vanish
                # before the final ``assert not ws``.
                del scheduler
                for proc in [w['proc'] for w in workers]:
                    proc.join(timeout=2)

                # The loop variables may never have been bound (for example
                # with no workers), hence the UnboundLocalError guard.
                with ignoring(UnboundLocalError):
                    del worker, w, proc
                del workers[:]

                # Removes every path matching the pattern in the current
                # working directory, not only the ones created by this call.
                for fn in glob('_test_worker-*'):
                    shutil.rmtree(fn)

                # Restore the global settings captured at the top.
                _globals.clear()
                _globals.update(old_globals)
    # Every process object must be unreferenced (collected) by now.
    assert not ws