import typing
from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor


def executor(kind: str, max_workers: int, daemon=True) -> typing.Iterator[Executor]:
    """General purpose utility to get an executor.

    This allows us to easily use other executors as needed.
    """
    if kind == "thread":
        with ThreadPoolExecutor(max_workers=max_workers) as pool_t:
            yield pool_t
    elif kind == "process":
        with ProcessPoolExecutor(max_workers=max_workers) as pool_p:
            yield pool_p
    elif kind in ["dask", "dask-process", "dask-thread"]:
        import dask
        import distributed
        from distributed.cfexecutor import ClientExecutor

        processes = kind in ("dask", "dask-process")
        with dask.config.set({"distributed.worker.daemon": daemon}):
            with distributed.LocalCluster(
                n_workers=max_workers,
                processes=processes,
            ) as cluster:
                with distributed.Client(cluster) as client:
                    yield ClientExecutor(client)
    else:
        raise NotImplementedError("That kind is not implemented")
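# Hedged usage sketch for the generator above: the single yield per branch
# suggests it is meant to be wrapped with contextlib.contextmanager at the
# call or definition site (an assumption; the helper below is illustrative).
from contextlib import contextmanager


def _square(x: int) -> int:
    return x * x


with contextmanager(executor)("thread", max_workers=4) as pool:
    # Executor.map behaves the same for thread, process and dask executors.
    assert list(pool.map(_square, range(8))) == [x * x for x in range(8)]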
from typing import cast

import dask.bag
import distributed

# 'retrieve' and 'Cache' are project-local names, assumed importable from
# the surrounding package.


def test_retrieve() -> None:
    cluster = distributed.LocalCluster(
        ip='localhost:8786',
        # I want a bokeh interface to check progress
        dashboard_address='localhost:8787',
        # single process, single thread allows ctrl+C backtrace to
        # show where the code is getting stuck. Otherwise, it will say,
        # "I'm stuck waiting for other processes." It also makes
        # time_code more meaningful
        processes=False,
        threads_per_worker=1,
    )
    # TODO: put this in a reusable module
    with distributed.Client(cluster):
        # disable the cache, because I don't want to persist these results
        # in the cloud
        for cached_func in [
                retrieve.get_rfs, retrieve.get_paragraphs,
                retrieve.get_raw_forms, retrieve.get_indexes,
        ]:
            assert isinstance(cached_func, Cache)
            cast(Cache, cached_func).disabled = True
        rfs = dask.bag.zip(  # pylint: disable=unused-variable
            retrieve.get_indexes('10-K', 1995, 1),
            retrieve.get_rfs(1995, 1)).take(10, npartitions=1)
import distributed


def external_client():
    # Explicitly specify we want only 4 workers so that when running on
    # continuous integration we don't request too many.
    cluster = distributed.LocalCluster(n_workers=4)
    client = distributed.Client(cluster)
    yield client
    client.close()
    cluster.close()
import distributed


def dask_client():
    cluster = distributed.LocalCluster(n_workers=3, threads_per_worker=1)
    client = distributed.Client(cluster)
    yield client
    client.close()
    cluster.close()
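# Hedged sketch: the two generators above read like pytest fixtures with the
# @pytest.fixture decorator stripped. A test consuming dask_client might look
# like this (test name and body are illustrative, not from the source).
def test_client_submits_work(dask_client):
    future = dask_client.submit(sum, [1, 2, 3])
    assert future.result() == 6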
def start(self, n_workers):
    import distributed

    self.cluster = distributed.LocalCluster(
        n_workers=n_workers,
        threads_per_worker=1,
        scheduler_port=self.port,
        diagnostics_port=None,
    )
    self.ctx = DistributedContext(port=self.port)
    assert len(self.ctx.executor.ncores()) == n_workers
from pprint import pformat
from typing import Iterable

import dask
import distributed


def dask_client(mock_service_envs: None) -> Iterable[distributed.Client]:
    print(pformat(dask.config.get("distributed")))
    with distributed.LocalCluster(
        worker_class=distributed.Worker,
        **{
            "resources": {"CPU": 10, "GPU": 10, "MPI": 1},
            "preload": "simcore_service_dask_sidecar.tasks",
        },
    ) as cluster:
        with distributed.Client(cluster) as client:
            yield client
import logging

import distributed

# module-level logger, assumed to exist in the original module
logger = logging.getLogger(__name__)


def init_client(processes, max_memory):
    memory_limit = int(max_memory / processes)
    memory_limit = '{0:d}MB'.format(memory_limit)
    logger.info(
        'Initialising client with {0:d} workers and {1:s} per worker'.format(
            processes, memory_limit))
    cluster = distributed.LocalCluster(
        n_workers=processes,
        threads_per_worker=1,
        memory_limit=memory_limit,
        local_directory='/scratch/u/u300636',
    )
    client = distributed.Client(cluster)
    logger.info('Initialised client: {0}'.format(client))
    return client
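# Illustrative call, assuming 'max_memory' is a total budget in MB that is
# split evenly across workers, as the division above implies.
client = init_client(processes=4, max_memory=8192)  # 4 workers x 2048MB each
try:
    print(client.submit(abs, -42).result())
finally:
    client.close()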
import distributed


def test_setup_executor_distributed(n_workers=1, threads_per_worker=2):
    cluster = distributed.LocalCluster(n_workers=n_workers,
                                       threads_per_worker=threads_per_worker)
    client = distributed.Client(cluster)
    address = cluster.scheduler.address
    test = executor.setup_executor(address)
    assert test.scheduler.address == cluster.scheduler.address
    assert client.scheduler_info() == test.scheduler_info()
    test.close()
    cluster.close()
    client.close()
from concurrent.futures import (ProcessPoolExecutor, ThreadPoolExecutor,
                                as_completed)


def executor(kind, max_workers):
    """General purpose utility to get an executor with its as_completed handler

    This allows us to easily use other executors as needed.
    """
    if kind == 'thread':
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            yield pool, as_completed
    elif kind == 'process':
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            yield pool, as_completed
    elif kind == 'dask':
        import distributed

        with distributed.LocalCluster(n_workers=max_workers) as cluster:
            with distributed.Client(cluster) as client:
                yield client, distributed.as_completed
    else:
        raise NotImplementedError('That kind is not implemented')
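# Hedged usage sketch for the (pool, as_completed) pair: yielding the matching
# as_completed alongside the executor lets call sites iterate futures as they
# finish without knowing which backend is active (the wrapper usage is ours).
from contextlib import contextmanager

with contextmanager(executor)('thread', max_workers=2) as (pool, as_done):
    futures = [pool.submit(pow, n, 2) for n in range(4)]
    assert sorted(f.result() for f in as_done(futures)) == [0, 1, 4, 9]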
import distributed as dd
import numpy as np
import pytest

# Project-specific helpers (api, DaskJobExecutor, DebugDeviceUDF, _mk_random,
# _naive_mask_apply) are assumed to be provided by the surrounding test module.


def test_use_plain_dask(hdf5_ds_1):
    # We deactivate the resource scheduling and run on a plain dask cluster
    hdf5_ds_1.set_num_cores(2)
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)
    with dd.LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
        client = dd.Client(cluster, set_as_default=False)
        try:
            executor = DaskJobExecutor(client=client)
            ctx = api.Context(executor=executor)
            analysis = ctx.create_mask_analysis(
                dataset=hdf5_ds_1, factories=[lambda: mask]
            )
            results = ctx.run(analysis)
            udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
            # Requesting CuPy, which is not available
            with pytest.raises(RuntimeError):
                _ = ctx.run_udf(
                    udf=DebugDeviceUDF(backends=('cupy',)),
                    dataset=hdf5_ds_1,
                )
        finally:
            # to fix "distributed.client - ERROR - Failed to reconnect to
            # scheduler after 10.00 seconds, closing client"
            client.close()

    assert np.allclose(results.mask_0.raw_data, expected)

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        # Default without worker setup
        assert val["cpu"] == 0

    for val in udf_res['backend'].data[0].values():
        print(val)
        # no CUDA
        assert 'numpy' in val

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
import typing
from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor


def executor(kind: str, max_workers: int) -> typing.Iterator[Executor]:
    """General purpose utility to get an executor.

    This allows us to easily use other executors as needed.
    """
    if kind == "thread":
        with ThreadPoolExecutor(max_workers=max_workers) as pool_t:
            yield pool_t
    elif kind == "process":
        with ProcessPoolExecutor(max_workers=max_workers) as pool_p:
            yield pool_p
    elif kind == "dask":
        import distributed
        from distributed.cfexecutor import ClientExecutor

        with distributed.LocalCluster(n_workers=max_workers) as cluster:
            with distributed.Client(cluster) as client:
                yield ClientExecutor(client)
    else:
        raise NotImplementedError("That kind is not implemented")
def __init__(self, memory_per_worker=1024, procs_per_worker=1, pool=None,
             reserved_memory=None, schedd_name=None, threads_per_worker=1,
             cleanup_interval=1000, worker_timeout=(24 * 60 * 60),
             **kwargs):
    global _global_schedulers

    if schedd_name is None:
        self.schedd = htcondor.Schedd()
    else:
        collector = htcondor.Collector(pool)
        self.schedd = htcondor.Schedd(
            collector.locate(htcondor.DaemonTypes.Schedd, schedd_name))

    self.local_cluster = distributed.LocalCluster(ip='', n_workers=0,
                                                  **kwargs)
    _global_schedulers.append((self.scheduler.id, self.schedd))

    self.jobs = {}  # {jobid: CLASSAD}
    if cleanup_interval < 1:
        raise ValueError("cleanup_interval must be >= 1")
    self._cleanup_callback = tornado.ioloop.PeriodicCallback(
        callback=self.cleanup_jobs,
        callback_time=cleanup_interval,
        io_loop=self.scheduler.loop)
    self._cleanup_callback.start()
    self.memory_per_worker = memory_per_worker
    self.procs_per_worker = procs_per_worker
    self.threads_per_worker = threads_per_worker
    self.reserved_memory = reserved_memory
    self.worker_timeout = worker_timeout
import distributed


def _exec_calcs(calcs, parallelize=False, client=None, **compute_kwargs):
    """Execute the given calculations.

    Parameters
    ----------
    calcs : Sequence of ``aospy.Calc`` objects
    parallelize : bool, default False
        Whether to submit the calculations in parallel or not
    client : distributed.Client or None
        The distributed Client used if parallelize is set to True; if None
        a distributed LocalCluster is used.
    compute_kwargs : dict of keyword arguments passed to ``Calc.compute``

    Returns
    -------
    A list of the values returned by each Calc object that was executed.
    """
    if parallelize:
        def func(calc):
            """Wrap _compute_or_skip_on_error to require only the calc
            argument"""
            if 'write_to_tar' in compute_kwargs:
                compute_kwargs['write_to_tar'] = False
            return _compute_or_skip_on_error(calc, compute_kwargs)

        if client is None:
            n_workers = _n_workers_for_local_cluster(calcs)
            with distributed.LocalCluster(n_workers=n_workers) as cluster:
                with distributed.Client(cluster) as client:
                    result = _submit_calcs_on_client(calcs, client, func)
        else:
            result = _submit_calcs_on_client(calcs, client, func)
        if compute_kwargs['write_to_tar']:
            _serial_write_to_tar(calcs)
        return result
    else:
        return [_compute_or_skip_on_error(calc, compute_kwargs)
                for calc in calcs]
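# Hypothetical call of _exec_calcs: 'calcs' stands in for whatever project
# code builds the sequence of aospy.Calc objects, which is not shown here.
def run_all(calcs):
    # Parallel on an ad-hoc LocalCluster (client=None), skipping tar output
    return _exec_calcs(calcs, parallelize=True, client=None,
                       write_to_tar=False)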
def __init__(self, memory_per_worker=1024, disk_per_worker=1048576,
             pool=None, schedd_name=None, threads_per_worker=1,
             update_interval=1000, worker_timeout=(24 * 60 * 60),
             scheduler_port=8786, worker_tarball=None, pre_script=None,
             transfer_files=None, logdir='.', logger=None, **kwargs):
    self.logger = logger or logging.getLogger(__name__)

    if 'procs_per_worker' in kwargs:
        self.logger.warning("Multiple processes and adaptive scaling"
                            " don't mix; ignoring procs_per_worker")
    self.procs_per_worker = 1
    self.memory_per_worker = memory_per_worker
    self.disk_per_worker = disk_per_worker
    self.threads_per_worker = threads_per_worker
    if int(update_interval) < 1:
        raise ValueError("update_interval must be >= 1")
    self.worker_timeout = worker_timeout
    self.worker_tarball = worker_tarball
    self.pre_script = pre_script
    self.transfer_files = transfer_files

    if schedd_name is None:
        self.schedd = htcondor.Schedd()
    else:
        collector = htcondor.Collector(pool)
        self.schedd = htcondor.Schedd(
            collector.locate(htcondor.DaemonTypes.Schedd, schedd_name))

    self.script = None
    if self.worker_tarball:
        if '://' not in self.worker_tarball:
            self._verify_tarball()
        pre_script_in_wrapper = ""
        if self.pre_script:
            pre_script_in_wrapper = "./" + os.path.basename(self.pre_script)
        # text mode so the shell template can be written as str on Python 3
        self.script = tempfile.NamedTemporaryFile(
            mode='w', suffix='.sh', prefix='dask-worker-wrapper-')
        self.script.write(SCRIPT_TEMPLATE % {
            'worker_tarball': os.path.basename(self.worker_tarball),
            'pre_script': pre_script_in_wrapper})
        self.script.flush()

        @atexit.register
        def _erase_script():
            self.script.close()

    self.logdir = logdir
    try:
        os.makedirs(self.logdir)
    except OSError as err:
        if err.errno == errno.EEXIST:
            pass
        else:
            self.logger.warning("Couldn't make log dir: %s", err)

    self.local_cluster = distributed.LocalCluster(
        ip='', n_workers=0, scheduler_port=scheduler_port, **kwargs)
    # dask-scheduler cannot distinguish task failure from
    # job removal/preemption. This might be a little extreme...
    self.scheduler.allowed_failures = 99999

    global _global_schedulers
    _global_schedulers.append((self.scheduler.id, self.schedd))

    self.jobs = {}  # {jobid: CLASSAD}
    self.ignored_jobs = set()  # set of jobids
    self._update_callback = tornado.ioloop.PeriodicCallback(
        callback=self.update_jobs,
        callback_time=update_interval,
        io_loop=self.scheduler.loop)
    self._update_callback.start()
import logging


def scheduler_context(args):
    """ Set the scheduler to use, based on the script arguments """
    import dask

    sched_info = {}

    try:
        if args.scheduler in ("mt", "thread", "threads",
                              "threaded", "threading"):
            logging.info("Using multithreaded scheduler")
            dask.config.set(scheduler="threads")
            sched_info = {"type": "threaded"}
        elif args.scheduler in ("mp", "processes", "multiprocessing"):
            raise ValueError("The Process Scheduler does not currently "
                             "work with dask-ms")
            # unreachable while the ValueError above is in place
            import dask.multiprocessing
            logging.info("Using multiprocessing scheduler")
            dask.config.set(scheduler="processes")
            sched_info = {"type": "multiprocessing"}
        else:
            import distributed

            local_cluster = None

            if args.scheduler == "local":
                local_cluster = distributed.LocalCluster(processes=False)
                address = local_cluster.scheduler_address
            elif args.scheduler.startswith('tcp'):
                address = args.scheduler
            else:
                import json

                with open(args.scheduler, 'r') as f:
                    address = json.load(f)['address']

            logging.info("Using distributed scheduler "
                         "with address '{}'".format(address))
            client = distributed.Client(address)
            dask.config.set(scheduler=client)
            client.restart()
            sched_info = {"type": "distributed",
                          "client": client,
                          "local_cluster": local_cluster}

        yield
    except Exception:
        logging.exception("Error setting up scheduler", exc_info=True)
    finally:
        try:
            sched_type = sched_info["type"]
        except KeyError:
            pass
        else:
            if sched_type == "distributed":
                try:
                    client = sched_info["client"]
                except KeyError:
                    pass
                else:
                    client.close()

                try:
                    local_cluster = sched_info["local_cluster"]
                except KeyError:
                    pass
                else:
                    # local_cluster is None when an external scheduler
                    # address was used; guard before closing
                    if local_cluster is not None:
                        local_cluster.close()
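# Usage sketch: scheduler_context is a generator, so it is presumably wrapped
# with contextlib.contextmanager where it is defined. With an argparse-style
# namespace (illustrative values):
import argparse
from contextlib import contextmanager

args = argparse.Namespace(scheduler="local")
with contextmanager(scheduler_context)(args):
    # dask computations issued here run against the LocalCluster that the
    # context set as the active scheduler
    pass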
# Module-level fragment: 'suppress' (contextlib), 'os', 'asyncio', 'warnings',
# 'distributed' and the 'with_distributed' flag are assumed to be imported or
# defined earlier in the module; the trailing 'else:' continues in code not
# shown here.
try:
    import mpi4py.futures

    with_mpi4py = True
except ModuleNotFoundError:
    with_mpi4py = False

with suppress(ModuleNotFoundError):
    import uvloop

    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

if os.name == "nt":
    if with_distributed:
        _default_executor = distributed.Client
        _default_executor_kwargs = {"address": distributed.LocalCluster()}
    else:
        _windows_executor_msg = (
            "The default executor on Windows for 'adaptive.Runner' cannot "
            "be used because the package 'distributed' is not installed. "
            "Either install 'distributed' or explicitly specify an executor "
            "when using 'adaptive.Runner'.")

        _default_executor_kwargs = {}

        def _default_executor(*args, **kwargs):
            raise RuntimeError(_windows_executor_msg)

        warnings.warn(_windows_executor_msg)
else:
import distributed


def external_client():
    cluster = distributed.LocalCluster()
    client = distributed.Client(cluster)
    yield client
    # Client.shutdown() also stops the scheduler and workers, so the explicit
    # cluster.close() afterwards is belt-and-braces cleanup.
    client.shutdown()
    cluster.close()
import logging


def scheduler_context():
    """ Set the scheduler to use, based on the script arguments """
    import dask

    args = {'scheduler': 'threaded'}
    sched_info = {}

    try:
        if args['scheduler'] in ("mt", "thread", "threaded", "threading"):
            import dask.threaded
            logging.info("Using multithreaded scheduler")
            dask.config.set(scheduler='threads')
            sched_info = {"type": "threaded"}
        elif args['scheduler'] in ("mp", "multiprocessing"):
            import dask.multiprocessing
            logging.info("Using multiprocessing scheduler")
            # dask.set_options(get=dask.multiprocessing.get) was removed from
            # dask; the equivalent modern call selects the scheduler via config
            dask.config.set(scheduler='processes')
            sched_info = {"type": "multiprocessing"}
        else:
            import distributed

            local_cluster = None

            if args['scheduler'] == "local":
                local_cluster = distributed.LocalCluster(processes=False)
                address = local_cluster.scheduler_address
            elif args['scheduler'].startswith('tcp'):
                address = args['scheduler']
            else:
                import json

                with open(args['scheduler'], 'r') as f:
                    address = json.load(f)['address']

            logging.info("Using distributed scheduler "
                         "with address '{}'".format(address))
            client = distributed.Client(address)
            # dask.set_options(get=client.get) is the removed old-style API;
            # setting the client as scheduler via config is the modern form
            dask.config.set(scheduler=client)
            client.restart()
            sched_info = {"type": "distributed",
                          "client": client,
                          "local_cluster": local_cluster}

        yield
    except Exception:
        logging.exception("Error setting up scheduler", exc_info=True)
    finally:
        try:
            sched_type = sched_info["type"]
        except KeyError:
            pass
        else:
            if sched_type == "distributed":
                try:
                    client = sched_info["client"]
                except KeyError:
                    pass
                else:
                    client.close()

                try:
                    local_cluster = sched_info["local_cluster"]
                except KeyError:
                    pass
                else:
                    # local_cluster is None when an external scheduler
                    # address was used; guard before closing
                    if local_cluster is not None:
                        local_cluster.close()