Example #1
def executor(kind: str,
             max_workers: int,
             daemon=True) -> typing.Iterator[Executor]:
    """General purpose utility to get an executor with its as_completed handler

    This allows us to easily use other executors as needed.
    """
    if kind == "thread":
        with ThreadPoolExecutor(max_workers=max_workers) as pool_t:
            yield pool_t
    elif kind == "process":
        with ProcessPoolExecutor(max_workers=max_workers) as pool_p:
            yield pool_p
    elif kind in ["dask", "dask-process", "dask-thread"]:
        import dask
        import distributed
        from distributed.cfexecutor import ClientExecutor

        processes = kind == "dask" or kind == "dask-process"

        with dask.config.set({"distributed.worker.daemon": daemon}):
            with distributed.LocalCluster(
                    n_workers=max_workers,
                    processes=processes,
            ) as cluster:
                with distributed.Client(cluster) as client:
                    yield ClientExecutor(client)
    else:
        raise NotImplementedError("That kind is not implemented")
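In its source module this generator is presumably wrapped with contextlib.contextmanager (the decorator line is not shown above). A minimal usage sketch under that assumption; because ClientExecutor also implements the concurrent.futures interface, the same call site works for every kind:

import contextlib
from concurrent.futures import as_completed

# Hypothetical wrapper; the source module presumably applies the decorator itself.
executor_cm = contextlib.contextmanager(executor)

with executor_cm("thread", max_workers=4) as pool:
    futures = [pool.submit(pow, 2, n) for n in range(8)]
    for fut in as_completed(futures):
        print(fut.result())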
Example #2
def test_retrieve() -> None:
    cluster = distributed.LocalCluster(
        ip='localhost:8786',
        # I want a bokeh interface to check progress
        dashboard_address='localhost:8787',
        # single process, single thread allows ctrl+C backtrace to
        # show where the code is getting stuck. Otherwise, it will say,
        # "I'm stuck waiting for other processes." It also makes
        # time_code more meaningful
        processes=False,
        threads_per_worker=1,
    )
    # TODO: put this in a reusable module

    with distributed.Client(cluster):

        # disable the cache, because I don't want to persist these results
        # in the cloud
        for cached_func in [
                retrieve.get_rfs, retrieve.get_paragraphs,
                retrieve.get_raw_forms, retrieve.get_indexes
        ]:
            assert isinstance(cached_func, Cache)
            cast(Cache, cached_func).disabled = True

        rfs = dask.bag.zip(  # pylint: disable=unused-variable
            retrieve.get_indexes('10-K', 1995, 1),
            retrieve.get_rfs(1995, 1)).take(10, npartitions=1)
Example #3
def external_client():
    # Explicitly specify we want only 4 workers so that when running on
    # continuous integration we don't request too many.
    cluster = distributed.LocalCluster(n_workers=4)
    client = distributed.Client(cluster)
    yield client
    client.close()
    cluster.close()
Example #4
def dask_client():
    cluster = distributed.LocalCluster(n_workers=3, threads_per_worker=1)
    client = distributed.Client(cluster)

    yield client

    client.close()
    cluster.close()
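Examples #3 and #4 follow the standard pytest fixture shape (setup, yield, teardown). A minimal sketch of how such a generator is presumably registered and consumed, assuming pytest is available:

import distributed
import pytest

@pytest.fixture
def dask_client():
    cluster = distributed.LocalCluster(n_workers=3, threads_per_worker=1)
    client = distributed.Client(cluster)
    yield client
    # Teardown runs after the test: close the client before the cluster.
    client.close()
    cluster.close()

def test_submit(dask_client):
    # Any test that takes the fixture by name receives the live client.
    assert dask_client.submit(sum, [1, 2, 3]).result() == 6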
Example #5
    def start(self, n_workers):
        import distributed

        self.cluster = distributed.LocalCluster(n_workers=n_workers,
                                                threads_per_worker=1,
                                                scheduler_port=self.port,
                                                diagnostics_port=None)

        self.ctx = DistributedContext(port=self.port)
        assert len(self.ctx.executor.ncores()) == n_workers
Example #6
def dask_client(mock_service_envs: None) -> Iterable[distributed.Client]:
    print(pformat(dask.config.get("distributed")))
    with distributed.LocalCluster(
        worker_class=distributed.Worker,
        **{
            "resources": {"CPU": 10, "GPU": 10, "MPI": 1},
            "preload": "simcore_service_dask_sidecar.tasks",
        },
    ) as cluster:
        with distributed.Client(cluster) as client:
            yield client
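The resources mapping above declares abstract scheduling tokens on the worker; tasks can then request them at submit time. A minimal consumer sketch using the yielded client (the trivial task body is a placeholder):

def task():
    return "done"

# Runs only on a worker advertising at least one free GPU token.
future = dask_client.submit(task, resources={"GPU": 1})
assert future.result() == "done"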
Example #7
def init_client(processes, max_memory):
    memory_limit = int(max_memory / processes)
    memory_limit = '{0:d}MB'.format(memory_limit)
    logger.info(
        'Initialising client with {0:d} workers and {1:s} per worker'.format(
            processes, memory_limit))
    cluster = distributed.LocalCluster(n_workers=processes,
                                       threads_per_worker=1,
                                       memory_limit=memory_limit,
                                       local_directory='/scratch/u/u300636')
    client = distributed.Client(cluster)
    logger.info('Initialised client: {0}'.format(client))
    return client
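For instance, init_client(4, 8192) gives each worker a memory limit of '2048MB' and logs "Initialising client with 4 workers and 2048MB per worker".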
Example #8
def test_setup_executor_distributed(n_workers=1, threads_per_worker=2):
    cluster = distributed.LocalCluster(n_workers=n_workers,
                                       threads_per_worker=threads_per_worker)
    client = distributed.Client(cluster)
    address = cluster.scheduler.address

    test = executor.setup_executor(address)

    assert test.scheduler.address == cluster.scheduler.address
    assert client.scheduler_info() == test.scheduler_info()

    test.close()
    cluster.close()
    client.close()
Example #9
def executor(kind, max_workers):
    """General purpose utility to get an executor with its as_completed handler

    This allows us to easily use other executors as needed.
    """
    if kind == 'thread':
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            yield pool, as_completed
    elif kind == 'process':
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            yield pool, as_completed
    elif kind == 'dask':
        import distributed
        with distributed.LocalCluster(n_workers=max_workers) as cluster:
            with distributed.Client(cluster) as client:
                yield client, distributed.as_completed
    else:
        raise NotImplementedError('That kind is not implemented')
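Yielding the matching as_completed handler next to the pool keeps the call site backend-agnostic. A minimal sketch, again assuming the generator is wrapped with contextlib.contextmanager as in the sketch under Example #1:

import contextlib

with contextlib.contextmanager(executor)('thread', max_workers=4) as (pool, wait_fn):
    futures = [pool.submit(pow, 2, n) for n in range(8)]
    # wait_fn is whichever as_completed variant matches the backend.
    for fut in wait_fn(futures):
        print(fut.result())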
Example #10
def test_use_plain_dask(hdf5_ds_1):
    # We deactivate the resource scheduling and run on a plain dask cluster
    hdf5_ds_1.set_num_cores(2)
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)
    with dd.LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
        client = dd.Client(cluster, set_as_default=False)
        try:
            executor = DaskJobExecutor(client=client)
            ctx = api.Context(executor=executor)
            analysis = ctx.create_mask_analysis(
                dataset=hdf5_ds_1, factories=[lambda: mask]
            )
            results = ctx.run(analysis)
            udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
            # Requesting CuPy, which is not available
            with pytest.raises(RuntimeError):
                _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)
        finally:
            # to fix "distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client"  # NOQA
            client.close()

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        # Default without worker setup
        assert val["cpu"] == 0

    for val in udf_res['backend'].data[0].values():
        print(val)
        # no CUDA
        assert 'numpy' in val

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
Example #11
def executor(kind: str, max_workers: int) -> typing.Iterator[Executor]:
    """General purpose utility to get an executor with its as_completed handler

    This allows us to easily use other executors as needed.
    """
    if kind == "thread":
        with ThreadPoolExecutor(max_workers=max_workers) as pool_t:
            yield pool_t
    elif kind == "process":
        with ProcessPoolExecutor(max_workers=max_workers) as pool_p:
            yield pool_p
    elif kind == "dask":
        import distributed
        from distributed.cfexecutor import ClientExecutor

        with distributed.LocalCluster(n_workers=max_workers) as cluster:
            with distributed.Client(cluster) as client:
                yield ClientExecutor(client)
    else:
        raise NotImplementedError("That kind is not implemented")
Example #12
    def __init__(self,
                 memory_per_worker=1024,
                 procs_per_worker=1,
                 pool=None,
                 reserved_memory=None,
                 schedd_name=None,
                 threads_per_worker=1,
                 cleanup_interval=1000,
                 worker_timeout=(24 * 60 * 60),
                 **kwargs):

        global _global_schedulers

        if schedd_name is None:
            self.schedd = htcondor.Schedd()
        else:
            collector = htcondor.Collector(pool)
            self.schedd = htcondor.Schedd(
                collector.locate(htcondor.DaemonTypes.Schedd, schedd_name))

        self.local_cluster = distributed.LocalCluster(ip='',
                                                      n_workers=0,
                                                      **kwargs)

        _global_schedulers.append((self.scheduler.id, self.schedd))

        self.jobs = {}  # {jobid: CLASSAD}
        if cleanup_interval < 1:
            raise ValueError("cleanup_interval must be >= 1")
        self._cleanup_callback = tornado.ioloop.PeriodicCallback(
            callback=self.cleanup_jobs,
            callback_time=cleanup_interval,
            io_loop=self.scheduler.loop)
        self._cleanup_callback.start()

        self.memory_per_worker = memory_per_worker
        self.procs_per_worker = procs_per_worker
        self.threads_per_worker = threads_per_worker
        self.reserved_memory = reserved_memory
        self.worker_timeout = worker_timeout
Example #13
def _exec_calcs(calcs, parallelize=False, client=None, **compute_kwargs):
    """Execute the given calculations.

    Parameters
    ----------
    calcs : Sequence of ``aospy.Calc`` objects
    parallelize : bool, default False
        Whether to submit the calculations in parallel or not
    client : distributed.Client or None
        The distributed Client used if parallelize is set to True; if None
        a distributed LocalCluster is used.
    compute_kwargs : dict of keyword arguments passed to ``Calc.compute``

    Returns
    -------
    A list of the values returned by each Calc object that was executed.
    """
    if parallelize:

        def func(calc):
            """Wrap _compute_or_skip_on_error to require only the calc
            argument"""
            if 'write_to_tar' in compute_kwargs:
                compute_kwargs['write_to_tar'] = False
            return _compute_or_skip_on_error(calc, compute_kwargs)

        if client is None:
            n_workers = _n_workers_for_local_cluster(calcs)
            with distributed.LocalCluster(n_workers=n_workers) as cluster:
                with distributed.Client(cluster) as client:
                    result = _submit_calcs_on_client(calcs, client, func)
        else:
            result = _submit_calcs_on_client(calcs, client, func)
        if compute_kwargs['write_to_tar']:
            _serial_write_to_tar(calcs)
        return result
    else:
        return [
            _compute_or_skip_on_error(calc, compute_kwargs) for calc in calcs
        ]
Example #14
    def __init__(self,
                 memory_per_worker=1024,
                 disk_per_worker=1048576,
                 pool=None,
                 schedd_name=None,
                 threads_per_worker=1,
                 update_interval=1000,
                 worker_timeout=(24 * 60 * 60),
                 scheduler_port=8786,
                 worker_tarball=None,
                 pre_script=None,
                 transfer_files=None,
                 logdir='.',
                 logger=None,
                 **kwargs):

        self.logger = logger or logging.getLogger(__name__)
        if 'procs_per_worker' in kwargs:
            self.logger.warning("Multiple processes and adaptive scaling"
                                " don't mix; ignoring procs_per_worker")
        self.procs_per_worker = 1
        self.memory_per_worker = memory_per_worker
        self.disk_per_worker = disk_per_worker
        self.threads_per_worker = threads_per_worker
        if int(update_interval) < 1:
            raise ValueError("update_interval must be >= 1")
        self.worker_timeout = worker_timeout
        self.worker_tarball = worker_tarball
        self.pre_script = pre_script
        self.transfer_files = transfer_files

        if schedd_name is None:
            self.schedd = htcondor.Schedd()
        else:
            collector = htcondor.Collector(pool)
            self.schedd = htcondor.Schedd(
                collector.locate(htcondor.DaemonTypes.Schedd, schedd_name))

        self.script = None
        if self.worker_tarball:
            if '://' not in self.worker_tarball:
                self._verify_tarball()
            pre_script_in_wrapper = ""
            if self.pre_script:
                pre_script_in_wrapper = "./" + os.path.basename(
                    self.pre_script)
            self.script = tempfile.NamedTemporaryFile(
                suffix='.sh', prefix='dask-worker-wrapper-')
            self.script.write(
                SCRIPT_TEMPLATE % {
                    'worker_tarball': os.path.basename(self.worker_tarball),
                    'pre_script': pre_script_in_wrapper
                })
            self.script.flush()

            @atexit.register
            def _erase_script():
                self.script.close()

        self.logdir = logdir
        try:
            os.makedirs(self.logdir)
        except OSError as err:
            if err.errno == errno.EEXIST:
                pass
            else:
                self.logger.warning("Couldn't make log dir: %s", err)

        self.local_cluster = distributed.LocalCluster(
            ip='', n_workers=0, scheduler_port=scheduler_port, **kwargs)

        # dask-scheduler cannot distinguish task failure from
        # job removal/preemption. This might be a little extreme...
        self.scheduler.allowed_failures = 99999

        global _global_schedulers
        _global_schedulers.append((self.scheduler.id, self.schedd))

        self.jobs = {}  # {jobid: CLASSAD}
        self.ignored_jobs = set()  # set of jobids
        self._update_callback = tornado.ioloop.PeriodicCallback(
            callback=self.update_jobs,
            callback_time=update_interval,
            io_loop=self.scheduler.loop)
        self._update_callback.start()
Example #15
def scheduler_context(args):
    """ Set the scheduler to use, based on the script arguments """

    import dask

    sched_info = {}

    try:
        if args.scheduler in ("mt", "thread", "threads",
                              "threaded", "threading"):
            logging.info("Using multithreaded scheduler")
            dask.config.set(scheduler="threads")
            sched_info = {"type": "threaded"}
        elif args.scheduler in ("mp", "processes", "multiprocessing"):
            raise ValueError("The Process Scheduler does not currently "
                             "work with dask-ms")
            import dask.multiprocessing
            logging.info("Using multiprocessing scheduler")
            dask.config.set(scheduler="processes")
            sched_info = {"type": "multiprocessing"}
        else:
            import distributed
            local_cluster = None

            if args.scheduler == "local":
                local_cluster = distributed.LocalCluster(processes=False)
                address = local_cluster.scheduler_address
            elif args.scheduler.startswith('tcp'):
                address = args.scheduler
            else:
                import json

                with open(args.scheduler, 'r') as f:
                    address = json.load(f)['address']

            logging.info("Using distributed scheduler "
                         "with address '{}'".format(address))
            client = distributed.Client(address)
            dask.config.set(scheduler=client)
            client.restart()
            sched_info = {
                "type": "distributed",
                "client": client,
                "local_cluster": local_cluster}

        yield
    except Exception:
        logging.exception("Error setting up scheduler", exc_info=True)

    finally:
        try:
            sched_type = sched_info["type"]
        except KeyError:
            pass
        else:
            if sched_type == "distributed":
                try:
                    client = sched_info["client"]
                except KeyError:
                    pass
                else:
                    client.close()

                try:
                    local_cluster = sched_info["local_cluster"]
                except KeyError:
                    pass
                else:
                    local_cluster.close()
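Like the executor generators above, scheduler_context is presumably used through contextlib.contextmanager. A minimal sketch under that assumption, with a hypothetical argparse Namespace standing in for the real script arguments:

import argparse
import contextlib

import dask.array as da

args = argparse.Namespace(scheduler="local")
with contextlib.contextmanager(scheduler_context)(args):
    # Any dask work inside the block runs on the configured scheduler.
    x = da.ones((1000, 1000), chunks=(100, 100))
    print(x.sum().compute())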
Example #16
try:
    import mpi4py.futures

    with_mpi4py = True
except ModuleNotFoundError:
    with_mpi4py = False

with suppress(ModuleNotFoundError):
    import uvloop

    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

if os.name == "nt":
    if with_distributed:
        _default_executor = distributed.Client
        _default_executor_kwargs = {"address": distributed.LocalCluster()}
    else:
        _windows_executor_msg = (
            "The default executor on Windows for 'adaptive.Runner' cannot "
            "be used because the package 'distributed' is not installed. "
            "Either install 'distributed' or explicitly specify an executor "
            "when using 'adaptive.Runner'.")

        _default_executor_kwargs = {}

        def _default_executor(*args, **kwargs):
            raise RuntimeError(_windows_executor_msg)

        warnings.warn(_windows_executor_msg)

else:
Example #17
def external_client():
    cluster = distributed.LocalCluster()
    client = distributed.Client(cluster)
    yield client
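    # shutdown() also stops the scheduler and its workers, so the
    # cluster.close() below is only a final safeguard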
    client.shutdown()
    cluster.close()
Example #18
def scheduler_context():
    """ Set the scheduler to use, based on the script arguments """

    import dask
    args = {'scheduler': 'threaded'}

    sched_info = {}

    try:
        if args['scheduler'] in ("mt", "thread", "threaded", "threading"):
            import dask.threaded
            logging.info("Using multithreaded scheduler")
            dask.config.set(scheduler='threads')
            sched_info = {"type": "threaded"}
        elif args['scheduler'] in ("mp", "multiprocessing"):
            import dask.multiprocessing
            logging.info("Using multiprocessing scheduler")
            dask.config.set(scheduler='processes')
            sched_info = {"type": "multiprocessing"}
        else:
            import distributed
            local_cluster = None

            if args['scheduler'] == "local":
                local_cluster = distributed.LocalCluster(processes=False)
                address = local_cluster.scheduler_address
            elif args['scheduler'].startswith('tcp'):
                address = args['scheduler']
            else:
                import json

                with open(args['scheduler'], 'r') as f:
                    address = json.load(f)['address']

            logging.info("Using distributed scheduler "
                         "with address '{}'".format(address))
            client = distributed.Client(address)
            dask.config.set(scheduler=client)
            client.restart()
            sched_info = {
                "type": "distributed",
                "client": client,
                "local_cluster": local_cluster
            }

        yield
    except Exception:
        logging.exception("Error setting up scheduler", exc_info=True)

    finally:
        try:
            sched_type = sched_info["type"]
        except KeyError:
            pass
        else:
            if sched_type == "distributed":
                try:
                    client = sched_info["client"]
                except KeyError:
                    pass
                else:
                    client.close()

                try:
                    local_cluster = sched_info["local_cluster"]
                except KeyError:
                    pass
                else:
                    local_cluster.close()