Example #1
    def __init__(self, name, worker=None, client=None):
        if worker is None and client is None:
            from distributed.worker import get_worker, get_client

            try:
                worker = get_worker()
            except Exception:
                client = get_client()

        self.worker = worker
        self.client = client
        if self.worker:
            self.loop = self.worker.loop
        elif self.client:
            self.loop = self.client.loop
        self.name = name
        self.buffer = deque()

        if self.worker:
            pubsub = self.worker.extensions["pubsub"]
        elif self.client:
            pubsub = self.client.extensions["pubsub"]
        self.loop.add_callback(pubsub.subscribers[name].add, self)

        msg = {"op": "pubsub-add-subscriber", "name": self.name}
        if self.worker:
            self.loop.add_callback(self.worker.batched_stream.send, msg)
        elif self.client:
            self.loop.add_callback(self.client.scheduler_comm.send, msg)
        else:
            raise Exception()

        weakref.finalize(self, pubsub.trigger_cleanup)
Example #2
    def __init__(self, name, worker=None, client=None):
        if worker is None and client is None:
            from distributed import get_worker, get_client
            try:
                worker = get_worker()
            except Exception:
                client = get_client()

        self.subscribers = dict()
        self.worker = worker
        self.client = client
        assert client or worker
        if self.worker:
            self.scheduler = self.worker.scheduler
            self.loop = self.worker.loop
        elif self.client:
            self.scheduler = self.client.scheduler
            self.loop = self.client.loop

        self.name = name
        self._started = False
        self._buffer = []

        self.loop.add_callback(self._start)

        if self.worker:
            pubsub = self.worker.extensions['pubsub']
            self.loop.add_callback(pubsub.publishers[name].add, self)
            finalize(self, pubsub.trigger_cleanup)
Example #3
    def __init__(self, name, worker=None, client=None):
        if worker is None and client is None:
            from distributed import get_worker, get_client

            try:
                worker = get_worker()
            except Exception:
                client = get_client()

        self.subscribers = dict()
        self.worker = worker
        self.client = client
        assert client or worker
        if self.worker:
            self.scheduler = self.worker.scheduler
            self.loop = self.worker.loop
        elif self.client:
            self.scheduler = self.client.scheduler
            self.loop = self.client.loop

        self.name = name
        self._started = False
        self._buffer = []

        self.loop.add_callback(self._start)

        if self.worker:
            pubsub = self.worker.extensions["pubsub"]
            self.loop.add_callback(pubsub.publishers[name].add, self)
            weakref.finalize(self, pubsub.trigger_cleanup)
Example #4
def get_worker_wrapper():
    global global_worker
    try:
        this_worker = get_worker()
    except Exception as e:
        this_worker = global_worker
    return this_worker
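get_worker_wrapper above falls back to a module-level global_worker when get_worker() raises outside of a Dask task. A minimal sketch of that assumed module-level setup (not taken from the original project; the helper name set_global_worker is hypothetical):

global_worker = None

def set_global_worker(worker):
    # Record a worker-like object to fall back on when code runs outside
    # of a Dask worker.
    global global_worker
    global_worker = worker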
Example #5
    def process(self, events):
        output = self.accumulator.identity()

        dataset = events.metadata["dataset"]
        print(events.metadata)
        if "checkusermeta" in events.metadata:
            metaname, metavalue = self.expected_usermeta[dataset]
            assert metavalue == events.metadata[metaname]

        mapping = events.behavior["__events_factory__"]._mapping
        muon_pt = events.Muon.pt
        if isinstance(mapping, nanoevents.mapping.CachedMapping):
            keys_in_cache = list(mapping.cache.cache.keys())
            has_canaries = [
                canary in keys_in_cache for canary in self._canaries
            ]
            if has_canaries:
                try:
                    from distributed import get_worker

                    worker = get_worker()
                    output["worker"].add(worker.name)
                except ValueError:
                    pass

        dimuon = ak.combinations(events.Muon, 2)
        dimuon = dimuon["0"] + dimuon["1"]

        output["pt"].fill(dataset=dataset, pt=ak.flatten(muon_pt))
        output["mass"].fill(dataset=dataset, mass=ak.flatten(dimuon.mass))
        output["cutflow"]["%s_pt" % dataset] += sum(ak.num(events.Muon))
        output["cutflow"]["%s_mass" % dataset] += sum(ak.num(dimuon))

        return output
Example #6
def worker_state(sessionId: Optional[int] = None) -> dict:
    """Retrieve the state(s) of the current worker

    Parameters
    ----------
    sessionId: int, optional
        Worker session state ID. If None, all states of the worker
        are returned.

    Returns
    -------
    state: dict
        Either a single state dict or a dict of state dicts
    """
    worker = get_worker()
    if not hasattr(worker, "_explicit_comm_state"):
        worker._explicit_comm_state = {}
    if sessionId is not None:
        if sessionId not in worker._explicit_comm_state:
            worker._explicit_comm_state[sessionId] = {
                "ts": time.time(),
                "eps": {},
                "loop": worker.loop.asyncio_loop,
                "worker": worker,
            }
        return worker._explicit_comm_state[sessionId]
    return worker._explicit_comm_state
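Because worker_state stores its state on the worker object itself, it is usually invoked on the workers rather than on the client. A hedged usage sketch, assuming client is a connected distributed.Client (the helper name gather_session_state is made up for illustration):

def gather_session_state(client, session_id):
    # client.run executes the function on every worker and returns a
    # {worker_address: result} mapping, so each worker reports its own state.
    return client.run(worker_state, sessionId=session_id)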
Example #7
    def predict_in_block(block):

        from distributed import get_worker

        read_roi = block.read_roi
        write_roi = block.write_roi
        predict_script = '/groups/saalfeld/home/hanslovskyp/experiments/quasi-isotropic/predict/predict.py'
        cuda_visible_devices = get_worker().cuda_visible_devices
        predict_script_args = ''

        name = 'predict-%s-%s' % (write_roi.get_begin(), write_roi.get_size())
        log_file = os.path.join(cwd, '%s.log' % name)
        pythonpath = ':'.join([
            '%s/workspace-pycharm/u-net/gunpowder' % _HOME,
            '%s/workspace-pycharm/u-net/CNNectome' % _HOME,
            '/groups/saalfeld/home/papec/Work/my_projects/z5/bld/python'
        ])
        pythonpath_export_str = 'export PYTHONPATH=%s:$PYTHONPATH' % pythonpath

        daisy.call([
            'nvidia-docker', 'run', '--rm', '-u',
            os.getuid(), '-v', '/groups/turaga:/groups/turaga:rshared', '-v',
            '/groups/saalfeld:/groups/saalfeld:rshared', '-v',
            '/nrs/saalfeld:/nrs/saalfeld:rshared', '-w', cwd, '--name', name,
            'neptunes5thmoon/gunpowder:v0.3-pre6-dask1',
            '/bin/bash', '-c',
            '"export CUDA_VISIBLE_DEVICES=%s; %s; python -u %s %s 2>&1 > %s"' %
            (cuda_visible_devices, pythonpath_export_str, predict_script,
             predict_script_args, log_file)
        ])
Example #8
def inside_dask_worker():
    """Check whether the current function is executed inside a Dask worker.
    """
    # This function can not be in joblib._dask because there would be a
    # circular import:
    # _dask imports _parallel_backend that imports _dask ...
    try:
        from distributed import get_worker
    except ImportError:
        return False

    try:
        get_worker()
        return True
    except ValueError:
        return False
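A short illustrative sketch (not from joblib itself) of how such a check can be used to avoid nesting a distributed backend inside work that is already running on a Dask worker; the function name choose_backend and the backend strings are assumptions:

def choose_backend():
    # When already executing inside a Dask worker, prefer a local backend
    # instead of launching another distributed computation.
    if inside_dask_worker():
        return "threading"
    return "dask"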
Example #9
    def __init__(self, name, worker=None, client=None):
        if worker is None and client is None:
            from distributed.worker import get_worker, get_client
            try:
                worker = get_worker()
            except Exception:
                client = get_client()

        self.worker = worker
        self.client = client
        if self.worker:
            self.loop = self.worker.loop
        elif self.client:
            self.loop = self.client.loop
        self.name = name
        self.buffer = deque()
        self.condition = tornado.locks.Condition()

        if self.worker:
            pubsub = self.worker.extensions['pubsub']
        elif self.client:
            pubsub = self.client.extensions['pubsub']
        self.loop.add_callback(pubsub.subscribers[name].add, self)

        msg = {'op': 'pubsub-add-subscriber', 'name': self.name}
        if self.worker:
            self.loop.add_callback(self.worker.batched_stream.send, msg)
        elif self.client:
            self.loop.add_callback(self.client.scheduler_comm.send, msg)
        else:
            raise Exception()

        finalize(self, pubsub.trigger_cleanup)
Example #10
async def test_cudf_cluster_device_spill(loop, params):
    async with LocalCUDACluster(
            1,
            device_memory_limit=params["device_memory_limit"],
            memory_limit=params["memory_limit"],
            memory_target_fraction=params["host_target"],
            memory_spill_fraction=params["host_spill"],
            death_timeout=300,
            asynchronous=True,
    ) as cluster:
        async with Client(cluster, asynchronous=True) as client:

            cdf = dask.datasets.timeseries(
                dtypes={"x": int, "y": float}, freq="30ms"
            ).map_partitions(cudf.from_pandas)

            sizes = await client.compute(
                cdf.map_partitions(lambda df: df.__sizeof__()))
            sizes = sizes.tolist()
            nbytes = sum(sizes)
            part_index_nbytes = (
                await client.compute(cdf.partitions[0].index)
            ).__sizeof__()

            cdf2 = cdf.persist()
            await wait(cdf2)

            del cdf

            await client.run(worker_assert, nbytes, 32,
                             2048 + part_index_nbytes)

            host_chunks = await client.run(lambda: len(get_worker().data.host))
            disk_chunks = await client.run(lambda: len(get_worker().data.disk))
            for hc, dc in zip(host_chunks.values(), disk_chunks.values()):
                if params["spills_to_disk"]:
                    assert dc > 0
                else:
                    assert hc > 0
                    assert dc == 0

            del cdf2

            await client.run(worker_assert, 0, 0, 0)
Example #11
def log_event(topic: str, msg: dict) -> None:
    try:
        import distributed

        worker = distributed.get_worker()
    except (ImportError, ValueError):
        return
    worker.log_event(topic, dict(msg, thread=_curthread()))
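Events logged this way are typically read back on the client side. A hedged sketch, assuming a connected distributed.Client named client and a distributed version that provides Client.get_events; the task function double_and_log is made up for illustration:

def double_and_log(x):
    # Runs inside a task; log_event silently returns when no worker is available.
    log_event("my-topic", {"x": x})
    return 2 * x

# On the client side (sketch):
# future = client.submit(double_and_log, 21)
# events = client.get_events("my-topic")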
Example #12
 async def f(_):
     worker = get_worker()
     if hasattr(worker, "running"):
         assert not worker.running
     worker.running = True
     await asyncio.sleep(0.5)
     assert worker.running
     worker.running = False
Example #13
def evaluate(individual, context=context):
    """ concurrently evaluate the given individual

    This is what's invoked on each dask worker to evaluate each individual.

    We log the start and end times for evaluation.

    An individual is viable if evaluation does NOT raise an exception. If it
    is not viable, we increment the context['leap']['distrib']['non_viable']
    count to track such instances.

    This function sets:

    individual.start_eval_time has the time() of when evaluation started.
    individual.stop_eval_time has the time() of when evaluation finished.
    individual.is_viable is True if viable, else False
    individual.exception will be assigned any raised exceptions
    individual.fitness will be NaN if not viable, else the calculated fitness
    individual.hostname is the name of the host on which this individual was
    evaluated
    individual.pid is the process ID associated with evaluating the individual

    :param individual: to be evaluated
    :return: evaluated individual
    """
    worker = distributed.get_worker()

    individual.start_eval_time = time.time()

    if hasattr(worker, 'logger'):
        worker.logger.debug(
            f'Worker {worker.id} started evaluating {individual!s}')

    # Any thrown exceptions are now handled inside Individual.evaluate()
    individual.evaluate()

    if hasattr(individual, 'is_viable') and not individual.is_viable:
        # is_viable will be False if an exception was thrown during evaluation.
        # We track the number of such failures on the off chance that this
        # might be useful.
        context['leap']['distrib']['non_viable'] += 1

        if hasattr(worker, 'logger'):
            worker.logger.warning(
                f'Worker {worker.id}: {individual.exception!s} raised for {individual!s}'
            )

    individual.stop_eval_time = time.time()
    individual.hostname = platform.node()
    individual.pid = os.getpid()

    if hasattr(worker, 'logger'):
        worker.logger.debug(
            f'Worker {worker.id} evaluated {individual!s} in '
            f'{individual.stop_eval_time - individual.start_eval_time} '
            f'seconds')

    return individual
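A hedged sketch of how evaluate might be driven from the client side; client and population are assumed to exist, and this is not the library's own driver code:

def evaluate_population(client, population):
    # Submit one evaluation per individual and block until all results are in.
    futures = client.map(evaluate, population)
    return client.gather(futures)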
Example #14
async def test_cupy_cluster_device_spill(params):
    cupy = pytest.importorskip("cupy")
    with dask.config.set({"distributed.worker.memory.terminate": False}):
        async with LocalCUDACluster(
                1,
                scheduler_port=0,
                processes=True,
                silence_logs=False,
                dashboard_address=None,
                asynchronous=True,
                death_timeout=60,
                device_memory_limit=params["device_memory_limit"],
                memory_limit=params["memory_limit"],
                memory_target_fraction=params["host_target"],
                memory_spill_fraction=params["host_spill"],
                memory_pause_fraction=params["host_pause"],
        ) as cluster:
            async with Client(cluster, asynchronous=True) as client:

                rs = da.random.RandomState(RandomState=cupy.random.RandomState)
                x = rs.random(int(250e6), chunks=10e6)
                await wait(x)

                xx = x.persist()
                await wait(xx)

                # Allow up to 1024 bytes overhead per chunk serialized
                await client.run(worker_assert, x.nbytes, 1024, 1024)

                y = client.compute(x.sum())
                res = await y

                assert (abs(res / x.size) - 0.5) < 1e-3

                await client.run(worker_assert, x.nbytes, 1024, 1024)
                host_chunks = await client.run(
                    lambda: len(get_worker().data.host))
                disk_chunks = await client.run(
                    lambda: len(get_worker().data.disk or list()))
                for hc, dc in zip(host_chunks.values(), disk_chunks.values()):
                    if params["spills_to_disk"]:
                        assert dc > 0
                    else:
                        assert hc > 0
                        assert dc == 0
Example #15
def test_client(c, s):
    with pytest.raises(Exception):
        get_worker()
    sub = Sub("a")
    pub = Pub("a")

    sps = s.extensions["pubsub"]
    cps = c.extensions["pubsub"]

    start = time()
    while not set(sps.client_subscribers["a"]) == {c.id}:
        yield gen.sleep(0.01)
        assert time() < start + 3

    pub.put(123)

    result = yield sub.__anext__()
    assert result == 123
Example #16
def test_client(c, s):
    with pytest.raises(Exception):
        get_worker()
    sub = Sub('a')
    pub = Pub('a')

    sps = s.extensions['pubsub']
    cps = c.extensions['pubsub']

    start = time()
    while not set(sps.client_subscribers['a']) == {c.id}:
        yield gen.sleep(0.01)
        assert time() < start + 3

    pub.put(123)

    result = yield sub.__anext__()
    assert result == 123
Example #17
def ps_task(tf_spec, verbose=False):
    worker = distributed.get_worker()
    server = worker.tensorflow_server
    ps_device = "/job:%s/task:%d" % (server.server_def.job_name,
                                     server.server_def.task_index)
    if verbose:
        print('PS task')
        print(ps_device)
    worker.tensorflow_server.join()
Example #18
    def _wrapped_function(function, *args, **kwargs):

        available_resources = kwargs['available_resources']

        protocols_to_import = kwargs.pop('available_protocols')
        per_worker_logging = kwargs.pop('per_worker_logging')

        gpu_assignments = kwargs.pop('gpu_assignments')

        # Each spun up worker doesn't automatically import
        # all of the modules which were imported in the main
        # launch script, and as such custom plugins will no
        # longer be registered. We re-import / register them
        # here.
        for protocol_class in protocols_to_import:

            module_name = '.'.join(protocol_class.split('.')[:-1])
            class_name = protocol_class.split('.')[-1]

            imported_module = importlib.import_module(module_name)
            available_protocols[class_name] = getattr(imported_module,
                                                      class_name)

        # Set up the logging per worker if the flag is set to True.
        if per_worker_logging:

            formatter = logging.Formatter(
                fmt='%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s',
                datefmt='%H:%M:%S')

            # Each worker should have its own log file.
            logger = logging.getLogger()

            if not len(logger.handlers):

                logger_handler = logging.FileHandler('{}.log'.format(
                    get_worker().id))
                logger_handler.setFormatter(formatter)

                logger.setLevel(logging.INFO)
                logger.addHandler(logger_handler)

        if available_resources.number_of_gpus > 0:

            worker_id = distributed.get_worker().id

            available_resources._gpu_device_indices = (
                '0' if worker_id not in gpu_assignments else
                gpu_assignments[worker_id])

            logging.info(
                f'Launching a job with access to GPUs {available_resources._gpu_device_indices}'
            )

        return function(*args, **kwargs)
Example #19
    def func():
        with worker_client() as c:
            correct = True
            for data in [[1, 2], (1, 2), {1, 2}]:
                futures = c.scatter(data)
                correct &= type(futures) == type(data)

            o = object()
            futures = c.scatter({'x': o})
            correct &= get_worker().data['x'] is o
            return correct
Example #21
    def func():
        with worker_client() as c:
            futures = c.scatter([1, 2, 3, 4, 5])
            assert isinstance(futures, (list, tuple))
            assert len(futures) == 5

            x = dict(get_worker().data)
            y = {f.key: i for f, i in zip(futures, [1, 2, 3, 4, 5])}
            assert x == y

            total = c.submit(sum, futures)
            return total.result()
Example #23
def fix_mask(good_file_path,
             variable,
             data_path,
             new_version_path,
             dataset_id,
             id_map=None,
             verbose=False):

    if id_map:
        addr = get_worker().address
        position = id_map[addr] + 1
    else:
        position = 0

    good_data = cdms2.open(good_file_path)[variable]
    mask = np.ma.getmask(good_data[:])

    files_to_fix = sorted(os.listdir(data_path))

    if verbose:
        pbar = tqdm(total=len(files_to_fix), position=position, leave=False)
    for chunk in files_to_fix:
        if verbose:
            year = get_year_from_file(chunk)
            desc = "{} -> {}".format(dataset_id, year)
            pbar.set_description(desc)

        source = os.path.join(data_path, chunk)
        dest = os.path.join(new_version_path, chunk)

        # create the input pointer in read mode
        ip = cdms2.open(source, 'r')
        data = ip[variable]
        data_copy = data[:]
        data_copy._set_mask(mask)

        # create the output pointer in write mode
        op = cdms2.open(dest, 'w')

        for k, v in ip.attributes.items():
            setattr(op, k, v)

        # write out the new dataset
        op.write(data_copy)

        op.close()
        ip.close()

        if verbose:
            pbar.update(1)
    if verbose:
        pbar.close()
Example #24
def calCov(tem, img):
    st = time.time()
    print('Input image : ', img.shape)

    size = getTemSize(tem)
    # Number of pixels by which to pad the image on each side.
    es = (size - 1) // 2

    # Distinguish grayscale images from colour images.
    if len(img.shape) == 2:
        y, x, z = img.shape[0], img.shape[1], 1
    else:
        y, x, z = img.shape[0], img.shape[1], img.shape[2]
    # Pad the image by (template width - 1) / 2 pixels on every side so the
    # convolution can also be evaluated at the borders.
    # Expand image by es pixel around for edge calculation.
    if len(img.shape) == 2:
        eimg = np.uint8(np.zeros((y + 2 * es, x + 2 * es, 1)))
        eimg[es:y + es, es:x + es, 0] = img[:, :]
    else:
        eimg = np.uint8(np.zeros((y + 2 * es, x + 2 * es, z)))
        eimg[es:y + es, es:x + es, :] = img

    result = np.uint8(np.zeros(eimg.shape))
    x = x + es
    y = y + es
    for i in range(z):
        a = b = 1
        # Coordinates in the padded image start at 0.
        # dif is used for indexing inside the convolution window.
        dif = (size - 1) // 2
        while b < y:
            a = 1
            while a < x:
                cntx = 0
                dify = size // 2
                result[b][a][i] = 0
                while cntx < size:
                    cnty = 0
                    difx = size // 2
                    while cnty < size:
                        # print(b, a, b - dify, a - difx)
                        result[b][a][i] += tem[cntx][cnty] * eimg[b - dify][a - difx][i]
                        cnty += 1
                        difx -= 1
                    cntx += 1
                    dify -= 1
                a += 1
            b += 1
    et = time.time()
    print('Cal time:', str(et - st))
    work = distributed.get_worker()
    return [result, str(et - st), work.address]
Example #25
    def test_device_spill(client, scheduler, worker):
        cudf = pytest.importorskip("cudf")
        # There's a known issue with datetime64:
        # https://github.com/numpy/numpy/issues/4983#issuecomment-441332940
        # The same error above happens when spilling datetime64 to disk
        cdf = (
            dask.datasets.timeseries(dtypes={"x": int, "y": float}, freq="20ms")
            .reset_index(drop=True)
            .map_partitions(cudf.from_pandas)
        )

        sizes = yield client.compute(
            cdf.map_partitions(lambda df: df.__sizeof__()))
        sizes = sizes.tolist()
        nbytes = sum(sizes)
        part_index_nbytes = (
            yield client.compute(cdf.partitions[0].index)
        ).__sizeof__()

        cdf2 = cdf.persist()
        yield wait(cdf2)

        del cdf

        host_chunks = yield client.run(lambda: len(get_worker().data.host))
        disk_chunks = yield client.run(
            lambda: len(get_worker().data.disk or list()))
        for hc, dc in zip(host_chunks.values(), disk_chunks.values()):
            if params["spills_to_disk"]:
                assert dc > 0
            else:
                assert hc > 0
                assert dc == 0

        yield client.run(worker_assert, nbytes, 32, 2048 + part_index_nbytes)

        del cdf2

        yield client.run(delayed_worker_assert, 0, 0, 0)
Example #27
 def f(i):
     with worker_client(separate_thread=False) as client:
         get_worker().count += 1
         assert get_worker().count <= 3
         sleep(random.random() / 40)
         assert get_worker().count <= 3
         get_worker().count -= 1
     return i
Example #28
        def kill_init_proc():
            try:
                worker_addr = get_worker().address
            except ValueError:
                # Special case for synchronous cluster.
                # See run_on_each_worker
                worker_addr = 'tcp://127.0.0.1'

            try:
                pid_to_kill = worker_init_pids[worker_addr]
            except KeyError:
                return None
            else:
                return kill_if_running(pid_to_kill, 10.0)
Example #29
def worker_state(sessionId=None):
    worker = get_worker()
    if not hasattr(worker, "_explicit_comm_state"):
        worker._explicit_comm_state = {}
    if sessionId is not None and sessionId not in worker._explicit_comm_state:
        worker._explicit_comm_state[sessionId] = {
            "ts": time.time(),
            "eps": {},
            "loop": worker.loop.asyncio_loop,
            "worker": worker,
        }

    if sessionId is not None:
        return worker._explicit_comm_state[sessionId]
    return worker._explicit_comm_state
Example #30
    def test_device_spill(client, scheduler, worker):
        rs = da.random.RandomState(RandomState=cupy.random.RandomState)
        x = rs.random(int(250e6), chunks=10e6)

        xx = x.persist()
        yield wait(xx)

        # Allow up to 1024 bytes overhead per chunk serialized
        yield client.run(worker_assert, x.nbytes, 1024, 1024)

        y = client.compute(x.sum())
        res = yield y

        assert (abs(res / x.size) - 0.5) < 1e-3

        yield client.run(worker_assert, x.nbytes, 1024, 1024)
        host_chunks = yield client.run(lambda: len(get_worker().data.host))
        disk_chunks = yield client.run(lambda: len(get_worker().data.disk))
        for hc, dc in zip(host_chunks.values(), disk_chunks.values()):
            if params["spills_to_disk"]:
                assert dc > 0
            else:
                assert hc > 0
                assert dc == 0
Example #31
    def dask_incref(cls, csr):
        def shared_csr_loader_incref(x):
            # This does nothing.  Exists only to trick scheduler into generating an event
            pass

        key = distributed.get_worker().get_current_task()
        client = distributed.get_client()

        for shm in [csr.pointers_shm, csr.indices_shm, csr.values_shm]:
            task_name = f"{cls.REFCOUNT_TAG}:{key}:{shm.name}"
            dummy_arg = key + shm.name
            client.submit(shared_csr_loader_incref,
                          dummy_arg,
                          key=task_name,
                          pure=False)
Example #32
def register_plugins(client, add=defaultdict(dict)):
    """
    Usage:
        plugins = {
                    "MEMCache": {"maxmem": 5e8},
                    "ConfigureXRootD": {"proxy_file": None}
                  }
        register_plugins(client, add=plugins)
    """
    plugins = set()
    for p in client.run(lambda: set(get_worker().plugins)).values():
        plugins |= p
    for name, opts in add.items():
        plugin = globals()[name]
        if plugin.name not in plugins:
            client.register_worker_plugin(plugin(**opts))
Example #33
def get_ext() -> ShuffleWorkerExtension:
    from distributed import get_worker

    try:
        worker = get_worker()
    except ValueError as e:
        raise RuntimeError(
            "`shuffle='p2p'` requires Dask's distributed scheduler. This task is not running on a Worker; "
            "please confirm that you've created a distributed Client and are submitting this computation through it."
        ) from e
    extension: ShuffleWorkerExtension | None = worker.extensions.get("shuffle")
    if not extension:
        raise RuntimeError(
            f"The worker {worker.address} does not have a ShuffleExtension. "
            "Is pandas installed on the worker?")
    return extension
Example #34
    def execute_subgraph(self, SG):
        futures = {}

        client = self._client
        worker = get_worker()

        logger.info(f'Computing subgraph')

        edge = lambda G, f, t: G.edges[(f, t)]['field']
        result = lambda G, n: \
            futures[hash(G.nodes[n]['job'])] \
                if isinstance(G.nodes[n]['job'].resource, Job) else \
                G.nodes[n]['job']()

        for resource in nx.topological_sort(SG):
            job = SG.nodes[resource]['job']

            if not isinstance(job.resource, Job):
                continue

            dependencies = {
                edge(SG, dependency, resource): result(SG, dependency)
                for dependency in SG.predecessors(resource)
            }

            logger.info(
                f'Computing job {job.resource} with deps {dependencies}')

            resources = job.resources()
            try:
                del resources['storage']
            except:
                pass

            futures[hash(job)] = client.submit(job,
                                               **dependencies,
                                               resources=resources,
                                               workers=[worker.address],
                                               key=str(job),
                                               pure=False)

        logger.info(f'Gathering subgraph')

        return {
            k: v if v is not None else futures[k].exception()
            for k, v in self._client.gather(futures, errors='skip').items()
        }
Example #35
    def __init__(self, client=None, storage=None):
        try:
            self.client = client or Client.current()
        except ValueError:
            # Initialise new client
            self.client = get_worker().client
        self.storage = storage

        if self.client.asynchronous or getattr(thread_state,
                                               "on_event_loop_thread", False):

            async def _register():
                await self.client.run_on_scheduler(register_with_scheduler)

            self.client.loop.add_callback(_register)
        else:
            self.client.run_on_scheduler(register_with_scheduler)
Example #36
def scoring_task(tf_spec, xval, yval, keras_model, verbose=False):
    #run partial of this to configure it to xval and yval
    with local_client() as c:
        # Scores Channel
        scores = c.channel('scores', maxlen=10)
        worker = distributed.get_worker()
        queue = worker.tensorflow_queue
        server = worker.tensorflow_server
        # Make Model
        sess, _, _, _, _, loss = model(server, tf_spec, keras_model)

        # Testing Data
        test_data = {x: xval, y_: yval}
        # Main Loop
        while True:
            score = sess.run(loss, feed_dict=test_data)
            scores.append(float(score))
            time.sleep(1)
Example #37
    def f():
        with worker_client():
            pass

        return threading.current_thread() in get_worker().executor._threads
Example #38
 def some_name():
     return get_worker().get_current_task()
Example #39
 def f(x):
     sleep(0.1)
     if get_worker().address == a_address:
         raise Reschedule()
Example #40
 def f():
     with worker_client() as lc:
         return lc.loop is get_worker().loop
Example #41
def _worker_address(_):
    from distributed import get_worker
    return get_worker().address
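A hedged usage sketch: mapping this helper over a few throwaway inputs shows which worker each task landed on. client is assumed to be a connected distributed.Client, and sample_task_placement is a made-up name:

def sample_task_placement(client, n=10):
    # pure=False stops Dask from collapsing the identical-looking calls
    # into a single task.
    futures = client.map(_worker_address, range(n), pure=False)
    return client.gather(futures)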