Example #1
def test_stress_scatter_death(c, s, *workers):
    import random
    s.allowed_failures = 1000
    np = pytest.importorskip('numpy')
    L = yield c.scatter([np.random.random(10000) for i in range(len(workers))])
    yield c._replicate(L, n=2)

    adds = [delayed(slowadd, pure=True)(random.choice(L),
                                        random.choice(L),
                                        delay=0.05,
                                        dask_key_name='slowadd-1-%d' % i)
            for i in range(50)]

    adds = [delayed(slowadd, pure=True)(a, b, delay=0.02,
                                        dask_key_name='slowadd-2-%d' % i)
            for i, (a, b) in enumerate(sliding_window(2, adds))]

    futures = c.compute(adds)
    L = adds = None  # drop local references so intermediates can be released

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        yield gen.sleep(0.1)
        try:
            s.validate_state()
        except Exception as e:  # avoid shadowing the client fixture `c`
            logger.exception(e)
            if config.get('log-on-err'):
                import pdb
                pdb.set_trace()
            else:
                raise
        w = random.choice(alive)
        yield w._close()
        alive.remove(w)

    try:
        yield gen.with_timeout(timedelta(seconds=25), c._gather(futures))
    except gen.TimeoutError:
        ws = {w.address: w for w in workers if w.status != 'closed'}
        print(s.processing)
        print(ws)
        print(futures)
        try:
            # keep a handle on a stuck worker for inspection under pdb below
            worker = [w for w in ws.values() if w.waiting_for_data][0]
        except Exception:
            pass
        if config.get('log-on-err'):
            import pdb
            pdb.set_trace()
        else:
            raise
    except CancelledError:
        pass
    finally:
        futures = None
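Note: the snippets on this page assume the distributed test harness. gen_cluster (from distributed.utils_test) starts an in-process scheduler and workers and injects them into the coroutine; the decorator arguments and body below are an illustrative sketch, not taken from the examples.

from distributed.utils_test import gen_cluster


@gen_cluster(client=True, ncores=[('127.0.0.1', 1)] * 4)
def test_sketch(c, s, *workers):
    # c is a Client, s the Scheduler; workers are in-process Worker objects
    future = c.submit(sum, [1, 2, 3])
    result = yield future
    assert result == 6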
Example #2
async def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions["stealing"]
    steal._pc.stop()

    counter = itertools.count()
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in sorted(ts, reverse=True):
            if t:
                [dat] = await c.scatter([next(data_seq)], workers=w.address)
                ts = s.tasks[dat.key]
                # Ensure scheduler state stays consistent
                old_nbytes = ts.nbytes
                ts.nbytes = s.bandwidth * t
                for ws in ts.who_has:
                    ws.nbytes += ts.nbytes - old_nbytes
            else:
                dat = 123
            i = next(counter)
            f = c.submit(
                func,
                dat,
                key="%d-%d" % (int(t), i),
                workers=w.address,
                allow_other_workers=True,
                pure=False,
                priority=-i,
            )
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        await asyncio.sleep(0.001)

    for i in range(10):
        steal.balance()

        while steal.in_flight:
            await asyncio.sleep(0.001)

        result = [
            sorted([int(key_split(k)) for k in s.processing[w.address]],
                   reverse=True) for w in workers
        ]

        result2 = sorted(result, reverse=True)
        expected2 = sorted(expected, reverse=True)

        if config.get("pdb-on-err"):
            if result2 != expected2:
                import pdb

                pdb.set_trace()

        if result2 == expected2:
            return
    raise Exception("Expected: {}; got: {}".format(str(expected2),
                                                   str(result2)))
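The balance check above encodes each task's nominal size in its key ('%d-%d' % (t, i)) and recovers it with key_split, which returns the key's prefix. A quick illustration, assuming distributed.utils:

from distributed.utils import key_split

assert key_split('slowadd-1-5') == 'slowadd'  # alphabetic prefix is kept
assert key_split('2-17') == '2'               # numeric prefix before the dash
assert int(key_split('10-3')) == 10           # the nominal task size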
Example #3
def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions['stealing']
    steal._pc.stop()

    counter = itertools.count()
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in sorted(ts, reverse=True):
            if t:
                [dat] = yield c._scatter([next(data_seq)], workers=w.address)
                ts = s.tasks[dat.key]
                # Ensure scheduler state stays consistent
                old_nbytes = ts.nbytes
                ts.nbytes = BANDWIDTH * t
                for ws in ts.who_has:
                    ws.nbytes += ts.nbytes - old_nbytes
            else:
                dat = 123
            s.task_duration[str(int(t))] = 1
            f = c.submit(func,
                         dat,
                         key='%d-%d' % (int(t), next(counter)),
                         workers=w.address,
                         allow_other_workers=True,
                         pure=False)
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        yield gen.sleep(0.001)

    for i in range(10):
        steal.balance()

        while steal.in_flight:
            yield gen.sleep(0.001)

        result = [
            sorted([int(key_split(k)) for k in s.processing[w.address]],
                   reverse=True) for w in workers
        ]

        result2 = sorted(result, reverse=True)
        expected2 = sorted(expected, reverse=True)

        if config.get('pdb-on-err'):
            if result2 != expected2:
                import pdb
                pdb.set_trace()

        if result2 == expected2:
            return
    raise Exception('Expected: {}; got: {}'.format(str(expected2),
                                                   str(result2)))
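Sizing the scattered data as BANDWIDTH * t makes its estimated transfer time roughly t seconds, which is what the expected layouts encode. A sketch; 100e6 bytes/s was the default bandwidth estimate in distributed at the time, but treat the number as an assumption:

BANDWIDTH = 100e6                        # bytes per second (assumed default)
t = 2
nbytes = BANDWIDTH * t                   # a datum this large...
transfer_seconds = nbytes / BANDWIDTH    # ...costs about t seconds to move
assert transfer_seconds == t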
Example #4
async def test_stress_scatter_death(c, s, *workers):
    import random

    s.allowed_failures = 1000
    np = pytest.importorskip("numpy")
    L = await c.scatter([np.random.random(10000) for i in range(len(workers))])
    await c.replicate(L, n=2)

    adds = [
        delayed(slowadd, pure=True)(
            random.choice(L),
            random.choice(L),
            delay=0.05,
            dask_key_name="slowadd-1-%d" % i,
        ) for i in range(50)
    ]

    adds = [
        delayed(slowadd, pure=True)(a,
                                    b,
                                    delay=0.02,
                                    dask_key_name="slowadd-2-%d" % i)
        for i, (a, b) in enumerate(sliding_window(2, adds))
    ]

    futures = c.compute(adds)
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        await asyncio.sleep(0.1)
        try:
            s.validate_state()
        except Exception as e:  # avoid shadowing the client fixture `c`
            logger.exception(e)
            if config.get("log-on-err"):
                import pdb

                pdb.set_trace()
            else:
                raise
        w = random.choice(alive)
        await w.close()
        alive.remove(w)

    with suppress(CancelledError):
        await c.gather(futures)

    futures = None
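sliding_window (from toolz) is what chains the second layer of slowadd tasks onto overlapping pairs of the first layer:

from toolz import sliding_window

list(sliding_window(2, ['a', 'b', 'c', 'd']))
# -> [('a', 'b'), ('b', 'c'), ('c', 'd')]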
Example #5
def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions['stealing']
    steal._pc.stop()

    counter = itertools.count()
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in sorted(ts, reverse=True):
            if t:
                [dat] = yield c.scatter([next(data_seq)], workers=w.address)
                ts = s.tasks[dat.key]
                # Ensure scheduler state stays consistent
                old_nbytes = ts.nbytes
                ts.nbytes = BANDWIDTH * t
                for ws in ts.who_has:
                    ws.nbytes += ts.nbytes - old_nbytes
            else:
                dat = 123
            s.task_duration[str(int(t))] = 1
            i = next(counter)
            f = c.submit(func, dat, key='%d-%d' % (int(t), i),
                         workers=w.address, allow_other_workers=True,
                         pure=False, priority=-i)
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        yield gen.sleep(0.001)

    for i in range(10):
        steal.balance()

        while steal.in_flight:
            yield gen.sleep(0.001)

        result = [sorted([int(key_split(k)) for k in s.processing[w.address]],
                         reverse=True)
                  for w in workers]

        result2 = sorted(result, reverse=True)
        expected2 = sorted(expected, reverse=True)

        if config.get('pdb-on-err'):
            if result2 != expected2:
                import pdb
                pdb.set_trace()

        if result2 == expected2:
            return
    raise Exception('Expected: {}; got: {}'.format(str(expected2), str(result2)))
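The priority=-i keyword used here (and in the async variant above) keeps submission order meaningful: the scheduler favors larger priority values, so earlier submissions outrank later ones. A sketch, reusing the client c from the harness sketch near the top of the page:

from operator import add

futures = [c.submit(add, i, 1, priority=-i, pure=False)
           for i in range(10)]
# priority 0 (first submission) outranks -9 (last) under contention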
Example #6
def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions['stealing']
    steal._pc.stop()

    counter = itertools.count()
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in ts:
            if t:
                [dat] = yield c._scatter([next(data_seq)], workers=w.address)
                s.nbytes[dat.key] = BANDWIDTH * t
            else:
                dat = 123
            s.task_duration[str(int(t))] = 1
            f = c.submit(func,
                         dat,
                         key='%d-%d' % (int(t), next(counter)),
                         workers=w.address,
                         allow_other_workers=True,
                         pure=False)
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        yield gen.sleep(0.001)

    s.extensions['stealing'].balance()

    result = [
        sorted([int(key_split(k)) for k in s.processing[w.address]],
               reverse=True) for w in workers
    ]

    result2 = sorted(result, reverse=True)
    expected2 = sorted(expected, reverse=True)

    if config.get('pdb-on-err'):
        if result2 != expected2:
            import pdb
            pdb.set_trace()

    assert result2 == expected2
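s.task_duration, seeded in several variants above, is a plain mapping from a key prefix to an estimated runtime in seconds; the stealing heuristic weighs that estimate against the cost of moving the task's inputs. A standalone sketch (the 0.5 s fallback is illustrative):

from distributed.utils import key_split

task_duration = {'2': 1}                              # prefix -> seconds
estimate = task_duration.get(key_split('2-17'), 0.5)  # falls back to a guess
assert estimate == 1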
Example #7
from bokeh.models import (ColumnDataSource, Plot, DataRange1d, Rect,
                          LinearAxis, DatetimeAxis, Grid, BasicTicker,
                          HoverTool, BoxZoomTool, ResetTool, PanTool,
                          WheelZoomTool, Title, Range1d, Quad, Text, value,
                          Line, NumeralTickFormatter, ToolbarBox, Legend,
                          BoxSelectTool, Circle)
from bokeh.models.widgets import DataTable, TableColumn, NumberFormatter
from bokeh.palettes import Spectral9
from bokeh.plotting import figure
from toolz import valmap

from distributed.config import config
from distributed.diagnostics.progress_stream import progress_quads, nbytes_bar
from distributed.utils import log_errors

if config.get('bokeh-export-tool', False):
    from .export_tool import ExportTool
else:
    ExportTool = None


class DashboardComponent(object):
    """ Base class for Dask.distributed UI dashboard components.

    This class must have two attributes, ``root`` and ``source``, and one
    method ``update``:

    *  source: a Bokeh ColumnDataSource
    *  root: a Bokeh Model
    *  update: a method that consumes the messages dictionary found in
               distributed.bokeh.messages
    """
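A minimal sketch of the contract this docstring describes: a Bokeh source, a root model, and an update method. The 'my-component' message key is illustrative:

from bokeh.models import ColumnDataSource
from bokeh.plotting import figure


class MinimalComponent(DashboardComponent):
    def __init__(self):
        self.source = ColumnDataSource({'x': [], 'y': []})
        self.root = figure(title='minimal component')
        self.root.line(x='x', y='y', source=self.source)

    def update(self, messages):
        # consume the slice of the messages dict this component plots
        self.source.data = messages.get('my-component', {'x': [], 'y': []})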
Example #8
    def __init__(self,
                 pod_template=None,
                 name=None,
                 namespace=None,
                 n_workers=0,
                 host='0.0.0.0',
                 port=0,
                 env=None,
                 **kwargs):
        if pod_template is None:
            if 'kubernetes-worker-template-path' in config:
                import yaml
                with open(config['kubernetes-worker-template-path']) as f:
                    d = yaml.safe_load(f)
                pod_template = make_pod_from_dict(d)
            else:
                msg = (
                    "Worker pod specification not provided. See KubeCluster "
                    "docstring for ways to specify workers")
                raise ValueError(msg)

        self.cluster = LocalCluster(ip=host or socket.gethostname(),
                                    scheduler_port=port,
                                    n_workers=0,
                                    **kwargs)
        try:
            kubernetes.config.load_incluster_config()
        except kubernetes.config.ConfigException:
            kubernetes.config.load_kube_config()

        self.core_api = kubernetes.client.CoreV1Api()

        if namespace is None:
            namespace = _namespace_default()
        self.namespace = namespace  # used by the pod-cleanup finalizer below

        if name is None:
            worker_name = config.get('kubernetes-worker-name',
                                     'dask-{user}-{uuid}')
            name = worker_name.format(user=getpass.getuser(),
                                      uuid=str(uuid.uuid4())[:10],
                                      **os.environ)

        self.pod_template = clean_pod_template(pod_template)
        # Default labels that can't be overwritten
        self.pod_template.metadata.labels[
            'dask.pydata.org/cluster-name'] = name
        self.pod_template.metadata.labels['app'] = 'dask'
        self.pod_template.metadata.labels['component'] = 'dask-worker'
        self.pod_template.metadata.namespace = namespace

        self.pod_template.spec.containers[0].env.append(
            kubernetes.client.V1EnvVar(name='DASK_SCHEDULER_ADDRESS',
                                       value=self.scheduler_address))
        if env:
            self.pod_template.spec.containers[0].env.extend([
                kubernetes.client.V1EnvVar(name=k, value=str(v))
                for k, v in env.items()
            ])
        self.pod_template.metadata.generate_name = name

        finalize(self, _cleanup_pods, self.namespace,
                 self.pod_template.metadata.labels)

        if n_workers:
            self.scale(n_workers)
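A hedged usage sketch for the constructor above; the yaml path is illustrative, and from_yaml/scale follow the classic dask_kubernetes API:

from dask.distributed import Client
from dask_kubernetes import KubeCluster

cluster = KubeCluster.from_yaml('worker-template.yaml')  # path is illustrative
cluster.scale(3)               # or KubeCluster(pod_template=..., n_workers=3)
client = Client(cluster)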