def test_stress_scatter_death(c, s, *workers):
    import random

    s.allowed_failures = 1000
    np = pytest.importorskip('numpy')
    L = yield c.scatter([np.random.random(10000) for i in range(len(workers))])
    yield c._replicate(L, n=2)

    adds = [delayed(slowadd, pure=True)(random.choice(L),
                                        random.choice(L),
                                        delay=0.05,
                                        dask_key_name='slowadd-1-%d' % i)
            for i in range(50)]

    adds = [delayed(slowadd, pure=True)(a, b, delay=0.02,
                                        dask_key_name='slowadd-2-%d' % i)
            for i, (a, b) in enumerate(sliding_window(2, adds))]

    futures = c.compute(adds)
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        yield gen.sleep(0.1)
        try:
            s.validate_state()
        except Exception as e:  # renamed from ``c`` to avoid shadowing the client
            logger.exception(e)
            if config.get('log-on-err'):
                import pdb
                pdb.set_trace()
            else:
                raise
        w = random.choice(alive)
        yield w._close()
        alive.remove(w)

    try:
        yield gen.with_timeout(timedelta(seconds=25), c._gather(futures))
    except gen.TimeoutError:
        ws = {w.address: w for w in workers if w.status != 'closed'}
        print(s.processing)
        print(ws)
        print(futures)
        try:
            worker = [w for w in ws.values() if w.waiting_for_data][0]
        except Exception:
            pass
        if config.get('log-on-err'):
            import pdb
            pdb.set_trace()
        else:
            raise
    except CancelledError:
        pass
    finally:
        futures = None
async def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions["stealing"]
    steal._pc.stop()

    counter = itertools.count()
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in sorted(ts, reverse=True):
            if t:
                [dat] = await c.scatter([next(data_seq)], workers=w.address)
                ts = s.tasks[dat.key]
                # Ensure scheduler state stays consistent
                old_nbytes = ts.nbytes
                ts.nbytes = s.bandwidth * t
                for ws in ts.who_has:
                    ws.nbytes += ts.nbytes - old_nbytes
            else:
                dat = 123
            i = next(counter)
            f = c.submit(
                func,
                dat,
                key="%d-%d" % (int(t), i),
                workers=w.address,
                allow_other_workers=True,
                pure=False,
                priority=-i,
            )
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        await asyncio.sleep(0.001)

    for i in range(10):
        steal.balance()

        while steal.in_flight:
            await asyncio.sleep(0.001)

        result = [
            sorted([int(key_split(k)) for k in s.processing[w.address]], reverse=True)
            for w in workers
        ]

        result2 = sorted(result, reverse=True)
        expected2 = sorted(expected, reverse=True)

        if config.get("pdb-on-err"):
            if result2 != expected2:
                import pdb

                pdb.set_trace()

        if result2 == expected2:
            return
    raise Exception("Expected: {}; got: {}".format(str(expected2), str(result2)))
def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions['stealing']
    steal._pc.stop()

    counter = itertools.count()
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in sorted(ts, reverse=True):
            if t:
                [dat] = yield c._scatter([next(data_seq)], workers=w.address)
                ts = s.tasks[dat.key]
                # Ensure scheduler state stays consistent
                old_nbytes = ts.nbytes
                ts.nbytes = BANDWIDTH * t
                for ws in ts.who_has:
                    ws.nbytes += ts.nbytes - old_nbytes
            else:
                dat = 123
            s.task_duration[str(int(t))] = 1
            f = c.submit(func, dat, key='%d-%d' % (int(t), next(counter)),
                         workers=w.address, allow_other_workers=True,
                         pure=False)
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        yield gen.sleep(0.001)

    for i in range(10):
        steal.balance()

        while steal.in_flight:
            yield gen.sleep(0.001)

        result = [
            sorted([int(key_split(k)) for k in s.processing[w.address]], reverse=True)
            for w in workers
        ]

        result2 = sorted(result, reverse=True)
        expected2 = sorted(expected, reverse=True)

        if config.get('pdb-on-err'):
            if result2 != expected2:
                import pdb
                pdb.set_trace()

        if result2 == expected2:
            return
    raise Exception('Expected: {}; got: {}'.format(str(expected2), str(result2)))
async def test_stress_scatter_death(c, s, *workers):
    import random

    s.allowed_failures = 1000
    np = pytest.importorskip("numpy")
    L = await c.scatter([np.random.random(10000) for i in range(len(workers))])
    await c.replicate(L, n=2)

    adds = [
        delayed(slowadd, pure=True)(
            random.choice(L),
            random.choice(L),
            delay=0.05,
            dask_key_name="slowadd-1-%d" % i,
        )
        for i in range(50)
    ]

    adds = [
        delayed(slowadd, pure=True)(a, b, delay=0.02, dask_key_name="slowadd-2-%d" % i)
        for i, (a, b) in enumerate(sliding_window(2, adds))
    ]

    futures = c.compute(adds)
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        await asyncio.sleep(0.1)
        try:
            s.validate_state()
        except Exception as e:  # renamed from ``c``: the except clause would unbind the client
            logger.exception(e)
            if config.get("log-on-err"):
                import pdb

                pdb.set_trace()
            else:
                raise
        w = random.choice(alive)
        await w.close()
        alive.remove(w)

    with suppress(CancelledError):
        await c.gather(futures)

    futures = None
def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions['stealing']
    steal._pc.stop()

    counter = itertools.count()
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in sorted(ts, reverse=True):
            if t:
                [dat] = yield c.scatter([next(data_seq)], workers=w.address)
                ts = s.tasks[dat.key]
                # Ensure scheduler state stays consistent
                old_nbytes = ts.nbytes
                ts.nbytes = BANDWIDTH * t
                for ws in ts.who_has:
                    ws.nbytes += ts.nbytes - old_nbytes
            else:
                dat = 123
            s.task_duration[str(int(t))] = 1
            i = next(counter)
            f = c.submit(func, dat, key='%d-%d' % (int(t), i),
                         workers=w.address, allow_other_workers=True,
                         pure=False, priority=-i)
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        yield gen.sleep(0.001)

    for i in range(10):
        steal.balance()

        while steal.in_flight:
            yield gen.sleep(0.001)

        result = [sorted([int(key_split(k)) for k in s.processing[w.address]],
                         reverse=True)
                  for w in workers]

        result2 = sorted(result, reverse=True)
        expected2 = sorted(expected, reverse=True)

        if config.get('pdb-on-err'):
            if result2 != expected2:
                import pdb
                pdb.set_trace()

        if result2 == expected2:
            return
    raise Exception('Expected: {}; got: {}'.format(str(expected2), str(result2)))
def assert_balanced(inp, expected, c, s, *workers):
    steal = s.extensions['stealing']
    steal._pc.stop()

    counter = itertools.count()
    B = BANDWIDTH
    tasks = list(concat(inp))
    data_seq = itertools.count()

    futures = []
    for w, ts in zip(workers, inp):
        for t in ts:
            if t:
                [dat] = yield c._scatter([next(data_seq)], workers=w.address)
                s.nbytes[dat.key] = BANDWIDTH * t
            else:
                dat = 123
            s.task_duration[str(int(t))] = 1
            f = c.submit(func, dat, key='%d-%d' % (int(t), next(counter)),
                         workers=w.address, allow_other_workers=True,
                         pure=False)
            futures.append(f)

    while len(s.rprocessing) < len(futures):
        yield gen.sleep(0.001)

    s.extensions['stealing'].balance()

    result = [
        sorted([int(key_split(k)) for k in s.processing[w.address]], reverse=True)
        for w in workers
    ]

    result2 = sorted(result, reverse=True)
    expected2 = sorted(expected, reverse=True)

    if config.get('pdb-on-err'):
        if result2 != expected2:
            import pdb
            pdb.set_trace()

    assert result2 == expected2
from bokeh.models import (
    ColumnDataSource, Plot, DataRange1d, Rect, LinearAxis, DatetimeAxis, Grid,
    BasicTicker, HoverTool, BoxZoomTool, ResetTool, PanTool, WheelZoomTool,
    Title, Range1d, Quad, Text, value, Line, NumeralTickFormatter, ToolbarBox,
    Legend, BoxSelectTool, Circle,
)
from bokeh.models.widgets import DataTable, TableColumn, NumberFormatter
from bokeh.palettes import Spectral9
from bokeh.plotting import figure
from toolz import valmap

from distributed.config import config
from distributed.diagnostics.progress_stream import progress_quads, nbytes_bar
from distributed.utils import log_errors

if config.get('bokeh-export-tool', False):
    from .export_tool import ExportTool
else:
    ExportTool = None


class DashboardComponent(object):
    """ Base class for Dask.distributed UI dashboard components.

    This class must have two attributes, ``root`` and ``source``, and one
    method ``update``:

    * source: a Bokeh ColumnDataSource
    * root: a Bokeh Model
    * update: a method that consumes the messages dictionary found in
      distributed.bokeh.messages
    """
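
# Illustrative only: a minimal sketch of the source/root/update contract that
# the DashboardComponent docstring describes. The subclass name
# ``TaskCountComponent`` and the shape of the ``messages`` dictionary used in
# ``update`` are hypothetical and not part of distributed.bokeh.
class TaskCountComponent(DashboardComponent):
    """ Hypothetical example: plot a running task count over time. """

    def __init__(self):
        # ``source`` holds the data that Bokeh renders
        self.source = ColumnDataSource({'time': [], 'count': []})
        # ``root`` is the Bokeh model embedded in the page
        self.root = figure(title='Task count')
        self.root.line(x='time', y='count', source=self.source)

    def update(self, messages):
        # Assumes ``messages`` carries a 'tasks' entry whose latest record has
        # 'time' and 'count' fields (hypothetical shape)
        with log_errors():
            msg = messages['tasks']['deque'][-1]
            self.source.stream({'time': [msg['time']], 'count': [msg['count']]},
                               rollover=1000)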
def __init__(self, pod_template=None, name=None, namespace=None,
             n_workers=0, host='0.0.0.0', port=0, env=None, **kwargs):
    if pod_template is None:
        if 'kubernetes-worker-template-path' in config:
            import yaml
            with open(config['kubernetes-worker-template-path']) as f:
                d = yaml.safe_load(f)
            pod_template = make_pod_from_dict(d)
        else:
            msg = ("Worker pod specification not provided. See KubeCluster "
                   "docstring for ways to specify workers")
            raise ValueError(msg)

    self.cluster = LocalCluster(ip=host or socket.gethostname(),
                                scheduler_port=port,
                                n_workers=0, **kwargs)
    try:
        kubernetes.config.load_incluster_config()
    except kubernetes.config.ConfigException:
        kubernetes.config.load_kube_config()

    self.core_api = kubernetes.client.CoreV1Api()

    if namespace is None:
        namespace = _namespace_default()

    if name is None:
        worker_name = config.get('kubernetes-worker-name', 'dask-{user}-{uuid}')
        name = worker_name.format(user=getpass.getuser(),
                                  uuid=str(uuid.uuid4())[:10],
                                  **os.environ)

    self.pod_template = clean_pod_template(pod_template)
    # Default labels that can't be overwritten
    self.pod_template.metadata.labels['dask.pydata.org/cluster-name'] = name
    self.pod_template.metadata.labels['app'] = 'dask'
    self.pod_template.metadata.labels['component'] = 'dask-worker'
    self.pod_template.metadata.namespace = namespace

    self.pod_template.spec.containers[0].env.append(
        kubernetes.client.V1EnvVar(name='DASK_SCHEDULER_ADDRESS',
                                   value=self.scheduler_address))
    if env:
        self.pod_template.spec.containers[0].env.extend([
            kubernetes.client.V1EnvVar(name=k, value=str(v))
            for k, v in env.items()
        ])
    self.pod_template.metadata.generate_name = name

    finalize(self, _cleanup_pods, self.namespace,
             self.pod_template.metadata.labels)

    if n_workers:
        self.scale(n_workers)