def _retire_workers(self):
    """Ask the cluster scheduler to clean up any workers it decides can retire.

    Generator-style coroutine: it yields the result of
    ``self.scheduler.retire_workers(close=True)`` and logs the value that is
    sent back in.
    """
    with log_errors():
        retired = yield self.scheduler.retire_workers(close=True)
        message = "Retiring workers {}".format(retired)
        logger.info(message)
def scale_cb(b):
    """Callback: scale to the value currently held by ``request``.

    ``b`` is accepted for the callback signature and is not used.
    """
    with log_errors():
        target = request.value
        # Stop the adaptive controller if there is one; an AttributeError
        # raised while doing so is deliberately ignored.
        with suppress(AttributeError):
            self._adaptive.stop()
        self.scale(target)
        update()
def status_doc(worker, extra, doc):
    """Fill ``doc`` with the worker's internal-monitor status page.

    Builds a state table, two time series and a communication stream for
    ``worker``, links the x-ranges of the three plots, registers each
    component for a periodic callback (interval argument 200) and stacks
    their roots in a single column.
    """
    with log_errors():
        statetable = StateTable(worker)
        executing_ts = ExecutingTimeSeries(worker, sizing_mode="scale_width")
        communicating_ts = CommunicatingTimeSeries(worker, sizing_mode="scale_width")
        communicating_stream = CommunicatingStream(worker, sizing_mode="scale_width")
        panels = (statetable, executing_ts, communicating_ts, communicating_stream)

        # The executing time series owns the x-range; the other plots follow it.
        shared_range = executing_ts.root.x_range
        for follower in (communicating_ts, communicating_stream):
            follower.root.x_range = shared_range

        doc.title = "Dask Worker Internal Monitor"
        for panel in panels:
            add_periodic_callback(doc, panel, 200)

        roots = [panel.root for panel in panels]
        doc.add_root(column(*roots, sizing_mode="scale_width"))

        doc.template = env.get_template("simple.html")
        doc.template_variables["active_page"] = "status"
        doc.template_variables.update(extra)
        doc.theme = BOKEH_THEME
def update(self):
    """Push transfers recorded since the previous call into ``self.source``.

    Reads the worker's outgoing and incoming transfer logs, takes the entries
    added since ``last_outgoing`` / ``last_incoming`` (newest first, at most
    1000 per direction), converts every entry that has a non-empty ``"keys"``
    value with ``self.process_msg`` and tags it ``"blue"`` (outgoing) or
    ``"red"`` (incoming). The batch is then either handed to the module-level
    ``update`` helper or streamed into the source with a rollover of 1000.
    """
    with log_errors():

        def newest(log, count):
            # Copies of the ``count`` most recent entries, newest first.
            # The upper bound is ``count + 1``: ``range(1, count)`` would
            # silently skip the oldest new entry (and everything when a
            # single transfer arrived). Also never index past what the log
            # still holds.
            count = min(count, 1000, len(log))
            return [log[-i].copy() for i in range(1, count + 1)]

        outgoing = newest(
            self.worker.outgoing_transfer_log,
            self.worker.outgoing_count - self.last_outgoing,
        )
        self.last_outgoing = self.worker.outgoing_count

        incoming = newest(
            self.worker.incoming_transfer_log,
            self.worker.incoming_count - self.last_incoming,
        )
        self.last_incoming = self.worker.incoming_count

        out = []
        # Incoming entries first, then outgoing, as before.
        for color, batch in (("red", incoming), ("blue", outgoing)):
            for msg in batch:
                if msg["keys"]:
                    d = self.process_msg(msg)
                    d["inout-color"] = color
                    out.append(d)

        if out:
            out = transpose(out)
            stops = self.source.data["stop"]
            # When the new batch starts well after the last recorded stop,
            # call the ``update`` helper instead of streaming onto old data.
            if len(stops) and min(out["start"]) > stops[-1] + 10:
                update(self.source, out)
            else:
                self.source.stream(out, rollover=1000)
def update(self):
    """Append one sample of the worker's executing count to the source.

    The x value is ``time() * 1000``; the stream call uses 1000 as its
    second (rollover) argument.
    """
    with log_errors():
        sample = {
            "x": [time() * 1000],
            "y": [self.worker.executing_count],
        }
        self.source.stream(sample, 1000)
def http_get(route):
    """Fetch ``<route>.json`` from localhost:9786 and record it in ``messages``.

    The decoded JSON payload is appended to ``messages[route]['deque']`` and
    the current ``time()`` to ``messages[route]['times']``.
    """
    with log_errors():
        url = 'http://localhost:9786/%s.json' % route
        response = yield client.fetch(url)
        payload = json.loads(response.body.decode())
        bucket = messages[route]
        bucket['deque'].append(payload)
        bucket['times'].append(time())
def put(self, stream=None, keys=None, data=None, name=None, client=None):
    """Register a dataset under ``name``.

    Raises ``KeyError`` if ``name`` is already present in ``self.datasets``.
    Otherwise tells the scheduler that a ``published-<token>`` client desires
    ``keys``, stores ``data`` and ``keys`` under ``name`` and returns a status
    dict. ``stream`` and ``client`` are accepted but not used here.
    """
    with log_errors():
        if name in self.datasets:
            raise KeyError("Dataset %s already exists" % name)
        holder = 'published-%s' % tokey(name)
        self.scheduler.client_desires_keys(keys, holder)
        entry = {'data': data, 'keys': keys}
        self.datasets[name] = entry
        return {'status': 'OK', 'name': name}
def deserialize_numpy_ndarray(header, frames):
    """Rebuild a NumPy array from ``header`` metadata and raw ``frames``.

    When ``header["pickle"]`` is truthy the array is unpickled from the first
    frame, with the remaining frames passed as out-of-band buffers. Otherwise
    exactly one frame is expected and is wrapped in an ``np.ndarray`` using
    the dtype, shape (``broadcast_to`` wins over ``shape`` when truthy) and
    strides from the header. The writeable flag in the header decides whether
    the result is marked read-only or made writeable via ``np.require``.
    """
    with log_errors():
        if header.get("pickle"):
            # NOTE(security): pickle.loads executes arbitrary code; frames
            # must only ever come from trusted peers.
            return pickle.loads(frames[0], buffers=frames[1:])

        (frame,) = frames
        (writeable,) = header["writeable"]

        is_custom, dt = header["dtype"]
        # NOTE(security): custom dtypes are unpickled as well.
        dt = pickle.loads(dt) if is_custom else np.dtype(dt)

        shape = header.get("broadcast_to") or header["shape"]

        x = np.ndarray(shape, dtype=dt, buffer=frame, strides=header["strides"])
        if writeable:
            x = np.require(x, requirements=["W"])
        else:
            x.flags.writeable = False
        return x
def update(self, messages):
    """Refresh the worker table from the newest ``workers`` message.

    Does nothing when the deque is empty. Builds one column per known metric
    (``cpu`` and ``memory_percent`` are divided by 100); a metric missing from
    any worker is skipped entirely. ``processing`` holds each worker's sorted
    processing entries and ``processes`` the length of its ``ports`` entry.
    """
    with log_errors():
        try:
            latest = messages['workers']['deque'][-1]
        except IndexError:
            return

        hosts = sorted(latest)
        columns = {'host': hosts}
        scaled = ('cpu', 'memory_percent')
        metrics = ('cores', 'cpu', 'memory_percent', 'latency', 'last-seen',
                   'memory', 'disk-read', 'disk-write', 'net-send', 'net-recv')
        for metric in metrics:
            try:
                values = [latest[host][metric] for host in hosts]
            except KeyError:
                # Metric not reported by at least one worker: leave it out.
                continue
            if metric in scaled:
                values = [value / 100 for value in values]
            columns[metric] = values

        columns['processing'] = [sorted(latest[host]['processing']) for host in hosts]
        columns['processes'] = [len(latest[host]['ports']) for host in hosts]
        self.source.data.update(columns)
def task_update():
    """Refresh the task table from the newest ``tasks`` message, if any."""
    with log_errors():
        try:
            latest = messages['tasks']['deque'][-1]
        except IndexError:
            # Nothing received yet.
            return
        else:
            task_table_update(task_source, latest)
def update(self, messages):
    """Refresh the worker table from the newest ``workers`` message.

    Does nothing when the deque is empty. Builds one column per known metric
    (``cpu`` and ``memory_percent`` are divided by 100); a metric missing from
    any worker is skipped entirely. ``processing`` holds each worker's sorted
    processing entries and ``processes`` the length of its ``addresses`` entry.
    """
    with log_errors():
        try:
            latest = messages['workers']['deque'][-1]
        except IndexError:
            return

        hosts = sorted(latest)
        columns = {'host': hosts}
        scaled = ('cpu', 'memory_percent')
        metrics = ('cores', 'cpu', 'memory_percent', 'latency', 'last-seen',
                   'memory', 'disk-read', 'disk-write', 'net-send', 'net-recv')
        for metric in metrics:
            try:
                values = [latest[host][metric] for host in hosts]
            except KeyError:
                # Metric not reported by at least one worker: leave it out.
                continue
            if metric in scaled:
                values = [value / 100 for value in values]
            columns[metric] = values

        columns['processing'] = [sorted(latest[host]['processing']) for host in hosts]
        columns['processes'] = [len(latest[host]['addresses']) for host in hosts]
        self.source.data.update(columns)
def worker_update():
    """Refresh the worker table from the newest ``workers`` message, if any."""
    with log_errors():
        try:
            latest = messages['workers']['deque'][-1]
        except IndexError:
            # Nothing received yet.
            return
        else:
            worker_table_update(worker_source, latest)
def put(self, stream=None, keys=None, data=None, name=None, client=None):
    """Register a dataset under ``name``.

    Raises ``KeyError`` if ``name`` is already present in ``self.datasets``.
    Otherwise tells the scheduler that a ``published-<token>`` client desires
    ``keys``, stores ``data`` and ``keys`` under ``name`` and returns a status
    dict. ``stream`` and ``client`` are accepted but not used here.
    """
    with log_errors():
        if name in self.datasets:
            raise KeyError("Dataset %s already exists" % name)
        holder = "published-%s" % tokey(name)
        self.scheduler.client_desires_keys(keys, holder)
        self.datasets[name] = dict(data=data, keys=keys)
        return dict(status="OK", name=name)
def http_get(route):
    """Fetch ``<route>.json`` from localhost:9786 and record it in ``messages``.

    The decoded payload is appended to the route's deque, every waiter on the
    route's condition is notified, and ``datetime.now()`` is appended to the
    route's times.
    """
    with log_errors():
        url = 'http://localhost:9786/%s.json' % route
        response = yield client.fetch(url)
        payload = json.loads(response.body.decode())
        bucket = messages[route]
        bucket['deque'].append(payload)
        bucket['condition'].notify_all()
        bucket['times'].append(datetime.now())
def start_workers(self, n=1, **kwargs):
    """Submit a bulk job of ``n`` workers and remember the job ids.

    A job template is created from ``kwargs`` and submitted through the
    session's ``runBulkJobs`` with arguments ``(template, 1, n, 1)``. Every
    returned job id is recorded in ``self.workers`` mapped to ``kwargs``.
    """
    with log_errors():
        template = self.createJobTemplate(**kwargs)
        job_ids = get_session().runBulkJobs(template, 1, n, 1)
        # The part of the first id before the first "." is what gets logged.
        base_id = job_ids[0].split('.')[0]
        logger.info("Start %d workers. Job ID: %s", len(job_ids), base_id)
        self.workers.update(dict.fromkeys(job_ids, kwargs))
def request_cb(b):
    """Callback: scale using ``request.value`` as the ``kwarg`` keyword.

    ``b`` is accepted for the callback signature and is not used.
    """
    with log_errors():
        requested = request.value
        # Stop the adaptive controller if there is one; an AttributeError
        # raised while doing so is deliberately ignored.
        with ignoring(AttributeError):
            self._adaptive.stop()
        # ``kwarg`` is a keyword name decided by the enclosing scope.
        self.scale(**{kwarg: requested})
def __init__(self, worker, **kwargs):
    """Build the data source, the four selector widgets and the layout.

    ``kwargs`` are stored on the instance and forwarded to
    ``self.create_figure``; only ``sizing_mode`` (when given) is also passed
    to the widget box and the row.
    """
    with log_errors():
        self.worker = worker

        quantities = ["nbytes", "duration", "bandwidth", "count", "start", "stop"]
        colors = ["inout-color", "type-color", "key-color"]

        # The source starts out with two placeholder rows per column.
        self.source = ColumnDataSource({
            "nbytes": [1, 2],
            "duration": [0.01, 0.02],
            "bandwidth": [0.01, 0.02],
            "count": [1, 2],
            "type": ["int", "str"],
            "inout-color": ["blue", "red"],
            "type-color": ["blue", "red"],
            "key": ["add", "inc"],
            "start": [1, 2],
            "stop": [1, 2],
        })

        def selector(title, value, options):
            # Every selector redraws the figure when its value changes.
            widget = Select(title=title, value=value, options=options)
            widget.on_change("value", self.update_figure)
            return widget

        self.x = selector("X-Axis", "nbytes", quantities)
        self.y = selector("Y-Axis", "bandwidth", quantities)
        self.size = selector("Size", "None", ["None"] + quantities)
        self.color = selector("Color", "inout-color", ["black"] + colors)

        kw = {"sizing_mode": kwargs["sizing_mode"]} if "sizing_mode" in kwargs else {}

        widgets = [self.x, self.y, self.size, self.color]
        self.control = widgetbox(widgets, width=200, **kw)

        self.last_outgoing = 0
        self.last_incoming = 0
        self.kwargs = kwargs

        figure = self.create_figure(**self.kwargs)
        self.layout = row(self.control, figure, **kw)
        self.root = self.layout
def progress_update():
    """Refresh the progress plot and, when task counts exist, its title.

    The quads are always rebuilt from ``messages['progress']``. The title is
    formatted from the newest entry of ``messages['tasks']['deque']``; if that
    deque is still empty the title is left untouched instead of raising
    ``IndexError`` (the sibling table updaters treat an empty deque the same
    way).
    """
    with log_errors():
        msg = messages['progress']
        d = progress_quads(msg)
        progress_source.data.update(d)

        tasks = messages['tasks']['deque']
        if tasks:
            progress_plot.title.text = ("Progress -- total: %(total)s, "
                "in-memory: %(in-memory)s, processing: %(processing)s, "
                "ready: %(ready)s, waiting: %(waiting)s, failed: %(failed)s"
                % tasks[-1])
def wrapper(arg, extra, doc):
    """Apply the common page setup to ``doc``, then delegate to ``f``.

    ``title``, ``template``, ``active_page`` and ``f`` come from the
    enclosing scope. ``active_page`` is only written into the template
    variables when it is not ``None``.
    """
    with log_errors():
        doc.title = title
        doc.template = env.get_template(template)
        variables = doc.template_variables
        if active_page is not None:
            variables["active_page"] = active_page
        variables.update(extra)
        doc.theme = BOKEH_THEME
        result = f(arg, extra, doc)
        return result
def _partial_fit(model_and_meta, X, y, fit_params):
    """
    Call partial_fit on a classifier with training data X and y

    Arguments
    ---------
    model_and_meta : Tuple[Estimator, dict]
        The model and its metadata. The metadata must already contain a
        ``partial_fit_calls`` entry.
    X, y : np.ndarray, np.ndarray
        Training data
    fit_params : dict
        Extra keyword arguments to pass to partial_fit. A falsy value
        (e.g. ``None``) means no extra arguments.

    Returns
    -------
    model : Estimator
        A deep copy of the input model after ``partial_fit(X, y)``. When
        ``len(X)`` is zero no fitting happens and the input model itself is
        returned.
    meta : dict
        A new dict copied from the input metadata with ``partial_fit_calls``
        incremented by one and ``partial_fit_time`` set to the time elapsed
        in this call (as measured by ``time()``).
    """
    with log_errors():
        start = time()
        model, meta = model_and_meta

        # ``len(X)`` rather than truthiness: X may be an array.
        if len(X):
            # Fit a copy so the caller's model is left unmodified.
            model = deepcopy(model)
            model.partial_fit(X, y, **(fit_params or {}))

        # The call counter advances even when X was empty.
        meta = dict(meta)
        meta["partial_fit_calls"] += 1
        meta["partial_fit_time"] = time() - start

        return model, meta
def update(self, messages):
    """Refresh the memory bar and its title from the ``progress`` message.

    Returns without touching anything when the message is empty/falsy.
    """
    with log_errors():
        progress = messages['progress']
        if not progress:
            return

        bars = nbytes_bar(progress['nbytes'])
        self.source.data.update(bars)

        total_mb = sum(progress['nbytes'].values()) / 1e6
        self.root.title.text = "Memory Use: %0.2f MB" % total_mb
def update(self, messages):
    """Refresh the memory bar and its title from the ``progress`` message.

    Returns without touching anything when the message is empty/falsy. The
    source is refreshed through the module-level ``update`` helper.
    """
    with log_errors():
        progress = messages["progress"]
        if not progress:
            return

        bars = nbytes_bar(progress["nbytes"])
        update(self.source, bars)

        total_mb = sum(progress["nbytes"].values()) / 1e6
        self.root.title.text = "Memory Use: %0.2f MB" % total_mb
def get(self):
    """Render ``logs.html`` with the server's logs.

    Template keywords are ``title``, ``logs`` and the merge of ``self.extra``
    with ``rel_path_statics``.
    """
    with log_errors():
        logs = self.server.get_logs()
        context = merge(self.extra, rel_path_statics)
        self.render("logs.html", title="Logs", logs=logs, **context)
def _create_model(model, ident, **params):
    """Clone ``model``, apply ``params`` and pair it with fresh metadata.

    Returns a ``(model, meta)`` tuple where ``meta`` records the model id,
    the parameters and a ``partial_fit_calls`` counter starting at 0.
    """
    with log_errors():
        fresh = clone(model)
        fresh = fresh.set_params(**params)
        meta = dict(model_id=ident, params=params, partial_fit_calls=0)
        return fresh, meta
async def write(
    self,
    msg: dict,
    serializers=("cuda", "dask", "pickle", "error"),
    on_error: str = "message",
):
    """Serialize ``msg`` into frames and send them over the endpoint.

    Three kinds of messages are sent in order: a ``"?Q"`` header (close flag
    ``False`` and the frame count), a per-frame description (CUDA flags
    followed by sizes), then every frame whose size is non-zero.

    Parameters
    ----------
    msg : dict
        The message; may also be a list of dicts for batched messages.
    serializers : tuple, optional
        Serializer names handed to ``to_frames``; ``None`` selects the
        default tuple.
    on_error : str
        Forwarded to ``to_frames``.

    Returns
    -------
    int
        The sum of all frame sizes.

    Raises
    ------
    CommClosedError
        If the endpoint is already closed, or a UCX exception occurs while
        writing (the comm is aborted first).
    """
    with log_errors():
        if self.closed():
            raise CommClosedError("Endpoint is closed -- unable to send message")
        try:
            if serializers is None:
                serializers = ("cuda", "dask", "pickle", "error")
            # msg can also be a list of dicts when sending batched messages
            frames = await to_frames(
                msg,
                serializers=serializers,
                on_error=on_error,
                allow_offload=self.allow_offload,
            )
            nframes = len(frames)
            cuda_frames = tuple(
                hasattr(f, "__cuda_array_interface__") for f in frames
            )
            sizes = tuple(nbytes(f) for f in frames)

            # Only non-empty frames are transmitted. Kept as (is_cuda, frame)
            # pairs: unpacking ``zip(*pairs)`` into two names raises
            # ValueError when no frame survives the filter.
            outgoing = [
                (is_cuda, each_frame)
                for is_cuda, each_frame, size in zip(cuda_frames, frames, sizes)
                if size > 0
            ]

            # Send meta data

            # Send close flag and number of frames (_Bool, int64)
            await self.ep.send(struct.pack("?Q", False, nframes))
            # Send which frames are CUDA (bool) and
            # how large each frame is (uint64)
            await self.ep.send(
                struct.pack(nframes * "?" + nframes * "Q", *cuda_frames, *sizes)
            )

            # Send frames

            # It is necessary to first synchronize the default stream before
            # start sending. We synchronize the default stream because UCX is
            # not stream-ordered and syncing the default stream will wait for
            # other non-blocking CUDA streams. Note this is only sufficient if
            # the memory being sent is not currently in use on non-blocking
            # CUDA streams.
            if any(is_cuda for is_cuda, _ in outgoing):
                synchronize_stream(0)

            for _, each_frame in outgoing:
                await self.ep.send(each_frame)
            return sum(sizes)
        except ucp.exceptions.UCXBaseException:
            self.abort()
            raise CommClosedError("While writing, the connection was closed")
def progress():
    """Keep ``messages['progress']`` equal to the newest progress message.

    Opens a progress stream to ``localhost:8786`` (second argument 0.050) and
    reads from it until the stream is closed.
    """
    with log_errors():
        stream = yield progress_stream('localhost:8786', 0.050)
        while True:
            try:
                latest = yield read(stream)
            except StreamClosedError:
                break
            messages['progress'] = latest
def ts_change(attr, old, new):
    """Selection callback: derive ``self.start`` / ``self.stop`` and refresh.

    With a non-empty selection the window is taken from the ``"time"`` column
    at the smallest and largest selected index, each divided by 1000. With no
    selection both bounds are reset to ``None``. The callback arguments are
    not used.
    """
    with log_errors():
        picked = self.ts_source.selected.indices
        if not picked:
            self.start = self.stop = None
        else:
            first = self.ts_source.data["time"][min(picked)] / 1000
            last = self.ts_source.data["time"][max(picked)] / 1000
            self.start, self.stop = min(first, last), max(first, last)
        self.trigger_update()
def datafeed_doc(webserver, extra, doc):
    """Populate ``doc`` with the data-feed dashboard page.

    The dashboard is registered for a periodic callback (interval argument
    1000) and its root is added to the document.
    """
    with log_errors():
        dashboard = DataFeedDashboard(webserver, sizing_mode="stretch_both")
        doc.title = "Adit: FX Data Feed"
        add_periodic_callback(doc, dashboard, 1000)
        doc.add_root(dashboard.root)

        page_template = env.get_template("aditdata.html")
        doc.template = page_template
        doc.template_variables.update(extra)
        doc.theme = BOKEH_THEME
def update(self):
    """Append one sample of the worker's communication counts to the source.

    ``out`` is the size of ``worker._comms`` and ``in`` the size of
    ``worker.in_flight_workers``; the stream call uses 10000 as its second
    (rollover) argument.
    """
    with log_errors():
        worker = self.worker
        sample = {
            "x": [time() * 1000],
            "out": [len(worker._comms)],
            "in": [len(worker.in_flight_workers)],
        }
        self.source.stream(sample, 10000)
def progress():
    """Keep ``messages['progress']`` equal to the newest progress message.

    Opens a progress stream to the host and TCP port found in ``options``
    (second argument 0.050) and reads from it until the stream is closed.
    """
    with log_errors():
        address = '%(host)s:%(tcp-port)d' % options
        stream = yield progress_stream(address, 0.050)
        while True:
            try:
                latest = yield read(stream)
            except StreamClosedError:
                break
            messages['progress'] = latest
def event_set(self, name=None):
    """Set the event with the given name to true.

    All waiters on this event will be notified.
    """
    with log_errors():
        key = self._normalize_name(name)
        # The event is set whether or not anyone is currently listening.
        event = self._events[key]
        event.set()
def datahealth_doc(worker, extra, doc):
    """Populate ``doc`` with the data-health dashboard page.

    Builds a fixed-size ``DataHealthDashboard`` for ``worker``, adds its root
    to the document and applies the ``aditdata.html`` template, the ``extra``
    template variables and the shared theme.
    """
    with log_errors():
        datamonitor = DataHealthDashboard(worker, sizing_mode="fixed")
        # Page title; previously misspelled as "Heath".
        doc.title = "Adit: FX Data Health"
        # NOTE: no periodic callback is registered for this page; the call
        # was disabled in the original code and is left that way.
        doc.add_root(datamonitor.root)
        doc.template = env.get_template("aditdata.html")
        doc.template_variables.update(extra)
        doc.theme = BOKEH_THEME
def status_doc(worker, extra, doc):
    """Populate ``doc`` with the status dashboard page.

    The dashboard is registered for a periodic callback (interval argument
    1000) and its root is added to the document.
    """
    with log_errors():
        dashboard = StatusDashboard(worker, sizing_mode="stretch_both")
        doc.title = "Adit: Status Dashboard"
        add_periodic_callback(doc, dashboard, 1000)
        doc.add_root(dashboard.root)

        page_template = env.get_template("aditdata.html")
        doc.template = page_template
        doc.template_variables.update(extra)
        doc.theme = BOKEH_THEME
async def read(self, deserializers=("cuda", "dask", "pickle", "error")):
    """Receive one message from the endpoint and deserialize it.

    First receives the ``"?Q"`` header (close flag and frame count) and the
    per-frame description (CUDA flags followed by sizes), then allocates a
    buffer for every frame and receives the ones whose size is non-zero.

    Parameters
    ----------
    deserializers : tuple, optional
        Deserializer names handed to ``from_frames``; ``None`` selects the
        default tuple.

    Returns
    -------
    The value produced by ``from_frames``.

    Raises
    ------
    CommClosedError
        If the writer signalled shutdown, or the connection was closed,
        cancelled or reset while receiving the metadata (the comm is aborted
        first in that case).
    """
    with log_errors():
        if deserializers is None:
            deserializers = ("cuda", "dask", "pickle", "error")

        try:
            # Recv meta data

            # Recv close flag and number of frames (_Bool, int64)
            msg = host_array(struct.calcsize("?Q"))
            await self.ep.recv(msg)
            (shutdown, nframes) = struct.unpack("?Q", msg)

            if shutdown:  # The writer is closing the connection
                raise CommClosedError("Connection closed by writer")

            # Recv which frames are CUDA (bool) and
            # how large each frame is (uint64)
            header_fmt = nframes * "?" + nframes * "Q"
            header = host_array(struct.calcsize(header_fmt))
            await self.ep.recv(header)
            header = struct.unpack(header_fmt, header)
            cuda_frames, sizes = header[:nframes], header[nframes:]
        except (
            ucp.exceptions.UCXCloseError,
            ucp.exceptions.UCXCanceled,
        ) + (getattr(ucp.exceptions, "UCXConnectionReset", ()),):
            self.abort()
            raise CommClosedError("Connection closed by writer")
        else:
            # Recv frames
            frames = [
                device_array(each_size) if is_cuda else host_array(each_size)
                for is_cuda, each_size in zip(cuda_frames, sizes)
            ]

            # Only non-empty frames are received. Kept as (is_cuda, frame)
            # pairs: unpacking ``zip(*pairs)`` into two names raises
            # ValueError when every announced frame has size zero.
            incoming = [
                (is_cuda, each_frame)
                for is_cuda, each_frame in zip(cuda_frames, frames)
                if nbytes(each_frame) > 0
            ]

            # It is necessary to first populate `frames` with CUDA arrays and
            # synchronize the default stream before starting receiving to
            # ensure buffers have been allocated
            if any(is_cuda for is_cuda, _ in incoming):
                synchronize_stream(0)

            for _, each_frame in incoming:
                await self.ep.recv(each_frame)
            msg = await from_frames(
                frames,
                deserialize=self.deserialize,
                deserializers=deserializers,
                allow_offload=self.allow_offload,
            )
            return msg
def profile_server_doc(server, extra, doc):
    """Populate ``doc`` with the event-loop profile page.

    The profile component is created and attached first; its update is
    triggered only after the template and theme have been applied. No
    ``active_page`` template variable is set for this page.
    """
    with log_errors():
        doc.title = "Dask: Profile of Event Loop"
        component = ProfileServer(server, sizing_mode="stretch_both", doc=doc)
        doc.add_root(component.root)

        page_template = env.get_template("simple.html")
        doc.template = page_template
        doc.template_variables.update(extra)
        doc.theme = BOKEH_THEME

        component.trigger_update()
def profile_doc(server, extra, doc):
    """Populate ``doc`` with the worker profile page.

    The profile plot's update is triggered before its root is added to the
    document; the page is marked as the ``profile`` active page.
    """
    with log_errors():
        doc.title = "Dask Worker Profile"
        plot = ProfileTimePlot(server, sizing_mode="stretch_both", doc=doc)
        plot.trigger_update()
        doc.add_root(plot.root)

        page_template = env.get_template("simple.html")
        doc.template = page_template
        variables = doc.template_variables
        variables["active_page"] = "profile"
        variables.update(extra)
        doc.theme = BOKEH_THEME
def update(self, messages):
    """Refresh the progress quads and, when task counts exist, the title.

    Returns early when the ``progress`` message is empty/falsy. The title is
    only rewritten when the ``tasks`` deque holds at least one entry.
    """
    with log_errors():
        progress = messages['progress']
        if not progress:
            return

        quads = progress_quads(progress)
        self.source.data.update(quads)

        tasks = messages['tasks']['deque']
        if tasks:
            counts = tasks[-1]
            self.root.title.text = ("Progress -- total: %(total)s, "
                "in-memory: %(in-memory)s, processing: %(processing)s, "
                "waiting: %(waiting)s, failed: %(failed)s"
                % counts)
def workers():
    """Fetch ``workers.json`` from localhost:9786 and record it in ``messages``.

    An empty/falsy payload is ignored. Otherwise the payload and the current
    ``time()`` are appended to the ``workers`` deques, the plot data is
    extended through ``resource_append`` and the running index advances by
    one.
    """
    with log_errors():
        response = yield client.fetch('http://localhost:9786/workers.json')
        snapshot = json.loads(response.body.decode())
        if not snapshot:
            return

        feed = messages['workers']
        feed['deque'].append(snapshot)
        feed['times'].append(time())
        resource_append(feed['plot-data'], snapshot)

        position = last_index[0] + 1
        feed['index'].append(position)
        last_index[0] = position
def http_get(route):
    """Fetch ``<route>.json`` from the configured HTTP port and record it.

    Host and port come from ``options``. If the connection is refused the
    process exits with status 0. Otherwise the decoded payload is appended to
    ``messages[route]['deque']`` and ``time()`` to ``messages[route]['times']``.
    """
    with log_errors():
        url = 'http://%(host)s:%(http-port)d/' % options + route + '.json'
        try:
            response = yield client.fetch(url)
        except ConnectionRefusedError:
            import sys
            sys.exit(0)
        payload = json.loads(response.body.decode())
        bucket = messages[route]
        bucket['deque'].append(payload)
        bucket['times'].append(time())
def processing():
    """Keep ``messages["processing"]`` equal to the newest feed message.

    Connects to the host and TCP port in ``options``, sends a ``feed``
    request carrying the serialized scheduler-side ``processing`` function
    with interval 0.200, then reads replies until the stream is closed.
    """
    with log_errors():
        # Imported locally; the imported function shares this function's name.
        from distributed.diagnostics.scheduler import processing as feed_function

        stream = yield connect(ip=options["host"], port=options["tcp-port"])
        request = {"op": "feed", "function": dumps(feed_function), "interval": 0.200}
        yield write(stream, request)
        while True:
            try:
                latest = yield read(stream)
            except StreamClosedError:
                break
            messages["processing"] = latest
def progress_update():
    """Refresh the progress plot, its title, and the memory-use plot.

    The progress title is formatted from the newest entry of
    ``messages['tasks']['deque']``; if that deque is still empty the title is
    left untouched instead of raising ``IndexError``, so the memory plot
    below is still refreshed.
    """
    with log_errors():
        msg = messages['progress']
        d = progress_quads(msg)
        progress_source.data.update(d)

        tasks = messages['tasks']['deque']
        if tasks:
            progress_plot.title.text = ("Progress -- total: %(total)s, "
                "in-memory: %(in-memory)s, processing: %(processing)s, "
                "ready: %(ready)s, waiting: %(waiting)s, failed: %(failed)s"
                % tasks[-1])

        nb = nbytes_bar(msg['nbytes'])
        nbytes_task_source.data.update(nb)
        total_mb = sum(msg['nbytes'].values()) / 1e6
        nbytes_task_plot.title.text = "Memory Use: %0.2f MB" % total_mb
def processing():
    """Keep ``messages['processing']`` equal to the newest feed message.

    Connects to the host and TCP port in ``options``, sends a ``feed``
    request carrying the serialized scheduler-side ``processing`` function
    with interval 0.200, then reads replies until the stream is closed.
    """
    with log_errors():
        # Imported locally; the imported function shares this function's name.
        from distributed.diagnostics.scheduler import processing as feed_function

        stream = yield connect(ip=options['host'], port=options['tcp-port'])
        request = {'op': 'feed', 'function': dumps(feed_function), 'interval': 0.200}
        yield write(stream, request)
        while True:
            try:
                latest = yield read(stream)
            except StreamClosedError:
                break
            messages['processing'] = latest
def update(self, messages):
    """Refresh the processing bars and adapt the right edge of the x-range.

    Nothing happens while the message has no (or an empty) ``ncores`` entry.
    The range end jumps out when the data exceeds it and eases back in when
    it is far larger than the data.
    """
    with log_errors():
        msg = messages['processing']
        if not msg.get('ncores'):
            return

        data = self.processing_update(msg)
        x_range = self.root.x_range
        widest = max(data['right'])
        cores = max(data['ncores'])

        current_end = x_range.end
        if current_end < widest:
            x_range.end = widest + 2
        elif current_end > 2 * widest + cores:
            # Way out there: walk back gradually.
            x_range.end = current_end * 0.95 + widest * 0.05

        self.source.data.update(data)
def resource_update():
    """Stream the worker plot data added since the last call.

    Returns early when there is no index yet or nothing new since
    ``resource_index[0]``. On the very first run (``resource_index == [0]``)
    the plot data is first converted with ``valmap(list, ...)``.
    """
    with log_errors():
        feed = messages['workers']
        index = feed['index']
        data = feed['plot-data']
        if not index or index[-1] == resource_index[0]:
            return

        if resource_index == [0]:
            data = valmap(list, data)

        first_new = bisect(index, resource_index[0])
        fresh = range(first_new, len(index))
        data = {
            column: [values[i] for i in fresh]
            for column, values in data.items()
        }
        resource_index[0] = index[-1]
        resource_source.stream(data, 1000)
def processing_update(msg):
    """Turn a processing message into column data, one row per sorted name.

    Returns a dict with the names, their ``processing`` and ``ncores``
    values, a ``right`` column copying ``processing``, ``top`` / ``bottom``
    columns counting down from ``n`` and ``n - 1``, and a constant ``alpha``
    of 0.7.
    """
    with log_errors():
        names = sorted(msg['processing'])
        count = len(names)

        per_name = msg['processing']
        processing = [per_name[name] for name in names]
        cores_by_name = msg['ncores']
        ncores = [cores_by_name[name] for name in names]

        columns = {
            'name': list(names),
            'processing': processing,
            'right': list(processing),
            'top': list(range(count, 0, -1)),
            'bottom': list(range(count - 1, -1, -1)),
            'ncores': ncores,
        }
        columns['alpha'] = [0.7] * count
        return columns
def processing_plot_update():
    """Refresh the processing plot data and adapt both edges of its x-range.

    Nothing happens while the message's ``ncores`` entry is empty. Each edge
    jumps outward when the data exceeds it and eases back when it is far
    beyond the data.
    """
    with log_errors():
        msg = messages['processing']
        if not msg['ncores']:
            return

        data = processing_update(msg)
        x_range = processing_plot.x_range
        max_right = max(data['right'])
        # The last entry of ``left`` is excluded from the minimum.
        min_left = min(data['left'][:-1])
        cores = max(data['ncores'])

        start = x_range.start
        if min_left < start:
            # Not out there enough: jump ahead.
            x_range.start = min_left - 2
        elif start < 2 * min_left - cores:
            # Way out there: walk back gradually.
            x_range.start = start * 0.95 + min_left * 0.05

        end = x_range.end
        if end < max_right:
            x_range.end = max_right + 2
        elif end > 2 * max_right + cores:
            # Way out there: walk back gradually.
            x_range.end = end * 0.95 + max_right * 0.05

        processing_source.data.update(data)
def update(self, messages):
    """Push task-stream rectangles added since the last call to the source.

    Returns early when there is no index yet or nothing new since
    ``self.task_stream_index[0]``. If the new rectangles begin more than
    ``self.clear_interval`` after the end of the previous one, the source data
    is updated in place with just the new rectangles; otherwise they are
    streamed with a rollover of ``self.n_rectangles``.
    """
    with log_errors():
        events = messages['task-events']
        index = events['index']
        history = events['rectangles']
        cursor = self.task_stream_index
        if not index or index[-1] == cursor[0]:
            return

        first_new = bisect(index, cursor[0])
        fresh = range(first_new, len(index))
        rectangles = {
            column: [values[i] for i in fresh]
            for column, values in history.items()
        }
        cursor[0] = index[-1]

        # If there has been a significant delay then clear old rectangles
        if rectangles['start']:
            previous = first_new - 1
            last_end = history['start'][previous] + history['duration'][previous]
            if min(rectangles['start']) > last_end + self.clear_interval:
                self.source.data.update(rectangles)
                return

        self.source.stream(rectangles, self.n_rectangles)
def task_stream_update():
    """Push task-stream rectangles added since the last call to the source.

    Returns early when there is no index yet or nothing new since
    ``task_stream_index[0]``. If the new rectangles begin more than 20000
    after the end of the previous one, the source data is updated in place
    with just the new rectangles; otherwise they are streamed with a rollover
    of 1000.
    """
    with log_errors():
        events = messages['task-events']
        index = events['index']
        history = events['rectangles']
        if not index or index[-1] == task_stream_index[0]:
            return

        first_new = bisect(index, task_stream_index[0])
        fresh = range(first_new, len(index))
        rectangles = {
            column: [values[i] for i in fresh]
            for column, values in history.items()
        }
        task_stream_index[0] = index[-1]

        # After a long gap, clear old rectangles. The threshold is 20000 in
        # the units of ``start`` (presumably milliseconds -- TODO confirm).
        if rectangles['start']:
            previous = first_new - 1
            last_end = history['start'][previous] + history['duration'][previous]
            if min(rectangles['start']) > last_end + 20000:  # long delay
                task_stream_source.data.update(rectangles)
                return

        task_stream_source.stream(rectangles, 1000)
def task_events(interval, deque, times, index, rectangles, workers, last_seen):
    """Consume the scheduler event stream and record compute events.

    Reads batches from an event stream on ``localhost:8786`` until it closes.
    Each non-empty batch refreshes ``last_seen[0]``. Every message containing
    ``'compute-start'`` is appended to ``deque``, its start to ``times`` and a
    running counter to ``index`` (a second counter value is appended when the
    message also has ``'transfer-start'``), then passed to
    ``task_stream_append``. ``interval`` is not used here.
    """
    counter = 0
    with log_errors():
        stream = yield eventstream('localhost:8786', 0.100)
        while True:
            try:
                batch = yield read(stream)
            except StreamClosedError:
                break
            if not batch:
                continue

            last_seen[0] = time()
            for event in batch:
                if 'compute-start' not in event:
                    continue
                deque.append(event)
                times.append(event['compute-start'])
                index.append(counter)
                counter += 1
                if 'transfer-start' in event:
                    index.append(counter)
                    counter += 1
                task_stream_append(rectangles, event, workers)
def task_events(interval, deque, times, index, rectangles, workers, last_seen):
    """Consume the scheduler event stream and record compute events.

    Reads batches from an event stream on the host and TCP port in ``options``
    until it closes. Each non-empty batch refreshes ``last_seen[0]``. Every
    message containing ``'compute_start'`` is appended to ``deque``, its start
    to ``times`` and a running counter to ``index`` (a second counter value is
    appended when ``'transfer_start'`` is present and not ``None``), then
    passed to ``task_stream_append``. ``interval`` is not used here.
    """
    counter = 0
    with log_errors():
        address = '%(host)s:%(tcp-port)d' % options
        stream = yield eventstream(address, 0.100)
        while True:
            try:
                batch = yield read(stream)
            except StreamClosedError:
                break
            if not batch:
                continue

            last_seen[0] = time()
            for event in batch:
                if 'compute_start' not in event:
                    continue
                deque.append(event)
                times.append(event['compute_start'])
                index.append(counter)
                counter += 1
                if event.get('transfer_start') is not None:
                    index.append(counter)
                    counter += 1
                task_stream_append(rectangles, event, workers)
def get(self, stream, name=None, client=None):
    """Return the dataset stored under ``name``.

    Raises ``KeyError`` when ``name`` is not in ``self.datasets``. ``stream``
    and ``client`` are accepted but not used here.
    """
    with log_errors():
        if name not in self.datasets:
            raise KeyError("Dataset '%s' not found" % name)
        return self.datasets[name]
def get(self, stream, name=None, client=None):
    """Return the dataset stored under ``name``, or ``None`` if absent.

    ``stream`` and ``client`` are accepted but not used here.
    """
    with log_errors():
        found = self.datasets.get(name)
        return found
def worker_update():
    """Wait on the ``workers`` condition, then refresh the worker table.

    Generator-style coroutine: yields the condition's ``wait()`` and then
    uses the newest entry of the ``workers`` deque.
    """
    with log_errors():
        condition = messages['workers']['condition']
        yield condition.wait()
        latest = messages['workers']['deque'][-1]
        worker_table_update(worker_source, latest)
def delete(self, stream=None, name=None):
    """Remove the dataset ``name`` and release its keys on the scheduler.

    A missing dataset is treated as one with no keys, so the release call is
    still made with an empty key list. ``stream`` is not used here.
    """
    with log_errors():
        removed = self.datasets.pop(name, {'keys': []})
        keys = removed['keys']
        holder = 'published-%s' % tokey(name)
        self.scheduler.client_releases_keys(keys, holder)
def list(self, *args):
    """Return the dataset names as a new list, ordered by their ``str`` form.

    Extra positional arguments are accepted and ignored.
    """
    with log_errors():
        # ``sorted`` already returns a list and iterating the mapping yields
        # its keys, so no ``list(...)`` / ``.keys()`` wrappers are needed.
        # This also avoids calling the name ``list``, which this function
        # itself shadows.
        return sorted(self.datasets, key=str)
def progress_update():
    """Rebuild the progress quads from the current ``progress`` message."""
    with log_errors():
        quads = progress_quads(messages['progress'])
        progress_source.data.update(quads)
def task_update():
    """Wait on the ``tasks`` condition, then refresh the task table.

    Generator-style coroutine: yields the condition's ``wait()`` and then
    uses the newest entry of the ``tasks`` deque.
    """
    with log_errors():
        condition = messages['tasks']['condition']
        yield condition.wait()
        latest = messages['tasks']['deque'][-1]
        task_table_update(task_source, latest)