def _cumsum(seq, initial_zero):
    """Return the running sums of ``seq`` as a tuple.

    Parameters
    ----------
    seq : iterable or _HashIdWrapper
        Values to accumulate.  A ``_HashIdWrapper`` is unwrapped through its
        ``wrapped`` attribute before accumulating.
    initial_zero : bool
        When truthy the accumulation is seeded with ``0``; otherwise it is
        run over ``seq`` alone.

    Returns
    -------
    tuple
        The accumulated values.
    """
    values = seq.wrapped if isinstance(seq, _HashIdWrapper) else seq
    if not initial_zero:
        return tuple(toolz.accumulate(add, values))
    return tuple(toolz.accumulate(add, values, 0))
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """Generic driver for arg-reductions.

    ``chunk`` is scheduled once per block of ``x`` and the per-block results
    are then merged by ``_tree_reduce`` using ``combine`` and ``agg``.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.  Each task calls it with the block, the
        normalized ``axis`` tuple and that block's offset information.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
        Axis to reduce over.  ``None`` reduces over every axis.
    split_every : int or dict, optional
        Forwarded to ``_tree_reduce``.
    out : optional
        Forwarded to ``handle_out`` together with the reduced array.

    Raises
    ------
    TypeError
        If ``axis`` is neither ``None`` nor an integer.
    ValueError
        If a reduced axis has more than one chunk and any of its chunk sizes
        is NaN.
    """
    if axis is None:
        ravel = True
        axis = tuple(range(x.ndim))
    elif isinstance(axis, Integral):
        axis = (validate_axis(axis, x.ndim),)
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, got '{0}'".format(axis))

    for ax in axis:
        sizes = x.chunks[ax]
        if len(sizes) > 1 and np.isnan(sizes).any():
            raise ValueError(
                "Arg-reductions do not work with arrays that have "
                "unknown chunksizes. At some point in your computation "
                "this array lost chunking information.\n\n"
                "A possible solution is with \n"
                " x.compute_chunk_sizes()"
            )

    # Map chunk across all blocks
    token = tokenize(axis, x, chunk, combine, split_every)
    name = "arg-reduce-{0}".format(token)
    source_name = x.name

    block_ids = list(product(*map(range, x.numblocks)))
    # Start offset of every block along each dimension.
    starts_per_dim = [accumulate(operator.add, bd[:-1], 0) for bd in x.chunks]
    offsets = list(product(*starts_per_dim))
    if ravel:
        # Each block gets its full offset tuple paired with the array shape.
        offset_info = zip(offsets, repeat(x.shape))
    else:
        # Only the offset along the single reduced axis is passed on.
        offset_info = pluck(axis[0], offsets)

    # Reduced axes are given size-1 chunks; the others keep their chunking.
    out_chunks = tuple(
        (1,) * len(c) if i in axis else c for i, c in enumerate(x.chunks)
    )
    dsk = {
        (name,) + block: (chunk, (source_name,) + block, axis, off)
        for block, off in zip(block_ids, offset_info)
    }

    # The dtype of `tmp` doesn't actually matter, just need to provide something
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
    tmp = Array(graph, name, out_chunks, dtype=x.dtype)

    index_dtype = np.argmin([1]).dtype
    reduced = _tree_reduce(tmp, agg, axis, False, index_dtype, split_every, combine)
    return handle_out(out, reduced)
def cumdims_label(chunks, const):
    """Internal utility: label every cumulative chunk boundary.

    For each dimension in ``chunks`` the running sum of its block sizes,
    starting from 0, is computed and every value is paired with ``const``.

    >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
    [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
     (('n', 0), ('n', 2), ('n', 4), ('n', 5))]
    """
    labelled = []
    for sizes in chunks:
        boundaries = accumulate(add, (0,) + sizes)
        labelled.append(tuple((const, edge) for edge in boundaries))
    return labelled
def fromfunction(func, chunks="auto", shape=None, dtype=None, **kwargs):
    """Build an ``Array`` with one ``_np_fromfunction`` task per chunk.

    Parameters
    ----------
    func : callable
        Passed as the first argument of every ``_np_fromfunction`` task.
    chunks : optional
        Chunk specification, normalized with ``normalize_chunks``.
        Defaults to ``"auto"``.
    shape : optional
        Overall shape, forwarded to ``normalize_chunks``.
    dtype : optional
        Output dtype.  ``None`` (the default) means ``float``.
    **kwargs
        Passed through unchanged to every ``_np_fromfunction`` task.

    Returns
    -------
    Array
        Array whose graph maps each block key to
        ``(_np_fromfunction, func, block_shape, dtype, block_offset, kwargs)``.

    Notes
    -----
    The effective dtype (after applying the default) is what gets passed to
    ``normalize_chunks`` and ``tokenize``, so ``dtype=None`` and
    ``dtype=float`` produce the same chunking and the same name.
    """
    # Resolve the default before chunk normalization: dask's auto-chunking
    # sizes chunks from the dtype and cannot work with ``dtype=None``, which
    # made the all-defaults call fail.  Compare against None explicitly so a
    # caller-supplied dtype is never swapped for the default.
    if dtype is None:
        dtype = float

    chunks = normalize_chunks(chunks, shape, dtype=dtype)
    name = "fromfunction-" + tokenize(func, chunks, shape, dtype, kwargs)

    # One key per block: (name, i, j, ...).
    keys = list(product([name], *[range(len(bd)) for bd in chunks]))
    # Start offset of every block along each dimension.
    aggdims = [list(accumulate(add, (0,) + bd[:-1])) for bd in chunks]
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))

    values = [
        (_np_fromfunction, func, shp, dtype, offset, kwargs)
        for offset, shp in zip(offsets, shapes)
    ]
    dsk = dict(zip(keys, values))
    return Array(dsk, name, chunks, dtype=dtype)
def plot_cache(results, dsk, start_time, metric_name, palette="Viridis", label_size=60, **kwargs):
    """Draw cache usage over time as a bokeh line plot, one line per task label.

    Parameters
    ----------
    results : sequence
        Output of CacheProfiler.results.  Each record is read through its
        ``cache_time``, ``free_time`` and ``metric`` attributes, and its
        element at index 1 is used to build the task label.
    dsk : dict
        The dask graph being profiled.
    start_time : float
        Start time of the profile; subtracted from every time stamp.
    metric_name : string
        Metric used to measure cache size; shown in the y-axis label.
    palette : string, optional
        Name of the bokeh palette to use, must be a member of
        bokeh.palettes.all_palettes.
    label_size : int, optional
        Maximum size of output labels in plot, defaults to 60.
    **kwargs
        Other keyword arguments, passed to bokeh.figure.  Those listed by
        ``_get_figure_keywords()`` override the defaults set here;
        ``plot_width`` / ``plot_height`` are accepted as aliases of
        ``width`` / ``height``.

    Returns
    -------
    The completed bokeh plot object.
    """
    bp = import_required("bokeh.plotting", _BOKEH_MISSING_MSG)
    from bokeh.models import HoverTool

    figure_opts = {
        "title": "Profile Results",
        "tools": "hover,save,reset,wheel_zoom,xpan",
        "toolbar_location": "above",
        "width": 800,
        "height": 300,
    }

    # Support plot_width and plot_height for backwards compatibility
    for legacy, current in (("plot_width", "width"), ("plot_height", "height")):
        if legacy in kwargs:
            kwargs[current] = kwargs.pop(legacy)

    for option, value in kwargs.items():
        if option in _get_figure_keywords():
            figure_opts[option] = value

    if not results:
        p = bp.figure(y_range=[0, 10], x_range=[0, 10], **figure_opts)
    else:
        columns = list(zip(*results))
        starts, ends = columns[3:]
        tics = sorted(unique(starts + ends))
        by_label = groupby(lambda rec: pprint_task(rec[1], dsk, label_size), results)

        data = {}
        for label, records in by_label.items():
            # Net change in cache size at every time stamp for this label.
            deltas = dict.fromkeys(tics, 0)
            for rec in records:
                deltas[rec.cache_time] += rec.metric
                deltas[rec.free_time] -= rec.metric
            running = accumulate(add, pluck(1, sorted(deltas.items())))
            data[label] = [0] + list(running)

        # Shift time stamps relative to the profile start; the leading 0
        # lines up with the leading 0 of every data series.
        tics = [0] + [stamp - start_time for stamp in tics]
        p = bp.figure(x_range=[0, max(tics)], **figure_opts)

        colors = get_colors(palette, data.keys())
        for (label, sizes), color in zip(data.items(), colors):
            source = bp.ColumnDataSource(
                {"x": tics, "y": sizes, "label": [label] * len(sizes)}
            )
            p.line("x", "y", line_color=color, line_width=3, source=source)

    p.yaxis.axis_label = "Cache Size ({0})".format(metric_name)
    p.xaxis.axis_label = "Time (s)"

    hover = p.select(HoverTool)
    hover.tooltips = """ <div> <span style="font-size: 14px; font-weight: bold;">Task:</span> <span style="font-size: 10px; font-family: Monaco, monospace;">@label</span> </div> """
    return p
def accumulate_gen(chunks):
    """Yield one accumulation per entry of ``chunks``.

    For every tuple of block sizes ``bd`` the generator yields
    ``accumulate(add, (0,) + bd[:-1])``: the running sum seeded with 0 and
    leaving out the last block size.
    """
    yield from (accumulate(add, (0,) + sizes[:-1]) for sizes in chunks)