Ejemplo n.º 1
0
def _cumsum(seq, initial_zero):
    if isinstance(seq, _HashIdWrapper):
        seq = seq.wrapped
    if initial_zero:
        return tuple(toolz.accumulate(add, seq, 0))
    else:
        return tuple(toolz.accumulate(add, seq))
Ejemplo n.º 2
0
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """Generic function for argreduction.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
    split_every : int or dict, optional
    """
    if axis is None:
        axis = tuple(range(x.ndim))
        ravel = True
    elif isinstance(axis, Integral):
        axis = validate_axis(axis, x.ndim)
        axis = (axis,)
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, got '{0}'".format(axis))

    for ax in axis:
        chunks = x.chunks[ax]
        if len(chunks) > 1 and np.isnan(chunks).any():
            raise ValueError(
                "Arg-reductions do not work with arrays that have "
                "unknown chunksizes. At some point in your computation "
                "this array lost chunking information.\n\n"
                "A possible solution is with \n"
                "  x.compute_chunk_sizes()"
            )

    # Map chunk across all blocks
    name = "arg-reduce-{0}".format(tokenize(axis, x, chunk, combine, split_every))
    old = x.name
    keys = list(product(*map(range, x.numblocks)))
    offsets = list(product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks)))
    if ravel:
        offset_info = zip(offsets, repeat(x.shape))
    else:
        offset_info = pluck(axis[0], offsets)

    chunks = tuple((1,) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks))
    dsk = dict(
        ((name,) + k, (chunk, (old,) + k, axis, off))
        for (k, off) in zip(keys, offset_info)
    )
    # The dtype of `tmp` doesn't actually matter, just need to provide something
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
    tmp = Array(graph, name, chunks, dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
def cumdims_label(chunks, const):
    """Internal utility for cumulative sum with label.

    >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
    [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
     (('n', 0), ('n', 2), ('n', 4), ('n', 5))]
    """
    return [
        tuple(zip((const, ) * (1 + len(bds)), accumulate(add, (0, ) + bds)))
        for bds in chunks
    ]
Ejemplo n.º 4
0
def fromfunction(func, chunks="auto", shape=None, dtype=None, **kwargs):
    chunks = normalize_chunks(chunks, shape, dtype=dtype)
    name = "fromfunction-" + tokenize(func, chunks, shape, dtype, kwargs)
    keys = list(product([name], *[range(len(bd)) for bd in chunks]))
    aggdims = [list(accumulate(add, (0, ) + bd[:-1])) for bd in chunks]
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))
    dtype = dtype or float

    values = [(_np_fromfunction, func, shp, dtype, offset, kwargs)
              for offset, shp in zip(offsets, shapes)]

    dsk = dict(zip(keys, values))

    return Array(dsk, name, chunks, dtype=dtype)
Ejemplo n.º 5
0
def plot_cache(results,
               dsk,
               start_time,
               metric_name,
               palette="Viridis",
               label_size=60,
               **kwargs):
    """Visualize the results of profiling in a bokeh plot.

    Parameters
    ----------
    results : sequence
        Output of CacheProfiler.results
    dsk : dict
        The dask graph being profiled.
    start_time : float
        Start time of the profile.
    metric_name : string
        Metric used to measure cache size
    palette : string, optional
        Name of the bokeh palette to use, must be a member of
        bokeh.palettes.all_palettes.
    label_size: int (optional)
        Maximum size of output labels in plot, defaults to 60
    **kwargs
        Other keyword arguments, passed to bokeh.figure. These will override
        all defaults set by visualize.

    Returns
    -------
    The completed bokeh plot object.
    """
    bp = import_required("bokeh.plotting", _BOKEH_MISSING_MSG)
    from bokeh.models import HoverTool

    defaults = dict(
        title="Profile Results",
        tools="hover,save,reset,wheel_zoom,xpan",
        toolbar_location="above",
        width=800,
        height=300,
    )
    # Support plot_width and plot_height for backwards compatibility
    if "plot_width" in kwargs:
        kwargs["width"] = kwargs.pop("plot_width")
    if "plot_height" in kwargs:
        kwargs["height"] = kwargs.pop("plot_height")
    defaults.update(
        (k, v) for (k, v) in kwargs.items() if k in _get_figure_keywords())

    if results:
        starts, ends = list(zip(*results))[3:]
        tics = sorted(unique(starts + ends))
        groups = groupby(lambda d: pprint_task(d[1], dsk, label_size), results)
        data = {}
        for k, vals in groups.items():
            cnts = dict.fromkeys(tics, 0)
            for v in vals:
                cnts[v.cache_time] += v.metric
                cnts[v.free_time] -= v.metric
            data[k] = [0] + list(
                accumulate(add, pluck(1, sorted(cnts.items()))))

        tics = [0] + [i - start_time for i in tics]
        p = bp.figure(x_range=[0, max(tics)], **defaults)

        for (key, val), color in zip(data.items(),
                                     get_colors(palette, data.keys())):
            p.line(
                "x",
                "y",
                line_color=color,
                line_width=3,
                source=bp.ColumnDataSource({
                    "x": tics,
                    "y": val,
                    "label": [key for i in val]
                }),
            )

    else:
        p = bp.figure(y_range=[0, 10], x_range=[0, 10], **defaults)
    p.yaxis.axis_label = "Cache Size ({0})".format(metric_name)
    p.xaxis.axis_label = "Time (s)"

    hover = p.select(HoverTool)
    hover.tooltips = """
    <div>
        <span style="font-size: 14px; font-weight: bold;">Task:</span>&nbsp;
        <span style="font-size: 10px; font-family: Monaco, monospace;">@label</span>
    </div>
    """
    return p
Ejemplo n.º 6
0
 def accumulate_gen(chunks):
     for bd in chunks:
         yield accumulate(add, (0,) + bd[:-1])