コード例 #1
0
ファイル: slicing.py プロジェクト: xcz011/dask
def _cumsum(seq, initial_zero):
    """Return the running sums of ``seq`` as a tuple.

    ``seq`` is unwrapped first when it is a ``_HashIdWrapper``.  When
    ``initial_zero`` is truthy the accumulation is seeded with ``0``;
    otherwise it starts from the first element of ``seq``.
    """
    values = seq.wrapped if isinstance(seq, _HashIdWrapper) else seq
    if not initial_zero:
        return tuple(accumulate(add, values))
    return tuple(accumulate(add, values, 0))
コード例 #2
0
ファイル: core.py プロジェクト: maxhutch/dask
def accumulate_part(binop, seq, initial, is_first=False):
    """Accumulate the items of one partition with ``binop``.

    Parameters
    ----------
    binop : callable
        Two-argument function handed to ``accumulate``.
    seq : iterable
        Items to accumulate.
    initial : object
        Seed value; when it equals ``no_default`` no seed is passed.
    is_first : bool, optional
        Selects which of the two return shapes is produced.

    Returns
    -------
    tuple
        ``(res, last, initial)`` when ``is_first`` is true, where ``last`` is
        ``res[-1]`` or ``[]`` for an empty result; otherwise
        ``(res[1:], res[-1])``.
    """
    seed = {} if initial == no_default else {"initial": initial}
    res = list(accumulate(binop, seq, **seed))
    if not is_first:
        return res[1:], res[-1]
    last = res[-1] if res else []
    return res, last, initial
コード例 #3
0
ファイル: core.py プロジェクト: datastark/dask
def accumulate_part(binop, seq, initial, is_first=False):
    """Run ``accumulate`` over ``seq`` and return the results plus the last value.

    ``initial`` is passed on as the seed unless it equals ``no_default``.
    With ``is_first`` true the return value is the 3-tuple
    ``(results, last_or_empty_list, initial)``; otherwise it is
    ``(results[1:], results[-1])``.
    """
    if initial == no_default:
        running = accumulate(binop, seq)
    else:
        running = accumulate(binop, seq, initial=initial)
    res = list(running)
    if is_first:
        tail = res[-1] if res else []
        return res, tail, initial
    return res[1:], res[-1]
コード例 #4
0
def _get_data(clauses, values, keys):
    """Build the loyalty point ledger rows: opening, entries, total.

    Parameters
    ----------
    clauses : dict
        Provides the ``opening_clause`` and ``period_clause`` SQL fragments
        that are substituted into the WHERE conditions.
    values : dict or sequence
        Bound parameters handed to ``frappe.db.sql``.
    keys : object
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    list of dict
        An "Opening" row carrying the opening balance, one row per entry in
        the period with a running ``balance``, and a final "Total" row.
    """

    # NOTE(review): the clauses are interpolated into the SQL text with
    # str.format; only ``values`` is parameterized. The clauses must never
    # contain untrusted input -- confirm against the caller.
    def get_query(query):
        return frappe.db.sql(query.format(**clauses), values=values, as_dict=1)

    get_opening = compose(lambda x: x.opening, first, get_query)

    # SUM() yields NULL (None) when no entry matches the opening clause.
    # Fall back to 0 so the running balance below can be added to it.
    opening = get_opening("""
            SELECT SUM(loyalty_points) AS opening
            FROM `tabLoyalty Point Entry`
            WHERE {opening_clause}
        """) or 0

    rows = get_query("""
            SELECT
                posting_date,
                sales_invoice,
                os_custom_loyalty_entry AS custom_loyalty_entry,
                loyalty_points AS points
            FROM `tabLoyalty Point Entry`
            WHERE {period_clause}
            ORDER BY posting_date
        """)

    def set_voucher_ref(row):
        # A sales invoice reference takes precedence over a custom entry.
        if row.get("sales_invoice"):
            return merge(
                row,
                {
                    "voucher_type": "Sales Invoice",
                    "voucher_no": row.get("sales_invoice"),
                },
            )
        if row.get("custom_loyalty_entry"):
            return merge(
                row,
                {
                    "voucher_type": "Custom Loyalty Entry",
                    "voucher_no": row.get("custom_loyalty_entry"),
                },
            )
        return row

    def set_balance(a, row):
        # ``a`` is the previously emitted row; carry its balance forward.
        return merge(row, {"balance": a.get("balance") + row.get("points")})

    make_list = compose(list, concatv)
    return make_list(
        accumulate(
            set_balance,
            [set_voucher_ref(x) for x in rows],
            initial={
                "voucher_no": "Opening",
                "balance": opening
            },
        ),
        [{
            "voucher_no": "Total",
            "points": sum(x.points for x in rows)
        }],
    )
コード例 #5
0
ファイル: rechunk.py プロジェクト: dougc333/TestCode
def cumdims_label(chunks, const):
    """ Internal utility: cumulative chunk boundaries, each tagged with a label.

    For every dimension in ``chunks`` the running sum of the block sizes
    (starting at 0) is paired with ``const``.

    >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
    [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
     (('n', 0), ('n', 2), ('n', 4), ('n', 5))]
    """
    labelled = []
    for bds in chunks:
        labels = (const,) * (1 + len(bds))
        boundaries = list(accumulate(add, (0,) + bds))
        labelled.append(tuple(zip(labels, boundaries)))
    return labelled
コード例 #6
0
def gradient_descent3(f, df, x):
    """Return a lazy sequence of gradient-descent iterates starting at ``x``.

    At every step ``gradient_step`` is evaluated for each candidate step
    size and the candidate with the smallest ``safe(f)`` value is kept.
    The sequence is driven by ``repeat(x)``, so it does not end on its own.
    """
    step_sizes = [100, 10, 1, 0.7, 0.01, 0.001, 0.0001, 0.00001]

    def best_next(current, _):
        # The second argument comes from repeat(x) and is deliberately unused.
        candidates = (gradient_step(df, -alpha, current) for alpha in step_sizes)
        return min(candidates, key=safe(f))

    return accumulate(best_next, repeat(x))
コード例 #7
0
def arg_reduction(x,
                  chunk,
                  combine,
                  agg,
                  axis=None,
                  split_every=None,
                  out=None):
    """Generic function for argreduction.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
        Axis to reduce over.  ``None`` reduces over all axes.  Any integral
        type (including NumPy integers) is accepted; negative values count
        from the last axis.
    split_every : int or dict, optional
    out : optional
        Forwarded to ``handle_out`` together with the result.

    Raises
    ------
    ValueError
        If ``axis`` is out of bounds for ``x``.
    TypeError
        If ``axis`` is neither ``None`` nor an integer.
    """
    # Local import keeps this block self-contained.
    from numbers import Integral

    if axis is None:
        axis = tuple(range(x.ndim))
        ravel = True
    elif isinstance(axis, Integral):
        # Normalize NumPy integer scalars to a plain int.
        axis = int(axis)
        if axis < 0:
            axis += x.ndim
        if axis < 0 or axis >= x.ndim:
            raise ValueError("axis entry is out of bounds")
        axis = (axis, )
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, "
                        "got '{0}'".format(axis))

    # Map chunk across all blocks.
    # The input array's name is part of the token: without it, the same
    # reduction applied to two different arrays produces identical keys,
    # which overwrite each other once the graphs are merged.
    name = 'arg-reduce-chunk-{0}'.format(tokenize(chunk, axis, x.name))
    old = x.name
    keys = list(product(*map(range, x.numblocks)))
    # Starting offset of every block along each dimension
    offsets = list(
        product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks)))
    if ravel:
        offset_info = zip(offsets, repeat(x.shape))
    else:
        offset_info = pluck(axis[0], offsets)

    chunks = tuple(
        (1, ) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks))
    dsk = dict(((name, ) + k, (chunk, (old, ) + k, axis, off))
               for (k, off) in zip(keys, offset_info))
    # The dtype of `tmp` doesn't actually matter, just need to provide something
    tmp = Array(sharedict.merge(x.dask, (name, dsk)),
                name,
                chunks,
                dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
コード例 #8
0
ファイル: reductions.py プロジェクト: yliapis/dask
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """ Shared driver for arg-style reductions.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
        Axis to reduce over; ``None`` reduces over every axis.
    split_every : int or dict, optional
    out : optional
        Forwarded to ``handle_out`` together with the result.

    Raises
    ------
    TypeError
        If ``axis`` is neither ``None`` nor an integer.
    ValueError
        If a reduced axis has more than one block and a NaN chunk size.
    """
    if axis is None:
        ravel = True
        axis = tuple(range(x.ndim))
    elif isinstance(axis, Integral):
        axis = (validate_axis(axis, x.ndim),)
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, got '{0}'".format(axis))

    # Offsets cannot be computed across several blocks of unknown size
    for ax in axis:
        sizes = x.chunks[ax]
        if len(sizes) <= 1:
            continue
        if np.isnan(sizes).any():
            raise ValueError(
                "Arg-reductions do not work with arrays that have "
                "unknown chunksizes.  At some point in your computation "
                "this array lost chunking information"
            )

    # Apply ``chunk`` to every block of ``x``
    token = tokenize(axis, x, chunk, combine, split_every)
    name = 'arg-reduce-{0}'.format(token)
    old = x.name
    block_keys = list(product(*(range(n) for n in x.numblocks)))
    starts_per_dim = [accumulate(operator.add, bd[:-1], 0) for bd in x.chunks]
    offsets = list(product(*starts_per_dim))
    offset_info = zip(offsets, repeat(x.shape)) if ravel else pluck(axis[0], offsets)

    out_chunks = tuple(
        (1,) * len(c) if i in axis else c for i, c in enumerate(x.chunks)
    )
    dsk = {
        (name,) + k: (chunk, (old,) + k, axis, off)
        for k, off in zip(block_keys, offset_info)
    }
    # The dtype given to ``tmp`` is a placeholder; any dtype would do
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
    tmp = Array(graph, name, out_chunks, dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
コード例 #9
0
ファイル: reductions.py プロジェクト: vertexclique/dask
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """ Common implementation behind the arg-reductions.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
        Axis to reduce over; ``None`` reduces over every axis.
    split_every : int or dict, optional
    out : optional
        Forwarded to ``handle_out`` together with the result.

    Raises
    ------
    TypeError
        If ``axis`` is neither ``None`` nor an integer.
    ValueError
        If a reduced axis has more than one block and a NaN chunk size.
    """
    if axis is None:
        ravel = True
        axis = tuple(range(x.ndim))
    elif not isinstance(axis, Integral):
        raise TypeError("axis must be either `None` or int, got '{0}'".format(axis))
    else:
        axis = (validate_axis(axis, x.ndim),)
        ravel = x.ndim == 1

    # Refuse reduced axes that span several blocks of unknown size
    for ax in axis:
        axis_sizes = x.chunks[ax]
        unknown = len(axis_sizes) > 1 and np.isnan(axis_sizes).any()
        if unknown:
            raise ValueError(
                "Arg-reductions do not work with arrays that have "
                "unknown chunksizes. At some point in your computation "
                "this array lost chunking information.\n\n"
                "A possible solution is with \n"
                "  x.compute_chunk_sizes()"
            )

    # One task per input block, each receiving its block's offset
    name = "arg-reduce-{0}".format(tokenize(axis, x, chunk, combine, split_every))
    old = x.name
    keys = list(product(*[range(n) for n in x.numblocks]))
    per_dim_starts = (accumulate(operator.add, bd[:-1], 0) for bd in x.chunks)
    offsets = list(product(*per_dim_starts))
    if not ravel:
        offset_info = pluck(axis[0], offsets)
    else:
        offset_info = zip(offsets, repeat(x.shape))

    new_chunks = tuple(
        (1,) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks)
    )
    dsk = {}
    for k, off in zip(keys, offset_info):
        dsk[(name,) + k] = (chunk, (old,) + k, axis, off)
    # Any dtype works for ``tmp``; the final dtype is set just below
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
    tmp = Array(graph, name, new_chunks, dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
コード例 #10
0
def cum_posterior(prior, data, samples):
    """
    Return the list of successive posteriors, starting with ``prior``.

    Each following entry is the previous entry's ``update(data, sample)``
    for the next element of ``samples``.

    See posterior
    """

    def next_posterior(current, sample):
        return current.update(data, sample)

    posteriors = toolz.accumulate(next_posterior, samples, prior)
    return list(posteriors)
コード例 #11
0
ファイル: rechunk.py プロジェクト: hgz2373294/dask
def cumdims_label(chunks, const):
    """ Internal utility for a labelled cumulative sum of block sizes.

    Each dimension's block sizes are turned into boundaries (starting at 0)
    and every boundary is paired with ``const``.

    >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
    [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
     (('n', 0), ('n', 2), ('n', 4), ('n', 5))]
    """

    def label_dim(bds):
        # One (label, boundary) pair per boundary, including the leading 0
        return tuple((const, edge) for edge in accumulate(add, (0,) + bds))

    return [label_dim(bds) for bds in chunks]
コード例 #12
0
ファイル: utility.py プロジェクト: philiplessner/FunctionalML
def until_nearly_convergence(convf, it, tolerance=0.0001):
    '''
    Test for absolute convergence
    Parameters
        convf: Convergence function; ``tolerance`` is bound as its first
               argument and the result is used as the accumulating function
        it: Lazy sequence of values
        tolerance: Convergence criterion
    Returns
        The accumulated sequence. How iteration ends once the tolerance is
        satisfied is decided by convf, not by this function.
    '''
    # toolz.accumulate takes the function first, then the sequence
    # (the reverse of Python 3's itertools.accumulate)
    check = partial(convf, tolerance)
    return accumulate(check, it)
コード例 #13
0
ファイル: utility.py プロジェクト: philiplessner/FunctionalML
def until_nearly_convergence(convf, it, tolerance=0.0001):
    '''
    Accumulate ``it`` with a tolerance-bound convergence function
    Parameters
        convf: Function that receives ``tolerance`` as its first argument
        it: Lazy sequence of values
        tolerance: Convergence criterion
    Returns
        The lazy accumulated sequence; termination is left to convf
    '''
    # Argument order follows toolz.accumulate (function, sequence), which is
    # the opposite of Python 3's itertools.accumulate
    bound_convf = partial(convf, tolerance)
    accumulated = accumulate(bound_convf, it)
    return accumulated
コード例 #14
0
def sgd(df, X, y, theta_0, eta=0.1):
    """
    Stochastic gradient descent as a lazy sequence of parameter vectors.

    Parameters
        df: Gradient of function f
        X: Matrix of features
        y: Vector of observations
        theta_0: Initial guess, a j dimensional vector ([theta_01, theta_02,...,theta_0j])
        eta: Learning rate
    Returns
        Generator sequence of [theta_k1, theta_k2,...,theta_kj]
        where k = 0 to ...
    """
    shuffled_pairs = in_random_order(zip(X, y))
    # The initial guess goes first so the accumulation is seeded with it
    seeded = chain([theta_0], shuffled_pairs)
    update = partial(sgd_step, df, eta)
    return accumulate(update, seeded)
コード例 #15
0
def sgd(df, X, y, theta_0, eta=0.1):
    '''
    Lazily yield the parameter vectors visited by stochastic gradient descent.

    Parameters
        df: Gradient of function f
        X: Matrix of features
        y: Vector of observations
        theta_0: Initial guess, a j dimensional vector ([theta_01, theta_02,...,theta_0j])
        eta: Learning rate
    Returns
        Generator sequence of [theta_k1, theta_k2,...,theta_kj]
        where k = 0 to ...
    '''
    step = partial(sgd_step, df, eta)
    # theta_0 is placed ahead of the shuffled (x, y) pairs as the seed
    stream = chain([theta_0], in_random_order(zip(X, y)))
    return accumulate(step, stream)
コード例 #16
0
ファイル: reductions.py プロジェクト: togar-nk/dask
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """ Generic function for argreduction.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
        Axis to reduce over.  ``None`` reduces over all axes.  Any integral
        type (including NumPy integers) is accepted; negative values count
        from the last axis.
    split_every : int or dict, optional
    out : optional
        Forwarded to ``handle_out`` together with the result.

    Raises
    ------
    ValueError
        If ``axis`` is out of bounds for ``x``.
    TypeError
        If ``axis`` is neither ``None`` nor an integer.
    """
    # Local import keeps this block self-contained.
    from numbers import Integral

    if axis is None:
        axis = tuple(range(x.ndim))
        ravel = True
    elif isinstance(axis, Integral):
        # Normalize NumPy integer scalars to a plain int.
        axis = int(axis)
        if axis < 0:
            axis += x.ndim
        if axis < 0 or axis >= x.ndim:
            raise ValueError("axis entry is out of bounds")
        axis = (axis,)
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, "
                        "got '{0}'".format(axis))

    # Map chunk across all blocks.
    # The input array's name is part of the token: without it, the same
    # reduction applied to two different arrays produces identical keys,
    # which overwrite each other once the graphs are merged.
    name = 'arg-reduce-chunk-{0}'.format(tokenize(chunk, axis, x.name))
    old = x.name
    keys = list(product(*map(range, x.numblocks)))
    # Starting offset of every block along each dimension
    offsets = list(product(*(accumulate(operator.add, bd[:-1], 0)
                             for bd in x.chunks)))
    if ravel:
        offset_info = zip(offsets, repeat(x.shape))
    else:
        offset_info = pluck(axis[0], offsets)

    chunks = tuple((1, ) * len(c) if i in axis else c for (i, c)
                   in enumerate(x.chunks))
    dsk = dict(((name,) + k, (chunk, (old,) + k, axis, off)) for (k, off)
               in zip(keys, offset_info))
    # The dtype of `tmp` doesn't actually matter, just need to provide something
    tmp = Array(sharedict.merge(x.dask, (name, dsk)), name, chunks, dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
コード例 #17
0
ファイル: creation.py プロジェクト: caseyclements/dask
def fromfunction(func, chunks='auto', shape=None, dtype=None, **kwargs):
    """Build an ``Array`` with one ``_np_fromfunction`` task per block.

    Every task receives ``func``, the block's shape, the dtype, the block's
    offset along each dimension and ``kwargs``.  ``dtype`` falls back to
    ``float`` when falsy; the task name is tokenized before that fallback.
    """
    chunks = normalize_chunks(chunks, shape)
    name = 'fromfunction-' + tokenize(func, chunks, shape, dtype, kwargs)
    block_ranges = [range(len(bd)) for bd in chunks]
    keys = list(product([name], *block_ranges))
    # Starting index of each block along every dimension
    aggdims = []
    for bd in chunks:
        aggdims.append(list(accumulate(add, (0,) + bd[:-1])))
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))
    dtype = dtype or float

    dsk = {
        key: (_np_fromfunction, func, shp, dtype, offset, kwargs)
        for key, offset, shp in zip(keys, offsets, shapes)
    }

    return Array(dsk, name, chunks, dtype=dtype)
コード例 #18
0
def fromfunction(func, chunks=None, shape=None, dtype=None):
    """Build an ``Array`` with one ``np.fromfunction`` task per block.

    Each task calls ``np.fromfunction`` with ``offset_func(func, offset)``
    and the block's shape, where ``offset`` is the block's starting index
    along each dimension.  ``chunks`` is normalized only when truthy.
    """
    if chunks:
        chunks = normalize_chunks(chunks, shape)
    name = 'fromfunction-' + tokenize(func, chunks, shape, dtype)
    block_ranges = [range(len(bd)) for bd in chunks]
    keys = list(product([name], *block_ranges))
    # Starting index of each block along every dimension
    aggdims = []
    for bd in chunks:
        aggdims.append(list(accumulate(add, (0, ) + bd[:-1])))
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))

    dsk = {
        key: (np.fromfunction, offset_func(func, offset), shp)
        for key, offset, shp in zip(keys, offsets, shapes)
    }

    return Array(dsk, name, chunks, dtype=dtype)
コード例 #19
0
def fromfunction(func, chunks="auto", shape=None, dtype=None, **kwargs):
    """Create an ``Array`` whose blocks are ``_np_fromfunction`` tasks.

    The graph holds one task per block; it is given ``func``, the block
    shape, the dtype, the block's per-dimension offset and ``kwargs``.
    The name is tokenized from the ``dtype`` argument as passed; afterwards
    ``dtype`` defaults to ``float`` when falsy.
    """
    chunks = normalize_chunks(chunks, shape, dtype=dtype)
    name = "fromfunction-" + tokenize(func, chunks, shape, dtype, kwargs)
    keys = list(product([name], *(range(len(bd)) for bd in chunks)))

    def block_starts(bd):
        # Running sum of the block sizes, shifted so it starts at 0
        return list(accumulate(add, (0, ) + bd[:-1]))

    offsets = list(product(*[block_starts(bd) for bd in chunks]))
    shapes = list(product(*chunks))
    dtype = dtype or float

    values = []
    for offset, shp in zip(offsets, shapes):
        values.append((_np_fromfunction, func, shp, dtype, offset, kwargs))

    dsk = {key: value for key, value in zip(keys, values)}

    return Array(dsk, name, chunks, dtype=dtype)
コード例 #20
0
ファイル: creation.py プロジェクト: martindurant/dask
def fromfunction(func, chunks=None, shape=None, dtype=None):
    """Create an ``Array`` whose blocks are ``np.fromfunction`` tasks.

    For every block the graph stores a call to ``np.fromfunction`` with
    ``offset_func(func, offset)`` and the block's shape.  ``chunks`` is
    passed through ``normalize_chunks`` only when it is truthy.
    """
    if chunks:
        chunks = normalize_chunks(chunks, shape)
    name = 'fromfunction-' + tokenize(func, chunks, shape, dtype)
    keys = list(product([name], *(range(len(bd)) for bd in chunks)))

    def block_starts(bd):
        # Running sum of the block sizes, shifted so it starts at 0
        return list(accumulate(add, (0,) + bd[:-1]))

    offsets = list(product(*[block_starts(bd) for bd in chunks]))
    shapes = list(product(*chunks))

    values = []
    for offset, shp in zip(offsets, shapes):
        values.append((np.fromfunction, offset_func(func, offset), shp))

    dsk = {key: value for key, value in zip(keys, values)}

    return Array(dsk, name, chunks, dtype=dtype)
コード例 #21
0
ファイル: zmq.py プロジェクト: mindw/partd
def keys_to_flush(lengths, fraction=0.1, maxcount=100000):
    """ Choose the keys to remove, largest first

    Candidates are the largest half of the keys (at least one).  Keys are
    taken until their cumulative length reaches ``fraction`` of the total,
    with a minimum of one key and a maximum of ``maxcount``.

    >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15,
    ...            'e': 10, 'f': 25, 'g': 5}
    >>> keys_to_flush(lengths, 0.5)
    ['f', 'a']
    """
    n_candidates = max(len(lengths) // 2, 1)
    top = topk(n_candidates, lengths.items(), key=1)
    target = sum(lengths.values()) * fraction
    running = list(accumulate(add, pluck(1, top)))
    cutoff = min(maxcount, max(1, bisect(running, target)))
    result = [k for k, _ in top[:cutoff]]
    assert result
    return result
コード例 #22
0
ファイル: buffer.py プロジェクト: CaptainAL/Spyder
def keys_to_flush(lengths, fraction=0.1, maxcount=100000):
    """ Pick which keys to flush

    Only the largest half of the keys (at least one) is considered.  The
    number selected is where the running total of their lengths reaches
    ``fraction`` of the overall total, clamped to ``[1, maxcount]``.

    >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15,
    ...            'e': 10, 'f': 25, 'g': 5}
    >>> keys_to_flush(lengths, 0.5)
    ['f', 'a']
    """
    half = max(len(lengths) // 2, 1)
    top = topk(half, lengths.items(), key=1)
    total = sum(lengths.values())
    cumulative = list(accumulate(add, pluck(1, top)))
    position = bisect(cumulative, total * fraction)
    cutoff = min(maxcount, max(1, position))
    result = [pair[0] for pair in top[:cutoff]]
    assert result
    return result
コード例 #23
0
def optimalPath(threes: [[int]]) -> ([Step], [Step]):
    """Seed the path search from the first section and fold in the rest.

    The first element of ``threes`` supplies three prices (index 0 for "A",
    1 for "B", 2 for "C").  For each of A and B the cheaper of the direct
    step and the detour through the other side plus "C" is chosen; ties go
    to the direct step.  The remaining sections are accumulated with
    ``roadStep``.

    Note that the value returned is whatever ``accumulate`` produces, not a
    single pair of paths.
    """
    head = threes[0]
    price_a, price_b, price_c = head[0], head[1], head[2]
    detour_to_a = price_b + price_c
    detour_to_b = price_a + price_c

    if price_a <= detour_to_a:
        path_a = [Step("A", price_a)]
    else:
        path_a = [Step("B", price_b), Step("C", price_c)]

    if price_b <= detour_to_b:
        path_b = [Step("B", price_b)]
    else:
        path_b = [Step("A", price_a), Step("C", price_c)]

    seed = (pvector(path_a), pvector(path_b))
    remaining = drop(1, threes)
    return accumulate(roadStep, remaining, seed)
コード例 #24
0
 def count_chars(self, trans_text):
     """
     Compute batch cutoff indices from the cumulative element lengths.

     trans_text should only be a list[list[str]]; the length of each
     element is added to a running total.  A cutoff is recorded each time
     the total reaches the next multiple of 5000, and the final cutoff is
     always len(trans_text).  The computed cutoffs are printed and returned.
     """
     mins = 5000
     maxs = 20000
     steps = 1
     cutoffs = []
     running_totals = toolz.accumulate(add, toolz.map(len, trans_text))
     for idx, total in enumerate(running_totals):
         if total < mins * steps:
             # Still below the lower bound of the current batch
             continue
         if total < maxs * steps:
             cutoffs.append(idx)
             steps = steps + 1
         elif steps > 1:
             # Over the upper bound: cut before this element instead
             cutoffs.append(idx - 1)
             steps = steps + 1
         else:
             cutoffs.append(idx)
             print(f"single string element too long!  (l: {total})")
     cutoffs.append(len(trans_text))
     print(
         f"make_batch for {sum(list(map(len, trans_text)))} chars in {len(trans_text)} seqs | calculated cutoffs: {cutoffs}"
     )
     return cutoffs
コード例 #25
0
ファイル: slicing.py プロジェクト: jrenner/dask
def _slice_1d(dim_shape, lengths, index):
    """Returns a dict of {blocknum: slice}

    This function figures out where each slice should start in each
    block for a single dimension. If the slice won't return any elements
    in the block, that block will not be in the output.

    Parameters
    ----------

    dim_shape - the number of elements in this dimension.
      This should be a positive, non-zero integer
    lengths - the number of elements in each block of this dimension,
      in block order (see the examples below)
    index - a description of the elements in this dimension that we want
      This must be an integer or a slice()

    Returns
    -------

    dictionary where the keys are the integer index of the blocks that
      should be sliced and the values are the slices (or, for an integer
      index, the position inside the block).  If no block is selected the
      result is ``{0: slice(0, 0, 1)}``.

    Examples
    --------

    100 length array cut into length 20 pieces, slice 0:35

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35))
    {0: slice(None, None, None), 1: slice(0, 15, 1)}

    Support irregular blocks and various slices

    >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35))
    {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)}

    Support step sizes

    >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3))
    {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40))  # step > blocksize
    {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)}

    Also support indexing single elements

    >>> _slice_1d(100, [20, 20, 20, 20, 20], 25)
    {1: 5}

    And negative slicing

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3))
    {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3))
    {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3))
    {4: slice(-1, -12, -3)}
    """
    # Integer index: walk the blocks, subtracting each block's length until
    # the remaining index falls inside the current block.
    # NOTE(review): a negative or out-of-range integer is not normalized
    # here -- presumably callers do that beforehand; confirm.
    if isinstance(index, (int, long)):
        i = 0
        ind = index
        lens = list(lengths)  # copy so the caller's sequence is not consumed
        while ind >= lens[0]:
            i += 1
            ind -= lens.pop(0)
        return {i: ind}

    assert isinstance(index, slice)

    # Fill in defaults for missing start/stop depending on direction
    step = index.step or 1
    if step > 0:
        start = index.start or 0
        stop = index.stop if index.stop is not None else dim_shape
    else:
        # NOTE(review): ``or`` also replaces an explicit start of 0 with
        # dim_shape - 1 -- confirm this is intended for negative steps.
        start = index.start or dim_shape - 1
        start = dim_shape - 1 if start >= dim_shape else start
        stop = -(dim_shape + 1) if index.stop is None else index.stop

    # posify start and stop
    if start < 0:
        start += dim_shape
    if stop < 0:
        stop += dim_shape

    d = dict()
    if step > 0:
        # Forward pass: start/stop are kept relative to the current block
        for i, length in enumerate(lengths):
            if start < length and stop > 0:
                d[i] = slice(start, min(stop, length), step)
                # Position of the first selected element in the next block
                start = (start - length) % step
            else:
                start = start - length
            stop -= length
    else:
        rstart = start  # running start
        # chunk_boundaries[i] is the absolute end (exclusive) of block i
        chunk_boundaries = list(accumulate(add, lengths))
        # Negative step: visit the blocks from last to first
        for i, chunk_stop in reversed(list(enumerate(chunk_boundaries))):
            # create a chunk start and stop
            if i == 0:
                chunk_start = 0
            else:
                chunk_start = chunk_boundaries[i - 1]

            # if our slice is in this chunk
            if (chunk_start <= rstart < chunk_stop) and (rstart > stop):
                # Both bounds are expressed as negative offsets from the
                # end of the block
                d[i] = slice(
                    rstart - chunk_stop,
                    max(chunk_start - chunk_stop - 1, stop - chunk_stop), step)

                # compute the next running start point,
                offset = (rstart - (chunk_start - 1)) % step
                rstart = chunk_start + offset - 1

    # replace 0:20:1 with : if appropriate
    for k, v in d.items():
        if v == slice(0, lengths[k], 1):
            d[k] = slice(None, None, None)

    if not d:  # special case x[:0]
        d[0] = slice(0, 0, 1)

    return d
コード例 #26
0
ファイル: agent.py プロジェクト: jeromeku/reinforce
 def discount(self, rs, discount_rate):
     """Return the discounted cumulative sums of ``rs`` as a float array.

     The values are accumulated from the last element backwards, each step
     adding the current value to ``discount_rate`` times the running total;
     the result is flipped back to the original order.
     """

     def step(running, reward):
         return discount_rate * running + reward

     backwards = accumulate(step, reversed(rs))
     return np.fromiter(backwards, 'float')[::-1]
コード例 #27
0
ファイル: profile_visualize.py プロジェクト: ifzz/dask
def plot_cache(results, dsk, start_time, metric_name, palette='GnBu',
               label_size=60, **kwargs):
    """Visualize the results of profiling in a bokeh plot.

    One line is drawn per task label, showing the cumulative cache metric
    of that label over time.

    Parameters
    ----------
    results : sequence
        Output of CacheProfiler.results
    dsk : dict
        The dask graph being profiled.
    start_time : float
        Start time of the profile.
    metric_name : string
        Metric used to measure cache size
    palette : string, optional
        Name of the bokeh palette to use, must be key in bokeh.palettes.brewer.
    label_size: int (optional)
        Maximum size of output labels in plot, defaults to 60
    **kwargs
        Other keyword arguments, passed to bokeh.figure. These will override
        all defaults set by visualize.

    Returns
    -------
    The completed bokeh plot object.
    """

    defaults = dict(title="Profile Results",
                    tools="hover,save,reset,resize,wheel_zoom,xpan",
                    plot_width=800, plot_height=300)
    # Only keyword arguments that are bokeh Figure properties are kept
    defaults.update((k, v) for (k, v) in kwargs.items() if k in
                    bp.Figure.properties())

    if results:
        # Fields from index 3 onward of each record are unpacked as the
        # start and end times (this assumes exactly two such fields)
        starts, ends = list(zip(*results))[3:]
        tics = list(sorted(unique(starts + ends)))
        # Group the records by the pretty-printed form of their second field
        groups = groupby(lambda d: pprint_task(d[1], dsk, label_size), results)
        data = {}
        for k, vals in groups.items():
            # Net change of the metric at every tic: added at cache_time,
            # removed at free_time
            cnts = dict.fromkeys(tics, 0)
            for v in vals:
                cnts[v.cache_time] += v.metric
                cnts[v.free_time] -= v.metric
            # Running total of the changes, in time order
            data[k] = list(accumulate(add, pluck(1, sorted(cnts.items()))))

        # Make the time axis relative to the start of the profile
        tics = [i - start_time for i in tics]
        p = bp.figure(x_range=[0, max(tics)], **defaults)

        for (key, val), color in zip(data.items(), get_colors(palette, data.keys())):
            p.line('x', 'y', line_color=color, line_width=3,
                   source=bp.ColumnDataSource({'x': tics, 'y': val,
                                               'label': [key for i in val]}))

    else:
        # Nothing was recorded: return an empty plot with fixed ranges
        p = bp.figure(y_range=[0, 10], x_range=[0, 10], **defaults)
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.yaxis.axis_label = "Cache Size ({0})".format(metric_name)
    p.xaxis.axis_label = "Time (s)"

    # The hover tooltip shows the 'label' column of the line's data source
    hover = p.select(HoverTool)
    hover.tooltips = """
    <div>
        <span style="font-size: 14px; font-weight: bold;">Task:</span>&nbsp;
        <span style="font-size: 10px; font-family: Monaco, monospace;">@label</span>
    </div>
    """
    return p
コード例 #28
0
ファイル: slicing.py プロジェクト: kastnerkyle/dask
def _slice_1d(dim_shape, lengths, index):
    """Returns a dict of {blocknum: slice}

    This function figures out where each slice should start in each
    block for a single dimension. If the slice won't return any elements
    in the block, that block will not be in the output.

    Parameters
    ----------

    dim_shape - the number of elements in this dimension.
      This should be a positive, non-zero integer
    lengths - the number of elements in each block of this dimension,
      in block order (see the examples below)
    index - a description of the elements in this dimension that we want
      This must be an integer or a slice()

    Returns
    -------

    dictionary where the keys are the integer index of the blocks that
      should be sliced and the values are the slices (or, for an integer
      index, the position inside the block)

    Examples
    --------

    100 length array cut into length 20 pieces, slice 0:35

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35))
    {0: slice(None, None, None), 1: slice(0, 15, 1)}

    Support irregular blocks and various slices

    >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35))
    {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)}

    Support step sizes

    >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3))
    {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40))  # step > blocksize
    {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)}

    Also support indexing single elements

    >>> _slice_1d(100, [20, 20, 20, 20, 20], 25)
    {1: 5}

    And negative slicing

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3))
    {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3))
    {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3))
    {4: slice(-1, -12, -3)}
    """
    # Integer index: walk the blocks, subtracting each block's length until
    # the remaining index falls inside the current block.
    # NOTE(review): a negative or out-of-range integer is not normalized
    # here -- presumably callers do that beforehand; confirm.
    if isinstance(index, (int, long)):
        i = 0
        ind = index
        lens = list(lengths)  # copy so the caller's sequence is not consumed
        while ind >= lens[0]:
            i += 1
            ind -= lens.pop(0)
        return {i: ind}

    assert isinstance(index, slice)

    # Fill in defaults for missing start/stop depending on direction
    step = index.step or 1
    if step > 0:
        start = index.start or 0
        stop = index.stop if index.stop is not None else dim_shape
    else:
        # NOTE(review): ``or`` also replaces an explicit start of 0 with
        # dim_shape - 1 -- confirm this is intended for negative steps.
        start = index.start or dim_shape - 1
        start = dim_shape - 1 if start >= dim_shape else start
        stop = -(dim_shape + 1) if index.stop is None else index.stop

    # Convert negative start/stop to non-negative positions
    if start < 0:
        start += dim_shape
    if stop < 0:
        stop += dim_shape

    d = dict()
    if step > 0:
        # Forward pass: start/stop are kept relative to the current block
        for i, length in enumerate(lengths):
            if start < length and stop > 0:
                d[i] = slice(start, min(stop, length), step)
                # Position of the first selected element in the next block
                start = (start - length) % step
            else:
                start = start - length
            stop -= length
    else:
        # Negative step: express stop as a negative offset from the end of
        # the dimension, then visit the blocks from last to first
        stop -= dim_shape
        # tail_index[i] is the absolute end (exclusive) of block i
        tail_index = list(accumulate(add, lengths))
        pos_step = abs(step) # 11%3==2, 11%-3==-1. Need positive step for %

        offset = 0
        for i, length in zip(range(len(lengths)-1, -1, -1), reversed(lengths)):
            if start + length >= tail_index[i] and stop < 0:
                d[i] = slice(start - tail_index[i],
                             max(stop, -length - 1), step)
                # The offset accumulates over time from the start point
                offset = (offset + pos_step - (length % pos_step)) % pos_step
                start = tail_index[i] - 1 - length - offset

            # Re-express stop relative to the end of the preceding block
            stop += length

    # replace 0:20:1 with : if appropriate
    for k, v in d.items():
        if v == slice(0, lengths[k], 1):
            d[k] = slice(None, None, None)

    return d
コード例 #29
0
ファイル: slicing.py プロジェクト: kastnerkyle/dask
def _slice_1d(dim_shape, lengths, index):
    """Returns a dict of {blocknum: slice}

    This function figures out where each slice should start in each
    block for a single dimension. If the slice won't return any elements
    in the block, that block will not be in the output.

    Parameters
    ----------

    dim_shape - the number of elements in this dimension.
      This should be a positive, non-zero integer
    blocksize - the number of elements per block in this dimension
      This should be a positive, non-zero integer
    index - a description of the elements in this dimension that we want
      This might be an integer, a slice(), or an Ellipsis

    Returns
    -------

    dictionary where the keys are the integer index of the blocks that
      should be sliced and the values are the slices

    Examples
    --------

    100 length array cut into length 20 pieces, slice 0:35

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35))
    {0: slice(None, None, None), 1: slice(0, 15, 1)}

    Support irregular blocks and various slices

    >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35))
    {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)}

    Support step sizes

    >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3))
    {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40))  # step > blocksize
    {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)}

    Also support indexing single elements

    >>> _slice_1d(100, [20, 20, 20, 20, 20], 25)
    {1: 5}

    And negative slicing

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3))
    {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3))
    {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3))
    {4: slice(-1, -12, -3)}
    """
    if isinstance(index, (int, long)):
        i = 0
        ind = index
        lens = list(lengths)
        while ind >= lens[0]:
            i += 1
            ind -= lens.pop(0)
        return {i: ind}

    assert isinstance(index, slice)

    step = index.step or 1
    if step > 0:
        start = index.start or 0
        stop = index.stop if index.stop is not None else dim_shape
    else:
        start = index.start or dim_shape - 1
        start = dim_shape - 1 if start >= dim_shape else start
        stop = -(dim_shape + 1) if index.stop is None else index.stop

    if start < 0:
        start += dim_shape
    if stop < 0:
        stop += dim_shape

    d = dict()
    if step > 0:
        for i, length in enumerate(lengths):
            if start < length and stop > 0:
                d[i] = slice(start, min(stop, length), step)
                start = (start - length) % step
            else:
                start = start - length
            stop -= length
    else:
        stop -= dim_shape
        tail_index = list(accumulate(add, lengths))
        pos_step = abs(step)  # 11%3==2, 11%-3==-1. Need positive step for %

        offset = 0
        for i, length in zip(range(len(lengths) - 1, -1, -1),
                             reversed(lengths)):
            if start + length >= tail_index[i] and stop < 0:
                d[i] = slice(start - tail_index[i], max(stop, -length - 1),
                             step)
                # The offset accumulates over time from the start point
                offset = (offset + pos_step - (length % pos_step)) % pos_step
                start = tail_index[i] - 1 - length - offset

            stop += length

    # replace 0:20:1 with : if appropriate
    for k, v in d.items():
        if v == slice(0, lengths[k], 1):
            d[k] = slice(None, None, None)

    return d
コード例 #30
0
def sumDigits(ints: PVector[int]) -> int:
    """Return the last running total of the digits of every element of ``ints``.

    Each element is expanded with ``toDigits`` (presumably into its decimal
    digits -- confirm against that helper), the expansions are chained into
    one stream, and the final value of the running sum over that stream is
    returned.
    """
    all_digits = concat(map(toDigits, ints))
    running_totals = accumulate(add, all_digits)
    return last(running_totals)
コード例 #31
0
def rlePropLengthPreserved(ints: List) -> bool:
    """Check that run-length encoding preserves the input length.

    ``runLengthEncode(ints)`` is iterated as 2-item pairs; the second item
    of each pair is summed (as the last value of a running total) and
    compared with ``len(ints)``.
    """
    expected_length = len(ints)
    run_lengths = [count for _value, count in runLengthEncode(ints)]
    decoded_length = last(accumulate(add, run_lengths))
    return expected_length == decoded_length
コード例 #32
0
    def split(self, X, y=None):
        """Iterate tuples of data split into training and test sets.

        The data is cut into ``self.n_folds`` consecutive folds; each fold is
        used once as the test set while the remaining folds form the
        training set.  When the number of items does not divide evenly, the
        first ``n % n_folds`` folds get one extra item.

        For ``da.Array`` input the folds are ranges of rows; for ``db.Bag``
        and ``dm.Matrix`` input the folds are ranges of partitions.

        This is a generator, so the validation errors below are raised when
        iteration starts, not when ``split`` is called.

        Parameters
        ----------
        X : dask object
            Training data. May be a ``da.Array``, ``db.Bag``, or
            ``dklearn.Matrix``.

        y : dask object, optional
            The target variable for supervised learning problems.

        Yields
        ------
        X_train, y_train, X_test, y_test : dask objects
            The split training and testing data, returned as the same type as
            the input. If y is not provided, ``y_train`` and ``y_test`` will be
            ``None``.

        Raises
        ------
        ValueError
            If ``n_folds`` is smaller than 2, or larger than the number of
            samples (arrays) or partitions (bags and matrices).
        TypeError
            If ``X`` is not one of the supported dask collections.
        """
        if self.n_folds < 2:
            raise ValueError("n_folds must be >= 2")
        X, y = check_X_y(X, y)
        if isinstance(X, da.Array):
            n = len(X)
            if n < self.n_folds:
                raise ValueError("n_folds must be <= n_samples")
        elif isinstance(X, (dm.Matrix, db.Bag)):
            n = X.npartitions
            if n < self.n_folds:
                raise ValueError("n_folds must be <= npartitions for Bag or "
                                 "Matrix objects")
        else:
            raise TypeError("Expected an instance of ``da.Array``, "
                            "``db.Bag``, or ``dm.Matrix`` - got "
                            "{0}".format(type(X).__name__))

        # Builtin ``int`` rather than ``np.int``: the alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        fold_sizes = (n // self.n_folds) * np.ones(self.n_folds, dtype=int)
        # Spread the remainder over the leading folds.
        fold_sizes[:n % self.n_folds] += 1
        # Running totals starting at 0, paired up as (start, stop) bounds.
        folds = list(sliding_window(2, accumulate(add, fold_sizes, 0)))

        if isinstance(X, da.Array):
            x_parts = [X[start:stop] for start, stop in folds]
            if y is not None:
                y_parts = [y[start:stop] for start, stop in folds]
            for i, X_test in enumerate(x_parts):
                X_train = da.concatenate(x_parts[:i] + x_parts[i + 1:])
                if y is not None:
                    y_train = da.concatenate(y_parts[:i] + y_parts[i + 1:])
                    y_test = y_parts[i]
                else:
                    y_train = y_test = None
                yield X_train, y_train, X_test, y_test
        else:
            parts = list(range(n))
            for start, stop in folds:
                test = parts[start:stop]
                train = parts[:start] + parts[stop:]
                X_train = _part_split(X, train, 'X_train')
                X_test = _part_split(X, test, 'X_test')
                if y is not None:
                    y_train = _part_split(y, train, 'y_train')
                    y_test = _part_split(y, test, 'y_test')
                else:
                    y_train = y_test = None
                yield X_train, y_train, X_test, y_test
コード例 #33
0
def gradient_descent3(f, df, x):
    """Return the sequence of gradient-descent iterates starting from ``x``.

    At every step ``gradient_step`` is tried with each of a fixed set of
    step sizes (passed negated), and the candidate ranked lowest by
    ``safe(f)`` becomes the next iterate.  The result is whatever
    ``accumulate`` returns when folding over ``repeat(x)``; assuming the
    toolz-style ``accumulate(binop, seq)`` and ``itertools.repeat``, that is
    a lazy, unbounded iterator whose first item is ``x``.
    """
    step_sizes = [100, 10, 1, 0.7, 0.01, 0.001, 0.0001, 0.00001]

    def best_next_point(current, _unused):
        # The second argument is just the repeated seed; only the running
        # value matters.
        candidates = (gradient_step(df, -alpha, current)
                      for alpha in step_sizes)
        return min(candidates, key=safe(f))

    return accumulate(best_next_point, repeat(x))
コード例 #34
0
ファイル: slicing.py プロジェクト: ankravch/dask
def _slice_1d(dim_shape, lengths, index):
    """Returns a dict of {blocknum: slice}

    This function figures out where each slice should start in each
    block for a single dimension. If the slice won't return any elements
    in the block, that block will not be in the output.

    Parameters
    ----------

    dim_shape - the number of elements in this dimension.
      This should be a positive, non-zero integer
    blocksize - the number of elements per block in this dimension
      This should be a positive, non-zero integer
    index - a description of the elements in this dimension that we want
      This might be an integer, a slice(), or an Ellipsis

    Returns
    -------

    dictionary where the keys are the integer index of the blocks that
      should be sliced and the values are the slices

    Examples
    --------

    100 length array cut into length 20 pieces, slice 0:35

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35))
    {0: slice(None, None, None), 1: slice(0, 15, 1)}

    Support irregular blocks and various slices

    >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35))
    {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)}

    Support step sizes

    >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3))
    {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40))  # step > blocksize
    {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)}

    Also support indexing single elements

    >>> _slice_1d(100, [20, 20, 20, 20, 20], 25)
    {1: 5}

    And negative slicing

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3))
    {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3))
    {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3))
    {4: slice(-1, -12, -3)}
    """
    if isinstance(index, (int, long)):
        i = 0
        ind = index
        lens = list(lengths)
        while ind >= lens[0]:
            i += 1
            ind -= lens.pop(0)
        return {i: ind}

    assert isinstance(index, slice)

    step = index.step or 1
    if step > 0:
        start = index.start or 0
        stop = index.stop if index.stop is not None else dim_shape
    else:
        start = index.start or dim_shape - 1
        start = dim_shape - 1 if start >= dim_shape else start
        stop = -(dim_shape + 1) if index.stop is None else index.stop

    # posify start and stop
    if start < 0:
        start += dim_shape
    if stop < 0:
        stop += dim_shape

    d = dict()
    if step > 0:
        for i, length in enumerate(lengths):
            if start < length and stop > 0:
                d[i] = slice(start, min(stop, length), step)
                start = (start - length) % step
            else:
                start = start - length
            stop -= length
    else:
        rstart = start  # running start
        chunk_boundaries = list(accumulate(add, lengths))
        for i, chunk_stop in reversed(list(enumerate(chunk_boundaries))):
            # create a chunk start and stop
            if i == 0:
                chunk_start = 0
            else:
                chunk_start = chunk_boundaries[i - 1]

            # if our slice is in this chunk
            if (chunk_start <= rstart < chunk_stop) and (rstart > stop):
                d[i] = slice(rstart - chunk_stop,
                             max(chunk_start - chunk_stop - 1,
                                 stop - chunk_stop),
                             step)

                # compute the next running start point,
                offset = (rstart - (chunk_start - 1)) % step
                rstart = chunk_start + offset - 1

    # replace 0:20:1 with : if appropriate
    for k, v in d.items():
        if v == slice(0, lengths[k], 1):
            d[k] = slice(None, None, None)

    if not d:  # special case x[:0]
        d[0] = slice(0, 0, 1)

    return d
コード例 #35
0
def list_accumulator(itr: Iterable) -> Iterable[List]:
    """Lazily produce the growing prefixes of ``itr`` as lists.

    Items are folded with ``accumulate`` starting from an empty list, each
    step joining ``make_list`` of the running value with ``make_list`` of
    the new item.  ``filter(None, ...)`` then drops falsy results, which
    removes the empty initial value.

    Per the original author's example,
    ``list(list_accumulator([1, 2, 3]))`` gives ``[[1], [1, 2], [1, 2, 3]]``.
    """
    def _append(prefix, item):
        return make_list(prefix) + make_list(item)

    prefixes = accumulate(_append, itr, [])
    return filter(None, prefixes)