def _cumsum(seq, initial_zero):
    if isinstance(seq, _HashIdWrapper):
        seq = seq.wrapped
    if initial_zero:
        return tuple(accumulate(add, seq, 0))
    else:
        return tuple(accumulate(add, seq))
def accumulate_part(binop, seq, initial, is_first=False):
    if initial == no_default:
        res = list(accumulate(binop, seq))
    else:
        res = list(accumulate(binop, seq, initial=initial))
    if is_first:
        return res, res[-1] if res else [], initial
    return res[1:], res[-1]
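# A minimal, self-contained sketch (not taken from the snippet above) of the idea
# behind accumulate_part: the running totals of one partition are recomputed with
# the previous partition's carry as the initial value, assuming `no_default` simply
# marks "no initial value given".
from operator import add

from toolz import accumulate

part1, part2 = [1, 2, 3], [4, 5]

res1 = list(accumulate(add, part1))          # [1, 3, 6]
carry = res1[-1]                             # 6, the value handed to the next partition
res2 = list(accumulate(add, part2, carry))   # [6, 10, 15]; the seed is yielded first

# Dropping the repeated seed stitches the partitions into one cumulative sum.
print(res1 + res2[1:])                       # [1, 3, 6, 10, 15]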
def _get_data(clauses, values, keys): def get_query(query): return frappe.db.sql(query.format(**clauses), values=values, as_dict=1) get_opening = compose(lambda x: x.opening, first, get_query) opening = get_opening(""" SELECT SUM(loyalty_points) AS opening FROM `tabLoyalty Point Entry` WHERE {opening_clause} """) rows = get_query(""" SELECT posting_date, sales_invoice, os_custom_loyalty_entry AS custom_loyalty_entry, loyalty_points AS points FROM `tabLoyalty Point Entry` WHERE {period_clause} ORDER BY posting_date """) def set_voucher_ref(row): if row.get("sales_invoice"): return merge( row, { "voucher_type": "Sales Invoice", "voucher_no": row.get("sales_invoice"), }, ) if row.get("custom_loyalty_entry"): return merge( row, { "voucher_type": "Custom Loyalty Entry", "voucher_no": row.get("custom_loyalty_entry"), }, ) return row def set_balance(a, row): return merge(row, {"balance": a.get("balance") + row.get("points")}) make_list = compose(list, concatv) return make_list( accumulate( set_balance, [set_voucher_ref(x) for x in rows], initial={ "voucher_no": "Opening", "balance": opening }, ), [{ "voucher_no": "Total", "points": sum([x.points for x in rows]) }], )
def cumdims_label(chunks, const):
    """ Internal utility for cumulative sum with label.

    >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
    [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
     (('n', 0), ('n', 2), ('n', 4), ('n', 5))]
    """
    return [tuple(zip((const,) * (1 + len(bds)),
                      list(accumulate(add, (0,) + bds))))
            for bds in chunks]
def gradient_descent3(f, df, x):
    return accumulate(
        lambda fx, _: min(
            (partial(gradient_step, df, -alpha)(fx)
             for alpha in [100, 10, 1, 0.7, 0.01, 0.001, 0.0001, 0.00001]),
            key=safe(f),
        ),
        repeat(x),
    )
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None): """Generic function for argreduction. Parameters ---------- x : Array chunk : callable Partialed ``arg_chunk``. combine : callable Partialed ``arg_combine``. agg : callable Partialed ``arg_agg``. axis : int, optional split_every : int or dict, optional """ if axis is None: axis = tuple(range(x.ndim)) ravel = True elif isinstance(axis, int): if axis < 0: axis += x.ndim if axis < 0 or axis >= x.ndim: raise ValueError("axis entry is out of bounds") axis = (axis, ) ravel = x.ndim == 1 else: raise TypeError("axis must be either `None` or int, " "got '{0}'".format(axis)) # Map chunk across all blocks name = 'arg-reduce-chunk-{0}'.format(tokenize(chunk, axis)) old = x.name keys = list(product(*map(range, x.numblocks))) offsets = list( product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks))) if ravel: offset_info = zip(offsets, repeat(x.shape)) else: offset_info = pluck(axis[0], offsets) chunks = tuple( (1, ) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks)) dsk = dict(((name, ) + k, (chunk, (old, ) + k, axis, off)) for (k, off) in zip(keys, offset_info)) # The dtype of `tmp` doesn't actually matter, just need to provide something tmp = Array(sharedict.merge(x.dask, (name, dsk)), name, chunks, dtype=x.dtype) dtype = np.argmin([1]).dtype result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine) return handle_out(out, result)
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None): """ Generic function for argreduction. Parameters ---------- x : Array chunk : callable Partialed ``arg_chunk``. combine : callable Partialed ``arg_combine``. agg : callable Partialed ``arg_agg``. axis : int, optional split_every : int or dict, optional """ if axis is None: axis = tuple(range(x.ndim)) ravel = True elif isinstance(axis, Integral): axis = validate_axis(axis, x.ndim) axis = (axis,) ravel = x.ndim == 1 else: raise TypeError("axis must be either `None` or int, " "got '{0}'".format(axis)) for ax in axis: chunks = x.chunks[ax] if len(chunks) > 1 and np.isnan(chunks).any(): raise ValueError( "Arg-reductions do not work with arrays that have " "unknown chunksizes. At some point in your computation " "this array lost chunking information" ) # Map chunk across all blocks name = 'arg-reduce-{0}'.format(tokenize(axis, x, chunk, combine, split_every)) old = x.name keys = list(product(*map(range, x.numblocks))) offsets = list(product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks))) if ravel: offset_info = zip(offsets, repeat(x.shape)) else: offset_info = pluck(axis[0], offsets) chunks = tuple((1, ) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks)) dsk = dict(((name,) + k, (chunk, (old,) + k, axis, off)) for (k, off) in zip(keys, offset_info)) # The dtype of `tmp` doesn't actually matter, just need to provide something graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x]) tmp = Array(graph, name, chunks, dtype=x.dtype) dtype = np.argmin([1]).dtype result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine) return handle_out(out, result)
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None): """ Generic function for argreduction. Parameters ---------- x : Array chunk : callable Partialed ``arg_chunk``. combine : callable Partialed ``arg_combine``. agg : callable Partialed ``arg_agg``. axis : int, optional split_every : int or dict, optional """ if axis is None: axis = tuple(range(x.ndim)) ravel = True elif isinstance(axis, Integral): axis = validate_axis(axis, x.ndim) axis = (axis,) ravel = x.ndim == 1 else: raise TypeError("axis must be either `None` or int, got '{0}'".format(axis)) for ax in axis: chunks = x.chunks[ax] if len(chunks) > 1 and np.isnan(chunks).any(): raise ValueError( "Arg-reductions do not work with arrays that have " "unknown chunksizes. At some point in your computation " "this array lost chunking information.\n\n" "A possible solution is with \n" " x.compute_chunk_sizes()" ) # Map chunk across all blocks name = "arg-reduce-{0}".format(tokenize(axis, x, chunk, combine, split_every)) old = x.name keys = list(product(*map(range, x.numblocks))) offsets = list(product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks))) if ravel: offset_info = zip(offsets, repeat(x.shape)) else: offset_info = pluck(axis[0], offsets) chunks = tuple((1,) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks)) dsk = dict( ((name,) + k, (chunk, (old,) + k, axis, off)) for (k, off) in zip(keys, offset_info) ) # The dtype of `tmp` doesn't actually matter, just need to provide something graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x]) tmp = Array(graph, name, chunks, dtype=x.dtype) dtype = np.argmin([1]).dtype result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine) return handle_out(out, result)
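# A brief sketch of how the per-block offsets in the arg_reduction variants above
# are built: a running sum over the chunk sizes along each axis, combined with
# itertools.product. The chunk layout below is a made-up example, not taken from
# any particular array.
import operator
from itertools import product

from toolz import accumulate

chunks = ((5, 3, 3), (2, 2, 1))   # hypothetical 2-D chunk sizes (rows, columns)

# Start offset of every block along each axis (the last size is not needed).
per_axis = [tuple(accumulate(operator.add, bd[:-1], 0)) for bd in chunks]
print(per_axis)            # [(0, 5, 8), (0, 2, 4)]

# The Cartesian product gives the (row, col) start offset of every block.
offsets = list(product(*per_axis))
print(offsets[:4])         # [(0, 0), (0, 2), (0, 4), (5, 0)]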
def cum_posterior(prior, data, samples):
    """ Returns list of all posteriors based on prior, data and samples.

    See posterior
    """
    return list(
        toolz.accumulate(lambda prior, sample: prior.update(data, sample),
                         samples, prior))
def cumdims_label(chunks, const):
    """ Internal utility for cumulative sum with label.

    >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n')  # doctest: +NORMALIZE_WHITESPACE
    [(('n', 0), ('n', 5), ('n', 8), ('n', 11)),
     (('n', 0), ('n', 2), ('n', 4), ('n', 5))]
    """
    return [tuple(zip((const,) * (1 + len(bds)),
                      list(accumulate(add, (0,) + bds))))
            for bds in chunks]
def until_nearly_convergence(convf, it, tolerance=0.0001):
    '''
    Test for absolute convergence

    Parameters
        it: Lazy sequence of values
        tolerance: Convergence criterion

    Returns
        Keeps accumulating values while the tolerance is not satisfied;
        otherwise it terminates iteration and returns the sequence of values
    '''
    # The order of arguments for toolz.accumulate is opposite to
    # Python 3 itertools.accumulate
    return accumulate(partial(convf, tolerance), it)
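# The comment above about argument order is easy to check; a quick side-by-side
# using plain addition rather than a convergence test.
from operator import add
from itertools import accumulate as it_accumulate

from toolz import accumulate as tz_accumulate

xs = [1, 2, 3, 4]

# toolz takes the binary operator first and the sequence second...
print(list(tz_accumulate(add, xs)))   # [1, 3, 6, 10]

# ...while itertools.accumulate takes the iterable first and the function second.
print(list(it_accumulate(xs, add)))   # [1, 3, 6, 10]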
def sgd(df, X, y, theta_0, eta=0.1):
    """
    Parameters
        df: Gradient of function f
        X: Matrix of features
        y: Vector of observations
        theta_0: Initial guess; theta is a j-dimensional vector
                 ([theta_01, theta_02, ..., theta_0j])
        eta: Learning rate

    Returns
        Generator sequence of [theta_k1, theta_k2, ..., theta_kj]
        where k = 0 to ...
    """
    xys = chain([theta_0], in_random_order(zip(X, y)))
    return accumulate(partial(sgd_step, df, eta), xys)
def sgd(df, X, y, theta_0, eta=0.1):
    '''
    Parameters
        df: Gradient of function f
        X: Matrix of features
        y: Vector of observations
        theta_0: Initial guess; theta is a j-dimensional vector
                 ([theta_01, theta_02, ..., theta_0j])
        eta: Learning rate

    Returns
        Generator sequence of [theta_k1, theta_k2, ..., theta_kj]
        where k = 0 to ...
    '''
    xys = chain([theta_0], in_random_order(zip(X, y)))
    return accumulate(partial(sgd_step, df, eta), xys)
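# A minimal, self-contained sketch of how sgd might be driven, with hypothetical
# stand-ins for sgd_step and in_random_order (neither is defined in these snippets),
# fitting y = 2x by least squares on three points.
import random
from functools import partial
from itertools import chain, islice

from toolz import accumulate

def in_random_order(pairs):              # hypothetical helper: shuffled copy
    pairs = list(pairs)
    random.shuffle(pairs)
    return pairs

def sgd_step(df, eta, theta, xy):        # hypothetical helper: one gradient update
    x, y = xy
    return theta - eta * df(theta, x, y)

def df(theta, x, y):                     # gradient of (theta * x - y)**2 w.r.t. theta
    return 2 * (theta * x - y) * x

X = [1.0, 2.0, 3.0]
y = [2.0, 4.0, 6.0]

xys = chain([0.0], in_random_order(zip(X, y)))           # theta_0 = 0.0
thetas = accumulate(partial(sgd_step, df, 0.05), xys)
print(list(islice(thetas, 4)))           # running estimates moving towards 2.0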
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None): """ Generic function for argreduction. Parameters ---------- x : Array chunk : callable Partialed ``arg_chunk``. combine : callable Partialed ``arg_combine``. agg : callable Partialed ``arg_agg``. axis : int, optional split_every : int or dict, optional """ if axis is None: axis = tuple(range(x.ndim)) ravel = True elif isinstance(axis, int): if axis < 0: axis += x.ndim if axis < 0 or axis >= x.ndim: raise ValueError("axis entry is out of bounds") axis = (axis,) ravel = x.ndim == 1 else: raise TypeError("axis must be either `None` or int, " "got '{0}'".format(axis)) # Map chunk across all blocks name = 'arg-reduce-chunk-{0}'.format(tokenize(chunk, axis)) old = x.name keys = list(product(*map(range, x.numblocks))) offsets = list(product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks))) if ravel: offset_info = zip(offsets, repeat(x.shape)) else: offset_info = pluck(axis[0], offsets) chunks = tuple((1, ) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks)) dsk = dict(((name,) + k, (chunk, (old,) + k, axis, off)) for (k, off) in zip(keys, offset_info)) # The dtype of `tmp` doesn't actually matter, just need to provide something tmp = Array(sharedict.merge(x.dask, (name, dsk)), name, chunks, dtype=x.dtype) dtype = np.argmin([1]).dtype result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine) return handle_out(out, result)
def fromfunction(func, chunks='auto', shape=None, dtype=None, **kwargs):
    chunks = normalize_chunks(chunks, shape)
    name = 'fromfunction-' + tokenize(func, chunks, shape, dtype, kwargs)
    keys = list(product([name], *[range(len(bd)) for bd in chunks]))
    aggdims = [list(accumulate(add, (0,) + bd[:-1])) for bd in chunks]
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))
    dtype = dtype or float

    values = [(_np_fromfunction, func, shp, dtype, offset, kwargs)
              for offset, shp in zip(offsets, shapes)]

    dsk = dict(zip(keys, values))

    return Array(dsk, name, chunks, dtype=dtype)
def fromfunction(func, chunks=None, shape=None, dtype=None):
    if chunks:
        chunks = normalize_chunks(chunks, shape)
    name = 'fromfunction-' + tokenize(func, chunks, shape, dtype)
    keys = list(product([name], *[range(len(bd)) for bd in chunks]))
    aggdims = [list(accumulate(add, (0, ) + bd[:-1])) for bd in chunks]
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))

    values = [(np.fromfunction, offset_func(func, offset), shp)
              for offset, shp in zip(offsets, shapes)]

    dsk = dict(zip(keys, values))

    return Array(dsk, name, chunks, dtype=dtype)
def fromfunction(func, chunks="auto", shape=None, dtype=None, **kwargs):
    chunks = normalize_chunks(chunks, shape, dtype=dtype)
    name = "fromfunction-" + tokenize(func, chunks, shape, dtype, kwargs)
    keys = list(product([name], *[range(len(bd)) for bd in chunks]))
    aggdims = [list(accumulate(add, (0, ) + bd[:-1])) for bd in chunks]
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))
    dtype = dtype or float

    values = [(_np_fromfunction, func, shp, dtype, offset, kwargs)
              for offset, shp in zip(offsets, shapes)]

    dsk = dict(zip(keys, values))

    return Array(dsk, name, chunks, dtype=dtype)
def fromfunction(func, chunks=None, shape=None, dtype=None):
    if chunks:
        chunks = normalize_chunks(chunks, shape)
    name = 'fromfunction-' + tokenize(func, chunks, shape, dtype)
    keys = list(product([name], *[range(len(bd)) for bd in chunks]))
    aggdims = [list(accumulate(add, (0,) + bd[:-1])) for bd in chunks]
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))

    values = [(np.fromfunction, offset_func(func, offset), shp)
              for offset, shp in zip(offsets, shapes)]

    dsk = dict(zip(keys, values))

    return Array(dsk, name, chunks, dtype=dtype)
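# A small sketch of the offsetting idea used by the fromfunction variants above:
# build one block of a larger array by shifting the coordinates that
# np.fromfunction hands to the user function. offset_func here is a hypothetical
# stand-in, not necessarily the helper referenced above.
import numpy as np

def offset_func(func, offset):           # hypothetical stand-in
    def wrapped(*coords):
        shifted = [c + o for c, o in zip(coords, offset)]
        return func(*shifted)
    return wrapped

f = lambda i, j: 10 * i + j               # the "global" function

# The (2, 2) block whose upper-left corner sits at global position (2, 3).
block = np.fromfunction(offset_func(f, (2, 3)), (2, 2))
print(block)                              # rows 2-3, columns 3-4 of the full array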
def keys_to_flush(lengths, fraction=0.1, maxcount=100000):
    """ Which keys to remove

    >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15,
    ...            'e': 10, 'f': 25, 'g': 5}
    >>> keys_to_flush(lengths, 0.5)
    ['f', 'a']
    """
    top = topk(max(len(lengths) // 2, 1), lengths.items(), key=1)
    total = sum(lengths.values())
    cutoff = min(
        maxcount,
        max(1, bisect(list(accumulate(add, pluck(1, top))), total * fraction)))
    result = [k for k, v in top[:cutoff]]
    assert result
    return result
def keys_to_flush(lengths, fraction=0.1, maxcount=100000):
    """ Which keys to remove

    >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15,
    ...            'e': 10, 'f': 25, 'g': 5}
    >>> keys_to_flush(lengths, 0.5)
    ['f', 'a']
    """
    top = topk(max(len(lengths) // 2, 1), lengths.items(), key=1)
    total = sum(lengths.values())
    cutoff = min(maxcount, max(1,
                 bisect(list(accumulate(add, pluck(1, top))), total * fraction)))
    result = [k for k, v in top[:cutoff]]
    assert result
    return result
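# A minimal sketch of the cutoff computation with the docstring's data: the running
# sum of the largest entries is bisected at the requested fraction of the total
# (the maxcount cap is dropped here for brevity).
from bisect import bisect
from operator import add

from toolz import accumulate, pluck, topk

lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15, 'e': 10, 'f': 25, 'g': 5}

top = topk(max(len(lengths) // 2, 1), lengths.items(), key=1)
running = list(accumulate(add, pluck(1, top)))   # [25, 45, 60] for ('f', 'a', 'c')

total = sum(lengths.values())                    # 100
cutoff = max(1, bisect(running, total * 0.5))    # first index past half the total
print([k for k, _ in top[:cutoff]])              # ['f', 'a']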
def optimalPath(threes: [[int]]) -> ([Step], [Step]):
    forwardPriceToA = threes[0][0]
    crossPriceToA = threes[0][1] + threes[0][2]
    forwardPriceToB = threes[0][1]
    crossPriceToB = threes[0][0] + threes[0][2]
    newPathToA = ([Step("A", forwardPriceToA)]
                  if forwardPriceToA <= crossPriceToA
                  else [Step("B", forwardPriceToB), Step("C", threes[0][2])])
    newPathToB = ([Step("B", forwardPriceToB)]
                  if forwardPriceToB <= crossPriceToB
                  else [Step("A", forwardPriceToA), Step("C", threes[0][2])])
    accumulator = (pvector(newPathToA), pvector(newPathToB))
    newThrees = drop(1, threes)
    return accumulate(roadStep, newThrees, accumulator)
def count_chars(self, trans_text):
    """ text should only be a list[list[str]] """
    mins = 5000
    maxs = 20000
    steps = 1
    cutoffs = []
    for r in enumerate(toolz.accumulate(add, toolz.map(len, trans_text))):
        if r[1] >= mins * steps and r[1] < maxs * steps:
            cutoffs.append(r[0])
            steps = steps + 1
        elif r[1] >= mins * steps and r[1] >= maxs * steps:
            if steps > 1:
                cutoffs.append(r[0] - 1)
                steps = steps + 1
            else:
                cutoffs.append(r[0])
                print(f"single string element too long! (l: {r[1]})")
    cutoffs.append(len(trans_text))
    print(
        f"make_batch for {sum(list(map(len, trans_text)))} chars in {len(trans_text)} seqs | calculated cutoffs: {cutoffs}"
    )
    return cutoffs
def _slice_1d(dim_shape, lengths, index): """Returns a dict of {blocknum: slice} This function figures out where each slice should start in each block for a single dimension. If the slice won't return any elements in the block, that block will not be in the output. Parameters ---------- dim_shape - the number of elements in this dimension. This should be a positive, non-zero integer blocksize - the number of elements per block in this dimension This should be a positive, non-zero integer index - a description of the elements in this dimension that we want This might be an integer, a slice(), or an Ellipsis Returns ------- dictionary where the keys are the integer index of the blocks that should be sliced and the values are the slices Examples -------- 100 length array cut into length 20 pieces, slice 0:35 >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35)) {0: slice(None, None, None), 1: slice(0, 15, 1)} Support irregular blocks and various slices >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35)) {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)} Support step sizes >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3)) {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40)) # step > blocksize {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)} Also support indexing single elements >>> _slice_1d(100, [20, 20, 20, 20, 20], 25) {1: 5} And negative slicing >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3)) {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3)) {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3)) {4: slice(-1, -12, -3)} """ if isinstance(index, (int, long)): i = 0 ind = index lens = list(lengths) while ind >= lens[0]: i += 1 ind -= lens.pop(0) return {i: ind} assert isinstance(index, slice) step = index.step or 1 if step > 0: start = index.start or 0 stop = index.stop if index.stop is not None else dim_shape else: start = index.start or dim_shape - 1 start = dim_shape - 1 if start >= dim_shape else start stop = -(dim_shape + 1) if index.stop is None else index.stop # posify start and stop if start < 0: start += dim_shape if stop < 0: stop += dim_shape d = dict() if step > 0: for i, length in enumerate(lengths): if start < length and stop > 0: d[i] = slice(start, min(stop, length), step) start = (start - length) % step else: start = start - length stop -= length else: rstart = start # running start chunk_boundaries = list(accumulate(add, lengths)) for i, chunk_stop in reversed(list(enumerate(chunk_boundaries))): # create a chunk start and stop if i == 0: chunk_start = 0 else: chunk_start = chunk_boundaries[i - 1] # if our slice is in this chunk if (chunk_start <= rstart < chunk_stop) and (rstart > stop): d[i] = slice( rstart - chunk_stop, max(chunk_start - chunk_stop - 1, stop - chunk_stop), step) # compute the next running start point, offset = (rstart - (chunk_start - 1)) % step rstart = chunk_start + offset - 1 # replace 0:20:1 with : if appropriate for k, v in d.items(): if v == slice(0, lengths[k], 1): d[k] = slice(None, None, None) if not d: # special case x[:0] d[0] = slice(0, 0, 1) return d
def discount(self, rs, discount_rate):
    discounted = accumulate(lambda prev, curr: discount_rate * prev + curr,
                            reversed(rs))
    return np.fromiter(discounted, 'float')[::-1]
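# A concrete run of the same right-to-left discounting, assuming a discount rate
# of 0.9 and a made-up reward sequence.
import numpy as np

from toolz import accumulate

rs = [1.0, 2.0, 3.0]
rate = 0.9

# Accumulating over the reversed rewards makes each value the reward plus the
# discounted return of everything after it.
discounted = accumulate(lambda prev, curr: rate * prev + curr, reversed(rs))
print(np.fromiter(discounted, 'float')[::-1])    # [5.23 4.7  3.  ]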
def plot_cache(results, dsk, start_time, metric_name, palette='GnBu', label_size=60, **kwargs): """Visualize the results of profiling in a bokeh plot. Parameters ---------- results : sequence Output of CacheProfiler.results dsk : dict The dask graph being profiled. start_time : float Start time of the profile. metric_name : string Metric used to measure cache size palette : string, optional Name of the bokeh palette to use, must be key in bokeh.palettes.brewer. label_size: int (optional) Maximum size of output labels in plot, defaults to 60 **kwargs Other keyword arguments, passed to bokeh.figure. These will override all defaults set by visualize. Returns ------- The completed bokeh plot object. """ defaults = dict(title="Profile Results", tools="hover,save,reset,resize,wheel_zoom,xpan", plot_width=800, plot_height=300) defaults.update((k, v) for (k, v) in kwargs.items() if k in bp.Figure.properties()) if results: starts, ends = list(zip(*results))[3:] tics = list(sorted(unique(starts + ends))) groups = groupby(lambda d: pprint_task(d[1], dsk, label_size), results) data = {} for k, vals in groups.items(): cnts = dict.fromkeys(tics, 0) for v in vals: cnts[v.cache_time] += v.metric cnts[v.free_time] -= v.metric data[k] = list(accumulate(add, pluck(1, sorted(cnts.items())))) tics = [i - start_time for i in tics] p = bp.figure(x_range=[0, max(tics)], **defaults) for (key, val), color in zip(data.items(), get_colors(palette, data.keys())): p.line('x', 'y', line_color=color, line_width=3, source=bp.ColumnDataSource({'x': tics, 'y': val, 'label': [key for i in val]})) else: p = bp.figure(y_range=[0, 10], x_range=[0, 10], **defaults) p.grid.grid_line_color = None p.axis.axis_line_color = None p.axis.major_tick_line_color = None p.yaxis.axis_label = "Cache Size ({0})".format(metric_name) p.xaxis.axis_label = "Time (s)" hover = p.select(HoverTool) hover.tooltips = """ <div> <span style="font-size: 14px; font-weight: bold;">Task:</span> <span style="font-size: 10px; font-family: Monaco, monospace;">@label</span> </div> """ return p
def _slice_1d(dim_shape, lengths, index): """Returns a dict of {blocknum: slice} This function figures out where each slice should start in each block for a single dimension. If the slice won't return any elements in the block, that block will not be in the output. Parameters ---------- dim_shape - the number of elements in this dimension. This should be a positive, non-zero integer blocksize - the number of elements per block in this dimension This should be a positive, non-zero integer index - a description of the elements in this dimension that we want This might be an integer, a slice(), or an Ellipsis Returns ------- dictionary where the keys are the integer index of the blocks that should be sliced and the values are the slices Examples -------- 100 length array cut into length 20 pieces, slice 0:35 >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35)) {0: slice(None, None, None), 1: slice(0, 15, 1)} Support irregular blocks and various slices >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35)) {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)} Support step sizes >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3)) {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40)) # step > blocksize {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)} Also support indexing single elements >>> _slice_1d(100, [20, 20, 20, 20, 20], 25) {1: 5} And negative slicing >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3)) {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3)) {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3)) {4: slice(-1, -12, -3)} """ if isinstance(index, (int, long)): i = 0 ind = index lens = list(lengths) while ind >= lens[0]: i += 1 ind -= lens.pop(0) return {i: ind} assert isinstance(index, slice) step = index.step or 1 if step > 0: start = index.start or 0 stop = index.stop if index.stop is not None else dim_shape else: start = index.start or dim_shape - 1 start = dim_shape - 1 if start >= dim_shape else start stop = -(dim_shape + 1) if index.stop is None else index.stop if start < 0: start += dim_shape if stop < 0: stop += dim_shape d = dict() if step > 0: for i, length in enumerate(lengths): if start < length and stop > 0: d[i] = slice(start, min(stop, length), step) start = (start - length) % step else: start = start - length stop -= length else: stop -= dim_shape tail_index = list(accumulate(add, lengths)) pos_step = abs(step) # 11%3==2, 11%-3==-1. Need positive step for % offset = 0 for i, length in zip(range(len(lengths)-1, -1, -1), reversed(lengths)): if start + length >= tail_index[i] and stop < 0: d[i] = slice(start - tail_index[i], max(stop, -length - 1), step) # The offset accumulates over time from the start point offset = (offset + pos_step - (length % pos_step)) % pos_step start = tail_index[i] - 1 - length - offset stop += length # replace 0:20:1 with : if appropriate for k, v in d.items(): if v == slice(0, lengths[k], 1): d[k] = slice(None, None, None) return d
def _slice_1d(dim_shape, lengths, index): """Returns a dict of {blocknum: slice} This function figures out where each slice should start in each block for a single dimension. If the slice won't return any elements in the block, that block will not be in the output. Parameters ---------- dim_shape - the number of elements in this dimension. This should be a positive, non-zero integer blocksize - the number of elements per block in this dimension This should be a positive, non-zero integer index - a description of the elements in this dimension that we want This might be an integer, a slice(), or an Ellipsis Returns ------- dictionary where the keys are the integer index of the blocks that should be sliced and the values are the slices Examples -------- 100 length array cut into length 20 pieces, slice 0:35 >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35)) {0: slice(None, None, None), 1: slice(0, 15, 1)} Support irregular blocks and various slices >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35)) {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)} Support step sizes >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3)) {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40)) # step > blocksize {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)} Also support indexing single elements >>> _slice_1d(100, [20, 20, 20, 20, 20], 25) {1: 5} And negative slicing >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3)) {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3)) {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3)) {4: slice(-1, -12, -3)} """ if isinstance(index, (int, long)): i = 0 ind = index lens = list(lengths) while ind >= lens[0]: i += 1 ind -= lens.pop(0) return {i: ind} assert isinstance(index, slice) step = index.step or 1 if step > 0: start = index.start or 0 stop = index.stop if index.stop is not None else dim_shape else: start = index.start or dim_shape - 1 start = dim_shape - 1 if start >= dim_shape else start stop = -(dim_shape + 1) if index.stop is None else index.stop if start < 0: start += dim_shape if stop < 0: stop += dim_shape d = dict() if step > 0: for i, length in enumerate(lengths): if start < length and stop > 0: d[i] = slice(start, min(stop, length), step) start = (start - length) % step else: start = start - length stop -= length else: stop -= dim_shape tail_index = list(accumulate(add, lengths)) pos_step = abs(step) # 11%3==2, 11%-3==-1. Need positive step for % offset = 0 for i, length in zip(range(len(lengths) - 1, -1, -1), reversed(lengths)): if start + length >= tail_index[i] and stop < 0: d[i] = slice(start - tail_index[i], max(stop, -length - 1), step) # The offset accumulates over time from the start point offset = (offset + pos_step - (length % pos_step)) % pos_step start = tail_index[i] - 1 - length - offset stop += length # replace 0:20:1 with : if appropriate for k, v in d.items(): if v == slice(0, lengths[k], 1): d[k] = slice(None, None, None) return d
def sumDigits(ints: PVector[int]) -> int:
    return last(accumulate(add, concat(map(lambda c: toDigits(c), ints))))
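# A self-contained sketch of the running digit sum, with a hypothetical toDigits
# that splits an integer into its decimal digits and a plain list in place of a
# PVector.
from operator import add

from toolz import accumulate, concat, last

def toDigits(n: int) -> list:            # hypothetical stand-in
    return [int(c) for c in str(n)]

ints = [12, 34]
digits = list(concat(map(toDigits, ints)))   # [1, 2, 3, 4]
print(list(accumulate(add, digits)))         # [1, 3, 6, 10]
print(last(accumulate(add, digits)))         # 10, i.e. sumDigits([12, 34])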
def rlePropLengthPreserved(ints: List) -> bool:
    return len(ints) == last(
        accumulate(add, [b for a, b in runLengthEncode(ints)]))
def split(self, X, y=None): """Iterate tuples of data split into training and test sets. Parameters ---------- X : dask object Training data. May be a ``da.Array``, ``db.Bag``, or ``dklearn.Matrix``. y : dask object, optional The target variable for supervised learning problems. Yields ------- X_train, y_train, X_test, y_test : dask objects The split training and testing data, returned as the same type as the input. If y is not provided, ``y_train`` and ``y_test`` will be ``None``. """ if self.n_folds < 2: raise ValueError("n_folds must be >= 2") X, y = check_X_y(X, y) if isinstance(X, da.Array): n = len(X) if n < self.n_folds: raise ValueError("n_folds must be <= n_samples") elif isinstance(X, (dm.Matrix, db.Bag)): n = X.npartitions if n < self.n_folds: raise ValueError("n_folds must be <= npartitions for Bag or " "Matrix objects") else: raise TypeError("Expected an instance of ``da.Array``, " "``db.Bag``, or ``dm.Matrix`` - got " "{0}".format(type(X).__name__)) fold_sizes = (n // self.n_folds) * np.ones(self.n_folds, dtype=np.int) fold_sizes[:n % self.n_folds] += 1 folds = list(sliding_window(2, accumulate(add, fold_sizes, 0))) if isinstance(X, da.Array): x_parts = [X[start:stop] for start, stop in folds] if y is not None: y_parts = [y[start:stop] for start, stop in folds] for i in range(len(x_parts)): X_train = da.concatenate(x_parts[:i] + x_parts[i + 1:]) X_test = x_parts[i] if y is not None: y_train = da.concatenate(y_parts[:i] + y_parts[i + 1:]) y_test = y_parts[i] else: y_train = y_test = None yield X_train, y_train, X_test, y_test else: parts = list(range(n)) for start, stop in folds: test = parts[start:stop] train = parts[:start] + parts[stop:] X_train = _part_split(X, train, 'X_train') X_test = _part_split(X, test, 'X_test') if y is not None: y_train = _part_split(y, train, 'y_train') y_test = _part_split(y, test, 'y_test') else: y_train = y_test = None yield X_train, y_train, X_test, y_test
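# A brief sketch of how the fold boundaries in split() above come out of
# accumulate and sliding_window, assuming 10 samples split into 3 folds.
from operator import add

from toolz import accumulate, sliding_window

n, n_folds = 10, 3

# The first n % n_folds folds get one extra sample.
fold_sizes = [n // n_folds + (1 if i < n % n_folds else 0) for i in range(n_folds)]
print(fold_sizes)                            # [4, 3, 3]

# Running totals (seeded with 0) give the boundaries; consecutive pairs are
# the (start, stop) of each fold.
folds = list(sliding_window(2, accumulate(add, fold_sizes, 0)))
print(folds)                                 # [(0, 4), (4, 7), (7, 10)]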
def gradient_descent3(f, df, x):
    return accumulate(
        lambda fx, _: min(
            (partial(gradient_step, df, -alpha)(fx)
             for alpha in [100, 10, 1, 0.7, 0.01, 0.001, 0.0001, 0.00001]),
            key=safe(f)),
        repeat(x))
def _slice_1d(dim_shape, lengths, index): """Returns a dict of {blocknum: slice} This function figures out where each slice should start in each block for a single dimension. If the slice won't return any elements in the block, that block will not be in the output. Parameters ---------- dim_shape - the number of elements in this dimension. This should be a positive, non-zero integer blocksize - the number of elements per block in this dimension This should be a positive, non-zero integer index - a description of the elements in this dimension that we want This might be an integer, a slice(), or an Ellipsis Returns ------- dictionary where the keys are the integer index of the blocks that should be sliced and the values are the slices Examples -------- 100 length array cut into length 20 pieces, slice 0:35 >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35)) {0: slice(None, None, None), 1: slice(0, 15, 1)} Support irregular blocks and various slices >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35)) {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)} Support step sizes >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3)) {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40)) # step > blocksize {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)} Also support indexing single elements >>> _slice_1d(100, [20, 20, 20, 20, 20], 25) {1: 5} And negative slicing >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3)) {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3)) {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3)) {4: slice(-1, -12, -3)} """ if isinstance(index, (int, long)): i = 0 ind = index lens = list(lengths) while ind >= lens[0]: i += 1 ind -= lens.pop(0) return {i: ind} assert isinstance(index, slice) step = index.step or 1 if step > 0: start = index.start or 0 stop = index.stop if index.stop is not None else dim_shape else: start = index.start or dim_shape - 1 start = dim_shape - 1 if start >= dim_shape else start stop = -(dim_shape + 1) if index.stop is None else index.stop # posify start and stop if start < 0: start += dim_shape if stop < 0: stop += dim_shape d = dict() if step > 0: for i, length in enumerate(lengths): if start < length and stop > 0: d[i] = slice(start, min(stop, length), step) start = (start - length) % step else: start = start - length stop -= length else: rstart = start # running start chunk_boundaries = list(accumulate(add, lengths)) for i, chunk_stop in reversed(list(enumerate(chunk_boundaries))): # create a chunk start and stop if i == 0: chunk_start = 0 else: chunk_start = chunk_boundaries[i - 1] # if our slice is in this chunk if (chunk_start <= rstart < chunk_stop) and (rstart > stop): d[i] = slice(rstart - chunk_stop, max(chunk_start - chunk_stop - 1, stop - chunk_stop), step) # compute the next running start point, offset = (rstart - (chunk_start - 1)) % step rstart = chunk_start + offset - 1 # replace 0:20:1 with : if appropriate for k, v in d.items(): if v == slice(0, lengths[k], 1): d[k] = slice(None, None, None) if not d: # special case x[:0] d[0] = slice(0, 0, 1) return d
def list_accumulator(itr: Iterable) -> Iterable[List]:
    # list(list_accumulator([1, 2, 3])) -> [[1], [1, 2], [1, 2, 3]]
    return filter(
        None, accumulate(lambda a, b: make_list(a) + make_list(b), itr, []))
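# A runnable version of the example in the comment above, with a hypothetical
# make_list that wraps non-list values (it is not defined in these snippets).
from typing import Iterable, List

from toolz import accumulate

def make_list(x) -> list:                    # hypothetical stand-in
    return x if isinstance(x, list) else [x]

def list_accumulator(itr: Iterable) -> Iterable[List]:
    return filter(
        None, accumulate(lambda a, b: make_list(a) + make_list(b), itr, []))

print(list(list_accumulator([1, 2, 3])))     # [[1], [1, 2], [1, 2, 3]]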