def compute_up(expr, data, **kwargs):
    names = data.c.keys()
    assert names == expr._child.fields
    d = dict(zip(names, getattr(data, "inner_columns", data.c)))
    return sa.select(
        d[col].label(new_col) if col != new_col else d[col]
        for col, new_col in zip(expr._child.fields, expr.fields)
    )

def test_unzip():
    def _to_lists(seq, n=10):
        """iter of iters -> finite list of finite lists"""
        def initial(s):
            return list(take(n, s))

        return initial(map(initial, seq))

    def _assert_initial_matches(a, b, n=10):
        assert list(take(n, a)) == list(take(n, b))

    # Unzips a simple list correctly
    assert _to_lists(unzip([('a', 1), ('b', 2), ('c', 3)])) == \
        [['a', 'b', 'c'], [1, 2, 3]]

    # Can handle a finite number of infinite iterators (the naive
    # ``zip(*args)`` implementation fails on this example).
    a, b, c = unzip(zip(count(1), repeat(0), repeat(1)))
    _assert_initial_matches(a, count(1))
    _assert_initial_matches(b, repeat(0))
    _assert_initial_matches(c, repeat(1))

    # Sensibly handles empty input
    assert list(unzip(zip([]))) == []

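# For context, the ``unzip`` under test can be implemented lazily so that it
# also works on infinite iterators, where the naive ``zip(*seq)`` would never
# return. A minimal standard-library sketch (``unzip_sketch`` is an
# illustrative name, not the library implementation; note that ``tee``
# buffers items if the returned iterators are consumed out of step):

from itertools import chain, tee
from operator import itemgetter


def unzip_sketch(seq):
    """Inverse of zip: iterable of n-tuples -> n independent iterators."""
    seq = iter(seq)
    try:
        first = next(seq)  # peek at one tuple to learn the arity
    except StopIteration:
        return ()
    seq = chain([first], seq)  # push the peeked tuple back on the front
    return tuple(map(itemgetter(i), s)
                 for i, s in enumerate(tee(seq, len(first))))
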
def compute_up(expr, data, **kwargs):
    names = data.c.keys()
    assert names == expr._child.fields, (
        'names = %r\nexpr._child.fields = %r' % (names, expr._child.fields)
    )
    d = dict(zip(names, getattr(data, 'inner_columns', data.c)))
    return reconstruct_select(
        (d[col].label(new_col) if col != new_col else d[col]
         for col, new_col in zip(expr._child.fields, expr.fields)),
        data,
    )

def from_delayed(values):
    """ Create bag from many dask.delayed objects

    Parameters
    ----------
    values: list of Values
        An iterable of dask.delayed.Value objects, such as come from dask.do.
        These comprise the individual partitions of the resulting bag.

    Returns
    -------
    Bag

    Examples
    --------
    >>> b = from_delayed([x, y, z])  # doctest: +SKIP
    """
    from dask.delayed import Value
    if isinstance(values, Value):
        values = [values]
    dsk = merge(v.dask for v in values)

    name = 'bag-from-delayed-' + tokenize(*values)
    names = [(name, i) for i in range(len(values))]
    values = [v.key for v in values]
    dsk2 = dict(zip(names, values))

    return Bag(merge(dsk, dsk2), name, len(values))

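# A usage sketch for ``from_delayed`` above. In the dask era this snippet
# targets, ``dask.delayed`` (formerly ``do``, producing ``Value`` objects)
# wraps plain calls into lazy values, one per resulting bag partition.
# ``load_chunk`` is an illustrative name, not part of any API:

from dask import delayed  # assumes a dask version exposing ``delayed``


def load_chunk(i):
    # Stand-in loader; each call becomes one partition of the bag.
    return list(range(i, i + 3))


parts = [delayed(load_chunk)(i) for i in range(3)]
b = from_delayed(parts)
# b.compute() -> [0, 1, 2, 1, 2, 3, 2, 3, 4]
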
def compute_up(expr, data, **kwargs):
    if not valid_grouper(expr.grouper):
        raise TypeError("Grouper must have a non-nested record or one "
                        "dimensional collection datashape, "
                        "got %s of type %r with dshape %s" %
                        (expr.grouper, type(expr.grouper).__name__,
                         expr.grouper.dshape))
    grouper = get_inner_columns(
        compute(expr.grouper, data, post_compute=False, return_type='native'),
    )
    app = expr.apply
    reductions = [
        compute(val, data, post_compute=False,
                return_type='native').label(name)
        for val, name in zip(app.values, app.fields)
    ]
    return sa.select(grouper + reductions).group_by(*grouper)

def compute_up(t, s, **kwargs):
    columns = [getattr(s.c, col).label(new_col) if col != new_col
               else getattr(s.c, col)
               for col, new_col in zip(t._child.fields, t.fields)]
    return select(columns)

def compute_up(expr, data, scope=None, **kwargs):
    data = lower_column(data)
    grouper = compute(
        expr.grouper,
        scope,
        post_compute=False,
        return_type='native',
        **kwargs
    )

    app = expr.apply
    reductions = [
        compute(val, data, post_compute=None,
                return_type='native').label(name)
        for val, name in zip(app.values, app.fields)
    ]

    froms = list(unique(chain(get_all_froms(grouper),
                              concat(map(get_all_froms, reductions)))))
    inner_cols = list(getattr(grouper, 'inner_columns', [grouper]))
    grouper_cols = inner_cols[:]
    inner_cols.extend(concat(
        getattr(getattr(r, 'element', None), 'inner_columns', [r])
        for r in reductions
    ))
    wheres = unify_wheres([grouper] + reductions)
    sel = unify_froms(sa.select(inner_cols, whereclause=wheres), froms)
    return sel.group_by(*grouper_cols)

def interpose(el, seq):
    """ Introduce element between each pair of elements in seq

    >>> list(interpose("a", [1, 2, 3]))
    [1, 'a', 2, 'a', 3]
    """
    combined = zip(itertools.repeat(el), seq)
    return drop(1, concat(combined))

def compute_up(t, s, **kwargs):
    scope = {t._child: s}
    return sa.select(
        compute(value, scope, post_compute=None,
                return_type='native').label(name)
        for value, name in zip(t.values, t.fields)
    )

def interpose(el, seq):
    """ Introduce element between each pair of elements in seq

    >>> list(interpose("a", [1, 2, 3]))
    [1, 'a', 2, 'a', 3]
    """
    inposed = concat(zip(itertools.repeat(el), seq))
    next(inposed)
    return inposed

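# The two ``interpose`` variants above are equivalent: both pair the
# separator with every item via ``zip(repeat(el), seq)``, flatten the pairs,
# and discard the leading separator -- one with ``drop(1, ...)``, the other
# by consuming it with ``next``. A quick check, assuming toolz provides
# ``concat`` and ``drop``:

from itertools import repeat

from toolz import concat, drop

assert list(drop(1, concat(zip(repeat('a'), [1, 2, 3])))) == \
    [1, 'a', 2, 'a', 3]
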
def compute_up(expr, data, **kwargs):
    grouper = lower_column(data)
    app = expr.apply
    if isinstance(app, Reduction):
        reductions = [compute(app, data, post_compute=False)]
    elif isinstance(app, Summary):
        reductions = [compute(val, data, post_compute=None).label(name)
                      for val, name in zip(app.values, app.fields)]

    return sa.select([grouper] + reductions).group_by(grouper)

def keymap(func, d):
    """ Apply function to keys of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> keymap(str.lower, bills)  # doctest: +SKIP
    {'alice': [20, 15, 30], 'bob': [10, 35]}

    See Also:
        valmap
    """
    return dict(zip(map(func, iterkeys(d)), itervalues(d)))

def valmap(func, d):
    """ Apply function to values of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> valmap(sum, bills)  # doctest: +SKIP
    {'Alice': 65, 'Bob': 45}

    See Also:
        keymap
    """
    return dict(zip(iterkeys(d), map(func, itervalues(d))))

def compute_up(t, s, scope=None, **kwargs):
    d = dict((t._child[c], list(inner_columns(s))[i])
             for i, c in enumerate(t._child.fields))
    cols = [compute(val, toolz.merge(scope, d), post_compute=None).label(name)
            for name, val in zip(t.fields, t.values)]
    s = copy(s)
    for c in cols:
        s.append_column(c)
    return s.with_only_columns(cols)

def keymap(func, d, factory=dict):
    """ Apply function to keys of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> keymap(str.lower, bills)  # doctest: +SKIP
    {'alice': [20, 15, 30], 'bob': [10, 35]}

    See Also:
        valmap
        itemmap
    """
    rv = factory()
    rv.update(zip(map(func, iterkeys(d)), itervalues(d)))
    return rv

def valmap(func, d, factory=dict):
    """ Apply function to values of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> valmap(sum, bills)  # doctest: +SKIP
    {'Alice': 65, 'Bob': 45}

    See Also:
        keymap
        itemmap
    """
    rv = factory()
    rv.update(zip(iterkeys(d), map(func, itervalues(d))))
    return rv

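# The ``factory`` argument on the later ``keymap``/``valmap`` variants lets
# callers choose the result's mapping type. A small sketch (assumes the
# ``iterkeys``/``itervalues`` compatibility helpers used above are in scope):

from collections import OrderedDict

bills = OrderedDict([("Alice", [20, 15, 30]), ("Bob", [10, 35])])
totals = valmap(sum, bills, factory=OrderedDict)
# totals -> OrderedDict([('Alice', 65), ('Bob', 45)])
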
def sliding_window(n, seq):
    """ A sequence of overlapping subsequences

    >>> list(sliding_window(2, [1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]

    This function creates a sliding window suitable for transformations like
    sliding means / smoothing

    >>> mean = lambda seq: float(sum(seq)) / len(seq)
    >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
    [1.5, 2.5, 3.5]
    """
    # Tee the input n times and stagger the copies: the maxlen-0 deque
    # consumes islice(it, i) without storing anything, advancing the i-th
    # copy by i items, and since the empty deque is falsey, ``or it`` yields
    # the advanced iterator. Zipping the staggered copies gives the windows.
    return zip(*(collections.deque(itertools.islice(it, i), 0) or it
                 for i, it in enumerate(itertools.tee(seq, n))))

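# For comparison, a more literal sliding window built on a single bounded
# deque rather than ``tee`` -- a minimal sketch; ``sliding_window_sketch``
# is an illustrative name:

import collections
import itertools


def sliding_window_sketch(n, seq):
    it = iter(seq)
    window = collections.deque(itertools.islice(it, n), maxlen=n)
    if len(window) == n:
        yield tuple(window)
    for item in it:
        window.append(item)  # oldest item falls off the left automatically
        yield tuple(window)


assert list(sliding_window_sketch(2, [1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)]
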
def load_castra_partition(castra, part, columns, index):
    import blosc
    # Due to serialization issues, blosc needs to be manually initialized in
    # each process.
    blosc.init()

    df = castra.load_partition(part, columns)
    if isinstance(columns, list):
        items = df.itertuples(index)
    else:
        items = df.iteritems() if index else iter(df)
    items = list(items)
    if items and isinstance(items[0], tuple) and type(items[0]) is not tuple:
        names = items[0]._fields
        items = [dict(zip(names, item)) for item in items]
    return items

def diff(*seqs, **kwargs):
    """ Return those items that differ between sequences

    >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
    [(3, 10)]

    Shorter sequences may be padded with a ``default`` value:

    >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
    [(3, 10), (None, 100)]

    A ``key`` function may also be applied to each item for use during
    comparisons:

    >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
    [('bananas', 'Oranges')]
    """
    N = len(seqs)
    if N == 1 and isinstance(seqs[0], list):
        seqs = seqs[0]
        N = len(seqs)
    if N < 2:
        raise TypeError('Too few sequences given (min 2 required)')
    default = kwargs.get('default', no_default)
    if default == no_default:
        iters = zip(*seqs)
    else:
        iters = zip_longest(*seqs, fillvalue=default)
    key = kwargs.get('key', None)
    if key is None:
        for items in iters:
            if items.count(items[0]) != N:
                yield items
    else:
        for items in iters:
            vals = tuple(map(key, items))
            if vals.count(vals[0]) != N:
                yield items

def partition(n, seq, pad=no_pad):
    """ Partition sequence into tuples of length n

    >>> list(partition(2, [1, 2, 3, 4]))
    [(1, 2), (3, 4)]

    If the length of ``seq`` is not evenly divisible by ``n``, the final
    tuple is dropped if ``pad`` is not specified, or filled to length ``n``
    by pad:

    >>> list(partition(2, [1, 2, 3, 4, 5]))
    [(1, 2), (3, 4)]

    >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))
    [(1, 2), (3, 4), (5, None)]

    See Also:
        partition_all
    """
    # n references to the *same* iterator, so each zip step pulls n items.
    args = [iter(seq)] * n
    if pad is no_pad:
        return zip(*args)
    else:
        return zip_longest(*args, fillvalue=pad)

def mean_aggregate(x):
    # x is a sequence of (chunk_total, chunk_count) pairs from partial sums.
    totals, counts = list(zip(*x))
    return 1.0 * sum(totals) / sum(counts)

def var_aggregate(x):
    # x is a sequence of (chunk_sum_of_squares, chunk_total, chunk_count)
    # triples; ``ddof`` (delta degrees of freedom) is closed over from the
    # enclosing scope.
    squares, totals, counts = list(zip(*x))
    x2, x, n = float(sum(squares)), float(sum(totals)), sum(counts)
    result = (x2 / n) - (x / n) ** 2
    return result * n / (n - ddof)

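# A quick sanity check of the two aggregators above on explicit chunks.
# In the original code ``ddof`` is a closure variable; it is pinned as a
# global here so the sketch runs standalone:

ddof = 0
chunks = [[1, 2, 3], [4, 5]]
mean_chunks = [(sum(c), len(c)) for c in chunks]
var_chunks = [(sum(v * v for v in c), sum(c), len(c)) for c in chunks]

assert mean_aggregate(mean_chunks) == 3.0
assert var_aggregate(var_chunks) == 2.0  # population variance of 1..5
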
def compute(t, seq):
    # Tee the stream so the parent expression and the predicate each consume
    # their own copy, then keep the parent rows whose predicate is true.
    seq1, seq2 = itertools.tee(seq)
    parent = compute(t.parent, seq1)
    predicate = compute(t.predicate, seq2)
    return (x for x, tf in zip(parent, predicate) if tf)

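# The same tee-then-zip filtering pattern with plain callables, minus the
# dispatch machinery -- a minimal sketch; ``select_where`` is an
# illustrative name:

import itertools


def select_where(transform, predicate, seq):
    seq1, seq2 = itertools.tee(seq)
    values = map(transform, seq1)    # the "parent" stream
    flags = map(predicate, seq2)     # the boolean predicate stream
    return (x for x, keep in zip(values, flags) if keep)


assert list(select_where(lambda x: x * 10, lambda x: x % 2 == 0,
                         [1, 2, 3, 4])) == [20, 40]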