def itersimpleaggregate(table, key, aggregation, value, field):
    """Yield a header row, then the result of a single aggregation.

    The header is ``tuple(key) + (field,)`` for a compound (list/tuple) key,
    ``('key', field)`` for a callable key, ``(field,)`` when `key` is None
    and ``(key, field)`` otherwise.

    With a key, one row is yielded per ``(k, grp)`` pair produced by
    ``rowgroupby(table, key, value)``, with `aggregation` applied to ``grp``.
    Without a key, a single one-element row is yielded for the whole table.
    """

    def _count(group):
        # count items by iterating, so it works for any iterable
        return sum(1 for _ in group)

    # grouped counting: swap ``len`` for the iterating counter
    if aggregation == len and key is not None:
        aggregation = _count

    # a compound key holding a single item is treated as that item
    if isinstance(key, (list, tuple)) and len(key) == 1:
        key = key[0]

    compound = isinstance(key, (list, tuple))

    # output header
    if compound:
        header = tuple(key) + (field,)
    elif callable(key):
        header = ('key', field)
    elif key is None:
        header = (field,)
    else:
        header = (key, field)
    yield header

    # no key: aggregate over the whole table and emit a single row
    if key is None:
        if aggregation == len:
            yield (nrows(table),)
        else:
            yield (aggregation(values(table, value)),)
        return

    # keyed: one output row per group
    for k, grp in rowgroupby(table, key, value):
        if compound:
            yield tuple(k) + (aggregation(grp),)
        else:
            yield (k, aggregation(grp))
def test_rowgroupby():
    """Check rowgroupby with a field key, a field value and callables."""

    table = (
        ('foo', 'bar', 'baz'),
        ('a', 1, True),
        ('b', 2, True),
        ('b', 3),
    )

    def check(groups, expected):
        # pull one group per expectation and compare key, size, then items
        for want_key, want_vals in expected:
            key, vals = next(groups)
            vals = list(vals)
            eq_(want_key, key)
            eq_(len(want_vals), len(vals))
            for want, got in zip(want_vals, vals):
                eq_(want, got)

    # simplest form
    check(
        rowgroupby(table, 'foo'),
        [
            ('a', [('a', 1, True)]),
            ('b', [('b', 2, True), ('b', 3)]),
        ],
    )

    # specify value
    check(
        rowgroupby(table, 'foo', 'bar'),
        [
            ('a', [1]),
            ('b', [2, 3]),
        ],
    )

    # callable key
    check(
        rowgroupby(table, lambda r: r['foo'], lambda r: r['baz']),
        [
            ('a', [True]),
            ('b', [True, None]),  # gets padded
        ],
    )
def iterrowreduce(source, key, reducer, header):
    """Yield a header, then one reduced row per group of `source`.

    If `header` is None the output header is taken from `source` via
    ``iterpeek``. Each ``(group key, rows)`` pair from
    ``rowgroupby(source, key)`` is passed to `reducer`, and the result is
    yielded as a tuple.
    """
    outhdr = header
    if outhdr is None:
        # output header from source
        outhdr, source = iterpeek(source)
    yield tuple(outhdr)

    grouped = rowgroupby(source, key)
    for grpkey, grprows in grouped:
        reduced = reducer(grpkey, grprows)
        yield tuple(reduced)
def itermergeduplicates(table, key, missing):
    """Yield a header, then one merged row per group of rows sharing `key`.

    The output header is the key field(s) followed by every other field of
    the source header. For each non-key field, the distinct values in the
    group (ignoring `missing` and rows too short to hold the field) collapse
    to that value if there is exactly one, to `missing` if there are none,
    and to ``Conflict(values)`` otherwise.
    """
    it = iter(table)
    hdr, it = iterpeek(it)
    flds = list(map(text_type, hdr))

    # determine output fields
    simplekey = isinstance(key, string_types)
    if simplekey:
        outhdr = [key]
        keyflds = set([key])
    else:
        outhdr = list(key)
        keyflds = set(key)
    valflds = [f for f in flds if f not in keyflds]
    valfldidxs = [flds.index(f) for f in valflds]
    outhdr.extend(valflds)
    yield tuple(outhdr)

    # do the work
    for k, grp in rowgroupby(it, key):
        grp = list(grp)
        outrow = [k] if simplekey else list(k)

        # distinct non-missing values per value field
        mergedvals = []
        for i in valfldidxs:
            distinct = set(
                row[i] for row in grp
                if len(row) > i and row[i] != missing
            )
            mergedvals.append(distinct)

        # collapse each set to a value, `missing`, or a Conflict
        for vals in mergedvals:
            if len(vals) == 1:
                outrow.append(vals.pop())
            elif len(vals) == 0:
                outrow.append(missing)
            else:
                outrow.append(Conflict(vals))

        yield tuple(outrow)
def itermultiaggregate(source, key, aggregation):
    """Yield a header, then one row of aggregated values per group.

    `aggregation` maps each output field name to one of:

    * a callable, applied to the whole rows of the group;
    * a field name, whose values are collected with ``list``;
    * a one-item sequence holding either of the above;
    * a two-item sequence ``(source field(s), callable)``.

    The output header is the key field(s) (or ``'key'`` for a callable key)
    followed by the output field names in the order of `aggregation`.

    An empty `source` (no header row) yields nothing.

    Raises ``ArgumentError`` if an aggregation spec has an unsupported shape.
    """
    aggregation = OrderedDict(aggregation.items())  # take a copy

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # No header row at all. Letting StopIteration escape a generator
        # becomes RuntimeError under PEP 479, so end the generator cleanly.
        return
    # push back header to ensure we iterate only once
    it = itertools.chain([hdr], it)

    # normalise aggregators to (source field(s) or None, callable) pairs
    for outfld in aggregation:
        agg = aggregation[outfld]
        if callable(agg):
            aggregation[outfld] = None, agg
        elif isinstance(agg, string_types):
            aggregation[outfld] = agg, list  # list is default
        elif len(agg) == 1 and isinstance(agg[0], string_types):
            aggregation[outfld] = agg[0], list  # list is default
        elif len(agg) == 1 and callable(agg[0]):
            aggregation[outfld] = None, agg[0]  # aggregate whole rows
        elif len(agg) == 2:
            pass  # no need to normalise
        else:
            raise ArgumentError('invalid aggregation: %r, %r' % (outfld, agg))

    # determine output header
    compound = isinstance(key, (list, tuple))
    if compound:
        outhdr = list(key)
    elif callable(key):
        outhdr = ['key']
    else:
        outhdr = [key]
    outhdr.extend(aggregation)  # output field names, in order
    yield tuple(outhdr)

    # generate data
    for k, rows in rowgroupby(it, key):
        rows = list(rows)  # may need to iterate over these more than once

        # handle compound key
        outrow = list(k) if compound else [k]

        for outfld in aggregation:
            srcfld, aggfun = aggregation[outfld]
            if srcfld is None:
                # aggregate whole rows
                vals = rows
            elif isinstance(srcfld, (list, tuple)):
                # aggregate tuples of several source fields
                idxs = [hdr.index(f) for f in srcfld]
                valgetter = operator.itemgetter(*idxs)
                vals = (valgetter(row) for row in rows)
            else:
                # aggregate a single source field
                idx = hdr.index(srcfld)
                vals = (row[idx] for row in rows)
            outrow.append(aggfun(vals))

        yield tuple(outrow)
def itermergeduplicates(table, key, missing):
    """Merge rows that share `key`, yielding a header then one row per group.

    The header lists the key field(s) first, then the remaining source
    fields. Within a group, each remaining field becomes its single distinct
    non-`missing` value, `missing` when it has none, or ``Conflict(values)``
    when it has several.
    """

    def _resolve(vals):
        # collapse a set of distinct values to one output cell
        if len(vals) == 1:
            return vals.pop()
        if len(vals) == 0:
            return missing
        return Conflict(vals)

    it = iter(table)
    hdr, it = iterpeek(it)
    flds = list(map(text_type, hdr))

    # determine output fields
    key_is_field = isinstance(key, string_types)
    outhdr = [key] if key_is_field else list(key)
    keyflds = set([key]) if key_is_field else set(key)
    valflds = [f for f in flds if f not in keyflds]
    valfldidxs = [flds.index(f) for f in valflds]
    outhdr.extend(valflds)
    yield tuple(outhdr)

    # do the work
    for k, grp in rowgroupby(it, key):
        members = list(grp)
        if key_is_field:
            outrow = [k]
        else:
            outrow = list(k)
        # rows shorter than the field index contribute nothing
        mergedvals = [
            set(row[i] for row in members
                if len(row) > i and row[i] != missing)
            for i in valfldidxs
        ]
        outrow.extend([_resolve(vals) for vals in mergedvals])
        yield tuple(outrow)
def itersimpleaggregate(table, key, aggregation, value):
    """Yield a header, then one aggregated row per group of `table`.

    The header ends in ``'value'`` and starts with the key field(s), or with
    ``'key'`` when `key` is callable. Each data row holds the group key
    followed by `aggregation` applied to the group from
    ``rowgroupby(table, key, value)``.
    """

    def _count(group):
        # count length of iterable
        return sum(1 for _ in group)

    # special case counting
    aggregate = _count if aggregation == len else aggregation

    compound = isinstance(key, (list, tuple))

    # determine output header
    if compound:
        header = tuple(key) + ('value',)
    elif callable(key):
        header = ('key', 'value')
    else:
        header = (key, 'value')
    yield header

    # generate data
    for k, grp in rowgroupby(table, key, value):
        if compound:
            yield tuple(k) + (aggregate(grp),)
        else:
            yield (k, aggregate(grp))
def itersimpleaggregate(table, key, aggregation, value):
    """Apply one aggregation per key group, yielding a header row first.

    Header: ``tuple(key) + ('value',)`` for a list/tuple key,
    ``('key', 'value')`` for a callable key, otherwise ``(key, 'value')``.
    Data: for each ``(k, grp)`` from ``rowgroupby(table, key, value)``, the
    key followed by ``aggregation(grp)``.
    """
    # special case counting: count by iterating over the group
    if aggregation == len:
        def count_items(group):
            return sum(1 for _ in group)
        aggregation = count_items

    is_compound = isinstance(key, (list, tuple))

    # determine output header
    if is_compound:
        key_part = tuple(key)
    elif callable(key):
        key_part = ('key',)
    else:
        key_part = (key,)
    yield key_part + ('value',)

    # generate data
    groups = rowgroupby(table, key, value)
    if is_compound:
        for k, grp in groups:
            yield tuple(k) + (aggregation(grp),)
    else:
        for k, grp in groups:
            yield (k, aggregation(grp))
def __iter__(self):
    """Yield the table with the `self.value` field replaced by integer ids.

    The output header is the source header with the value field renamed to
    ``'<value>_id'``. Each data row has the value field replaced by
    ``n * multiplier + offset``, where ``n`` is the zero-based position of
    the row's group in ``rowgroupby(table, value)`` and
    ``offset, multiplier`` come from ``self.autoincrement``.

    An empty table (no header row) yields nothing.
    """
    it = iter(self.table)
    try:
        hdr = next(it)
    except StopIteration:
        # No header row. Letting StopIteration escape a generator becomes
        # RuntimeError under PEP 479, so end the generator cleanly.
        return
    # push the header back so the table is iterated only once
    table = itertools.chain([hdr], it)

    value = self.value
    vidx = hdr.index(value)
    outhdr = list(hdr)
    outhdr[vidx] = '%s_id' % value
    yield tuple(outhdr)

    offset, multiplier = self.autoincrement
    for n, (_, group) in enumerate(rowgroupby(table, value)):
        for row in group:
            outrow = list(row)
            outrow[vidx] = (n * multiplier) + offset
            yield tuple(outrow)
def collapsedintervals(table, start='start', stop='stop', key=None):
    """
    Utility function to collapse intervals in a table.

    Without a facet `key`, iterate over `(start, stop)` tuples.

    With a facet `key`, iterate over `(key, start, stop)` tuples.

    """
    bounds = (start, stop)

    if key is not None:
        # faceted: sort by facet then start, and collapse within each facet
        ordered = sort(table, key=(key, start))
        for facet, ivs in rowgroupby(ordered, key=key, value=bounds):
            for merged in _collapse(ivs):
                yield (facet,) + merged
        return

    # unfaceted: sort by start and collapse across the whole table
    ordered = sort(table, key=start)
    for merged in _collapse(values(ordered, bounds)):
        yield merged
def collapsedintervals(table, start='start', stop='stop', key=None):
    """
    Utility function to collapse intervals in a table.

    Yields `(start, stop)` tuples when no facet `key` is given, and
    `(key, start, stop)` tuples when a facet `key` is given.

    """
    faceted = key is not None
    sortkey = (key, start) if faceted else start
    tbl = sort(table, key=sortkey)

    if not faceted:
        # collapse over all (start, stop) pairs of the sorted table
        for interval in _collapse(values(tbl, (start, stop))):
            yield interval
    else:
        # collapse separately within each facet, prefixing the facet key
        for k, pairs in rowgroupby(tbl, key=key, value=(start, stop)):
            prefix = (k,)
            for interval in _collapse(pairs):
                yield prefix + interval
def iterfold(table, key, f, value):
    """Yield ``('key', 'value')``, then one folded row per key group.

    Each ``(k, grp)`` pair from ``rowgroupby(table, key, value)`` produces
    the row ``(k, reduce(f, grp))``.
    """
    header = ('key', 'value')
    yield header

    groups = rowgroupby(table, key, value)
    for groupkey, members in groups:
        folded = reduce(f, members)
        yield (groupkey, folded)
def iterrowgroupmap(source, key, mapper, header):
    """Yield `header` as a tuple, then every row the mapper emits per group.

    `mapper` is called with each ``(group key, rows)`` pair from
    ``rowgroupby(source, key)``, and the rows it returns are yielded
    unchanged.
    """
    yield tuple(header)

    grouped = rowgroupby(source, key)
    for grpkey, grprows in grouped:
        mapped = mapper(grpkey, grprows)
        for outrow in mapped:
            yield outrow
def __iter__(self):
    """Yield an ``('id', value)`` header, then one ``(id, v)`` row per group.

    Groups come from ``rowgroupby(self.table, self.value)``. The id of the
    n-th group (zero-based) is ``n * multiplier + offset``, with
    ``offset, multiplier`` unpacked from ``self.autoincrement``.
    """
    offset, multiplier = self.autoincrement
    fieldname = self.value
    yield ('id', fieldname)

    groups = rowgroupby(self.table, self.value)
    for position, (val, _) in enumerate(groups):
        ident = (position * multiplier) + offset
        yield (ident, val)