def itermultirangeaggregate(source, key, width, aggregation, minv, maxv):
    """Yield a header row, then one aggregated row per group of `source`.

    Groups come from ``rowgroupbybin(it, key, width, minv=minv, maxv=maxv)``,
    which is expected to yield ``(group key, rows)`` pairs.

    `aggregation` maps each output field name to an aggregator spec, which
    may be any of:

    * a callable -- applied to the whole rows of each group;
    * a source field name -- values of that field are collected with `list`;
    * a 1-sequence holding a field name -- same as a bare field name;
    * a 1-sequence holding a callable -- same as a bare callable;
    * a 2-sequence ``(source field or None, callable)`` -- used as given.

    The first row yielded is ``(key, <output field names...>)``. Each later
    row is ``(group key, <aggregated values...>)`` in the same field order.
    If `source` yields nothing at all (not even a header), nothing is yielded.

    Raises ``Exception('invalid aggregation: ...')`` when a spec matches none
    of the forms above. Looking up a source field that is missing from the
    header fails inside ``srcflds.index`` while the first group is processed.
    """
    # take a copy so the caller's mapping is not modified by normalisation
    aggregation = OrderedDict(aggregation.items())

    it = iter(source)
    try:
        # next(it) works on Python 2.6+ and 3, unlike it.next()
        srcflds = next(it)
    except StopIteration:
        # Empty source: end the generator explicitly instead of letting
        # StopIteration leak out of the generator body (PEP 479).
        return
    # push back header to ensure we iterate over source only once
    it = itertools.chain([srcflds], it)

    # normalise every aggregator spec to a (source field, function) pair;
    # only existing keys are reassigned, so iterating meanwhile is safe
    for outfld in aggregation:
        agg = aggregation[outfld]
        if callable(agg):
            aggregation[outfld] = None, agg  # aggregate whole rows
        elif isinstance(agg, basestring):
            aggregation[outfld] = agg, list  # list is default
        elif len(agg) == 1 and isinstance(agg[0], basestring):
            aggregation[outfld] = agg[0], list  # list is default
        elif len(agg) == 1 and callable(agg[0]):
            aggregation[outfld] = None, agg[0]  # aggregate whole rows
        elif len(agg) == 2:
            pass  # already a (source field, function) pair
        else:
            raise Exception('invalid aggregation: %r, %r' % (outfld, agg))

    # header: the key followed by the output field names in mapping order
    yield (key,) + tuple(aggregation)

    for k, rows in rowgroupbybin(it, key, width, minv=minv, maxv=maxv):
        # NOTE(review): every aggregator below receives the same `rows`
        # object; if rowgroupbybin yields one-shot iterators, only the first
        # aggregator would see any data -- confirm groups are re-iterable.
        outrow = [k]
        for outfld in aggregation:
            srcfld, aggfun = aggregation[outfld]
            if srcfld is None:
                # no source field: aggregate over the whole rows
                outrow.append(aggfun(rows))
            else:
                idx = srcflds.index(srcfld)
                # lazily project the single field out of each row
                vals = (row[idx] for row in rows)
                outrow.append(aggfun(vals))
        yield tuple(outrow)
def itersimplerangeaggregate(table, key, width, aggregation, value, minv, maxv):
    """Yield ``(key, 'value')`` and then one ``(group key, result)`` per group.

    Groups are taken from ``rowgroupbybin(table, key, width, value=value,
    minv=minv, maxv=maxv)``; `aggregation` is called on each group and its
    return value becomes the second item of the yielded pair.

    Passing the builtin `len` as `aggregation` is special-cased: the group is
    counted by iterating over it rather than by calling ``len`` on it.
    """
    aggfun = aggregation
    if aggregation == len:
        def aggfun(members):
            # count length of iterable without requiring __len__
            return sum(1 for _ in members)

    # header row
    yield (key, 'value')

    groups = rowgroupbybin(table, key, width,
                           value=value, minv=minv, maxv=maxv)
    for groupkey, members in groups:
        yield groupkey, aggfun(members)
def iterrangerowreduce(table, key, width, reducer, fields, minv, maxv):
    """Yield `fields` as a header tuple, then one reduced row per group.

    Groups are taken from ``rowgroupbybin(table, key, width, minv=minv,
    maxv=maxv)``. For every ``(group key, group)`` pair, ``reducer(group key,
    group)`` is called and its result is yielded converted to a tuple.
    """
    header = tuple(fields)
    yield header

    groups = rowgroupbybin(table, key, width, minv=minv, maxv=maxv)
    for groupkey, members in groups:
        reduced = reducer(groupkey, members)
        yield tuple(reduced)