예제 #1
0
def itermultirangeaggregate(source, key, width, aggregation, minv, maxv):
    aggregation = OrderedDict(aggregation.items()) # take a copy
    it = iter(source)
    srcflds = it.next()
    # push back header to ensure we iterate only once
    it = itertools.chain([srcflds], it)

    # normalise aggregators
    for outfld in aggregation:
        agg = aggregation[outfld]
        if callable(agg):
            aggregation[outfld] = None, agg
        elif isinstance(agg, basestring):
            aggregation[outfld] = agg, list # list is default
        elif len(agg) == 1 and isinstance(agg[0], basestring):
            aggregation[outfld] = agg[0], list # list is default 
        elif len(agg) == 1 and callable(agg[0]):
            aggregation[outfld] = None, agg[0] # aggregate whole rows
        elif len(agg) == 2:
            pass # no need to normalise
        else:
            raise Exception('invalid aggregation: %r, %r' % (outfld, agg))
        
    outflds = [key]
    for outfld in aggregation:
        outflds.append(outfld)
    yield tuple(outflds)
    
    for k, rows in rowgroupbybin(it, key, width, minv=minv, maxv=maxv):
        outrow = [k]
        for outfld in aggregation:
            srcfld, aggfun = aggregation[outfld]
            if srcfld is None:
                aggval = aggfun(rows)
                outrow.append(aggval)
            else:
                idx = srcflds.index(srcfld)
                # try using generator comprehension
                vals = (row[idx] for row in rows)
                aggval = aggfun(vals)
                outrow.append(aggval)
        yield tuple(outrow)
예제 #2
0
def itersimplerangeaggregate(table, key, width, aggregation, value, minv, maxv):
    if aggregation == len:
        aggregation = lambda grp: sum(1 for _ in grp) # count length of iterable
    yield (key, 'value')
    for k, grp in rowgroupbybin(table, key, width, value=value, minv=minv, maxv=maxv):
        yield k, aggregation(grp)
예제 #3
0
def iterrangerowreduce(table, key, width, reducer, fields, minv, maxv):
    yield tuple(fields)
    for k, grp in rowgroupbybin(table, key, width, minv=minv, maxv=maxv):
        yield tuple(reducer(k, grp))