Beispiel #1
0
def avg(data, column):
    """Return the mean of ``column`` over the rows of ``data``.

    Every row's ``column`` entry is converted with ``int``.  The work is
    handed to the ``parallel`` module in two stages: a map stage that turns
    each chunk of values into a ``(sum, count)`` pair, followed by two reduce
    stages that total the sums and the counts separately.

    Side effect: sets the module-level ``__is_aggregate`` flag to ``True``.

    NOTE(review): how ``parallel.run`` splits the values into chunks and what
    exactly it returns is not visible here; the code below only relies on the
    results being iterable collections that ``sum`` can add up.  Presumably an
    empty ``data`` ends in a ``ZeroDivisionError`` -- confirm against callers.
    """
    global __is_aggregate
    __is_aggregate = True

    def summarize(chunk):
        # One (sum, count) pair per chunk, wrapped in a single-element list.
        return [(sum(int(item) for item in chunk), len(chunk))]

    def total_of_sums(pairs):
        # The 0.0 start value makes this total a float rather than an int.
        return sum((pair[0] for pair in pairs), 0.0)

    def total_of_counts(pairs):
        return sum(pair[1] for pair in pairs)

    column_values = [row[column] for row in data]
    partials = parallel.run(parallel.map(summarize), column_values, 'avg()')
    sums = parallel.run(parallel.reduce(total_of_sums), partials)
    counts = parallel.run(parallel.reduce(total_of_counts), partials)
    return sum(sums) / sum(counts)
Beispiel #2
0
def min(data, column):
    """Return the smallest value of ``column`` over the rows of ``data``.

    Every row's ``column`` entry is converted with ``int``.  A reduce step
    run through the ``parallel`` module computes the minimum of each chunk,
    and the minimum of those per-chunk results is returned.

    Side effect: sets the module-level ``__is_aggregate`` flag to ``True``.

    NOTE(review): chunking behaviour of ``parallel.run`` is not visible here;
    presumably an empty ``data`` raises ``ValueError`` from the builtin
    ``min`` -- confirm against callers.
    """
    global __is_aggregate
    __is_aggregate = True

    # This function shadows the builtin ``min`` at module level, so the real
    # one has to be fetched explicitly.  ``__builtins__`` is a CPython
    # implementation detail: it is a dict in imported modules but a module
    # object in ``__main__``, where ``__builtins__['min']`` raises TypeError.
    # Going through the builtins module works in both situations.
    try:
        import builtins as builtins_module  # Python 3
    except ImportError:
        import __builtin__ as builtins_module  # Python 2
    smallest = builtins_module.min

    column_values = [row[column] for row in data]
    per_chunk_minima = parallel.run(
        parallel.reduce(lambda chunk: smallest(int(item) for item in chunk)),
        column_values,
    )
    return smallest(per_chunk_minima)