def avg(data, column):
    """Return the mean of the integer-converted values of ``column`` in ``data``.

    Also sets the module-level ``__is_aggregate`` flag to ``True``.

    Args:
        data: Iterable of rows; every row must support ``row[column]``.
        column: Key or index selecting the value to average in each row.

    Returns:
        The total of all values divided by the number of values. The total
        is accumulated starting from ``0.0``.

    Raises:
        ZeroDivisionError: If the combined value count is zero.
    """
    global __is_aggregate
    __is_aggregate = True

    def _sum_and_count(chunk):
        # One (sum, count) pair per chunk, wrapped in a single-item list.
        return [(sum(int(value) for value in chunk), len(chunk))]

    def _total_of_sums(pairs):
        # The 0.0 start value makes the accumulated total a float.
        return sum((pair[0] for pair in pairs), 0.0)

    def _total_of_counts(pairs):
        return sum(pair[1] for pair in pairs)

    column_values = [row[column] for row in data]

    # NOTE(review): 'avg()' is forwarded to parallel.run unchanged; its meaning
    # is defined by the parallel module -- presumably a label, confirm there.
    partials = parallel.run(
        parallel.map(_sum_and_count), column_values, 'avg()'
    )
    sums = parallel.run(parallel.reduce(_total_of_sums), partials)
    counts = parallel.run(parallel.reduce(_total_of_counts), partials)

    # Each reduce result is itself summed before dividing, so parallel.run is
    # expected to hand back an iterable of partial results.
    return sum(sums) / sum(counts)
def min(data, column):
    """Return the smallest integer-converted value of ``column`` in ``data``.

    Also sets the module-level ``__is_aggregate`` flag to ``True``.

    Args:
        data: Iterable of rows; every row must support ``row[column]``.
        column: Key or index selecting the value to compare in each row.

    Returns:
        The minimum over the per-chunk minima returned by ``parallel.run``.
    """
    global __is_aggregate
    __is_aggregate = True

    # This function shadows the builtin ``min`` at module level, so the real
    # one is fetched from the builtins module. ``__builtins__`` is avoided on
    # purpose: it is a CPython implementation detail that is a dict in
    # imported modules but the module object in ``__main__``, where
    # ``__builtins__['min']`` raises TypeError.
    try:
        import builtins as builtins_module
    except ImportError:  # Python 2 names the module __builtin__
        import __builtin__ as builtins_module
    builtin_min = builtins_module.min

    column_values = [row[column] for row in data]
    chunk_minima = parallel.run(
        parallel.reduce(
            lambda chunk: builtin_min([int(value) for value in chunk])
        ),
        column_values,
    )
    # The reduce step yields one minimum per chunk; pick the overall smallest.
    return builtin_min(chunk_minima)