def avg_with_cnt(self, data):
    """Return, per rebuilt key, the string "<count>,<average>" of its values.

    Each ``(key, value)`` pair in ``data`` is first re-keyed with
    ``adpu.build_key(self.combo, key)``.  Values sharing a rebuilt key are
    then folded into a running ``(sum, count)`` pair, and finally rendered
    as ``str(count) + "," + str(sum / count)``.

    Args:
        data: A collection of ``(key, value)`` pairs exposing ``map`` and
            ``combineByKey`` (presumably a Spark pair RDD -- confirm against
            the callers).  Values must support ``+`` and division by an int.

    Returns:
        The result of the final ``map`` call: ``(label, "<count>,<average>")``
        pairs, one per distinct rebuilt key.
    """
    # True division keeps the fractional part of the average even when the
    # values are integers on Python 2; on Python 3 it is exactly ``/``.
    from operator import truediv

    # Read the attribute once so the closures below capture only ``combo``
    # instead of the whole ``self`` object.
    combo = self.combo

    def relabel(pair):
        # Explicit indexing replaces tuple-parameter unpacking, which
        # Python 3 rejects (PEP 3113).
        return (adpu.build_key(combo, pair[0]), pair[1])

    def start(value):
        # First value seen for a key: sum is the value, count is 1.
        return (value, 1)

    def absorb(acc, value):
        # Fold one more value into an existing (sum, count) accumulator.
        return (acc[0] + value, acc[1] + 1)

    def combine(left, right):
        # Merge two partial (sum, count) accumulators for the same key.
        return (left[0] + right[0], left[1] + right[1])

    def render(entry):
        label, (value_sum, count) = entry
        return (label, str(count) + "," + str(truediv(value_sum, count)))

    return (
        data.map(relabel)
        .combineByKey(start, absorb, combine)
        .map(render)
    )
def max(self, data):
    """Return the largest value for each rebuilt key.

    Each ``(key, value)`` pair in ``data`` is re-keyed with
    ``adpu.build_key(self.combo, key)``; values sharing a rebuilt key are
    then reduced pairwise, keeping the greater one according to ``>``.

    Args:
        data: A collection of ``(key, value)`` pairs exposing ``map`` and
            ``reduceByKey`` (presumably a Spark pair RDD -- confirm against
            the callers).  Values must be mutually comparable with ``>``.

    Returns:
        The result of ``reduceByKey``: ``(label, largest_value)`` pairs, one
        per distinct rebuilt key.
    """
    # Read the attribute once so the closure below captures only ``combo``
    # instead of the whole ``self`` object.
    combo = self.combo

    def relabel(pair):
        # Explicit indexing replaces tuple-parameter unpacking, which
        # Python 3 rejects (PEP 3113).
        return (adpu.build_key(combo, pair[0]), pair[1])

    def larger(a, b):
        # Written out rather than calling the builtin: on ties this yields
        # ``b``, exactly as the original comparison did.
        return a if a > b else b

    return data.map(relabel).reduceByKey(larger)