def run(k, normalize=0):
    """Emit the top K values for each key.

    Groups stdin records with ``values_by_key``, counts how often each value
    occurs for a key, and passes the K most common ``(value, count)`` pairs
    to ``emit`` as a tuple.

    Args:
        k: How many of the most common values to keep per key. Coerced with
            ``int`` so a string argument is accepted.
        normalize: If > 0 (after ``int`` coercion), each count is divided by
            the total count for its key and rounded to that many digits.
            Any falsy value, zero, or a negative number leaves the raw
            counts untouched.
    """
    k = int(k)
    # Coerce once, before the loop. A plain truthiness test would treat the
    # string "0" (or a negative number) as "normalize", contradicting the
    # documented "NORMALIZE > 0" contract.
    digits = int(normalize) if normalize else 0

    for key, value_iterator in values_by_key(sys.stdin):
        counts = Counter(value_iterator)
        top = counts.most_common(k)
        if digits > 0:
            # float() keeps this a true division even where `/` on two ints
            # would truncate (Python 2).
            total = float(sum(counts.values()))
            top = [(value, round(count / total, digits))
                   for value, count in top]
        emit(key, tuple(top))
### Import mr library.
sys.path.append(os.path.dirname(__file__))
### reduce is a builtin on Python 2 but lives only in functools on Python 3;
### this import works on both.
from functools import reduce

from mr import values_by_key

### Record layout used by helper: every field is combined with '+', except
### the two special positions below.
_RECORD_WIDTH = 23
_KEEP_LEFT_INDEX = 4   # field carried over unchanged from the left operand
_USERS_INDEX = 12      # '|'-joined string field, later turned into a count


def _str_sum(str1, str2):
    """Join two strings with '|', collapsing the pair when they are equal."""
    if str1 == str2:
        return str1
    return str1 + '|' + str2


### Helper function to perform component-wise binary operations using reduce.
def helper(x, y):
    """Combine two 23-field records into a new list, field by field.

    Fields are added with '+', except index 4 (taken from ``x`` as-is) and
    index 12 (merged with ``_str_sum``). Fields past index 22 are ignored;
    a record shorter than 23 fields raises IndexError.
    """
    merged = []
    for index in range(_RECORD_WIDTH):
        if index == _KEEP_LEFT_INDEX:
            merged.append(x[index])
        elif index == _USERS_INDEX:
            merged.append(_str_sum(x[index], y[index]))
        else:
            merged.append(x[index] + y[index])
    return merged


for key, value_iterator in values_by_key(sys.stdin):
    ### Use our helper function to reduce the value iterator object.
    ### reduce hands back a lone record untouched when the key has only one
    ### value, so copy into a list to guarantee the in-place updates below
    ### work whatever sequence type the records are.
    record = list(reduce(helper, value_iterator))

    ### Finalize relative position.
    ### NOTE(review): raises ZeroDivisionError if the summed field at index 1
    ### is zero -- confirm upstream never produces that.
    record[11] /= float(record[1])

    ### Compute number of distinct users.
    record[_USERS_INDEX] = len(set(record[_USERS_INDEX].split('|')))

    ### Tab-separate the fields: render the list, swap ', ' for tabs and
    ### strip the surrounding brackets. A single parenthesized argument
    ### prints identically on Python 2 and 3.
    print(str(key) + '\t' + str(record).replace(', ', '\t')[1:-1])
#!/usr/bin/env python3
"""Reducer that emits, for every key on standard input, the sum of its values."""

import sys

from mr import emit, values_by_key

# values_by_key yields (key, iterator-of-values) pairs; total each group and
# hand the result to emit.
for group_key, group_values in values_by_key(sys.stdin):
    group_total = sum(group_values)
    emit(group_key, group_total)
#!/usr/bin/env python
import sys

# MapReduce helpers: grouping of stdin records and output of results.
from mr import emit, values_by_key

# Each item pairs a unique key with an iterator over that key's values.
grouped_input = values_by_key(sys.stdin)

for current_key, current_values in grouped_input:
    # Emit the pair (key, summation of the key's values).
    emit(current_key, sum(current_values))
def run():
    """Emit, for each key grouped from standard input, the set of its values."""
    grouped_input = values_by_key(sys.stdin)
    for group_key, group_values in grouped_input:
        # Building a set drops duplicate values for this key.
        distinct_values = set(group_values)
        emit(group_key, distinct_values)