# top right result.append(((x+1, y-1), neighbor)) if y < GRID_SIZE: # bottom result.append(((x, y+1), neighbor)) if x > 1: # bottom left result.append(((x-1, y+1), neighbor)) return result def reducer1(coords, neighborsList): return coords, neighborsList result_mapred1 = simul.map_red(data, mapper1, reducer1) print("result_mapred1") for item in result_mapred1: print("\t", item) def mapper2(coords, neighborsList): result = [] for x, neighbor1 in enumerate(neighborsList): for neighbor2 in neighborsList[x:]: result.append(((neighbor1, neighbor2), 1)) return result
# NOTE(review): this chunk is the body of a function whose `def` line is not
# visible here (presumably the `reducer` passed to map_red below -- confirm).
# Indentation was reconstructed from a whitespace-collapsed source.
    # Split the incoming (table_name, record) pairs into two lists by table.
    table_users_records = []
    table_orders_records = []
    for table_name, records in table_records:
        if table_name == ORDERS:
            table_orders_records.append(records)
            continue
        # else it has to be from the users table
        table_users_records.append(records)

    result = []
    for record in table_users_records:
        user_id, user_name = record  # user_name is unpacked but not used below
        order_id = None  # placeholder; rebound by the loops below before any read
        # Group product names by order id.
        # NOTE(review): the second field of each order record is ignored, so
        # every user is paired with every order -- confirm this is intended.
        orders_hash = {}
        for record in table_orders_records:  # rebinds the outer loop's `record`
            order_id, _, product_name = record
            if order_id not in orders_hash:
                orders_hash[order_id] = []
            orders_hash[order_id].append(product_name)
        # Emit one ((user_id, order_id), [product names]) entry per order.
        for order_id, products_list in orders_hash.items():
            key = (user_id, order_id)
            result.append((key, products_list))
    return result


# Run the job over `data` and print each resulting item.
result_mapred = simul.map_red(data, mapper, reducer)
print('result_mapred')
for item in result_mapred:
    print("\t", item)
# -*- coding: UTF-8 -*-
from mredu.simul import map_red, input_file, run
from re import split


def mymap(k, v):
    """Map one text value to ``(number_of_words, 1)``.

    The text ``v`` is split on every non-word character and the empty pieces
    are discarded; what remains is counted. The key ``k`` is ignored.
    """
    word_count = sum(1 for piece in split(r'\W', v) if piece != '')
    return word_count, 1


def myred(k, vs):
    """Return the key together with the sum of its values."""
    total = sum(vs)
    return k, total


if __name__ == '__main__':
    source = input_file('data/quijote.txt')
    process = map_red(source, mapper=mymap, reducer=myred)
    run(process)
# -*- coding: UTF-8 -*-
from mredu.simul import map_red, input_kv_file, run
from re import split


def mymap(k, v):
    """Swap the pair: the incoming value becomes the key and vice versa."""
    swapped = (v, k)
    return swapped


def myred(k, vs):
    """Return the key together with how many values were collected for it."""
    how_many = len(vs)
    return k, how_many


if __name__ == '__main__':
    source = input_kv_file('data/palabras.txt')
    process = map_red(source, mymap, myred)
    run(process)
def mapper1(number, _):
    """Emit ``('all', number)``: every number is filed under the same key.

    The second argument is ignored.
    """
    return ('all', number)


# Show the raw mapper output (sorted) before any reduction.
result_mapper1 = list(simul.process_mapper(data, mapper1))
print('')
print("result_mapper1:\n", sorted(result_mapper1))


def reducer1(k, numbersList):
    """Return ``('max-min', (minimum, maximum))`` for ``numbersList``.

    Note the tuple order is (min, max) even though the key reads 'max-min'.
    The incoming key ``k`` is not used.
    """
    return 'max-min', (min(numbersList), max(numbersList))


result_reducer1 = list(simul.map_red(data, mapper1, reducer1))
print("result_reducer1:\n", result_reducer1)
print('')

# The value of the first result item is the (min, max) tuple built by reducer1.
MIN, MAX = result_reducer1[0][1]
DELTA = MAX - MIN


# NOTE(review): the visible chunk ends inside this function -- no return
# statement is in view, so its result cannot be documented from here.
def mapper2(number, _):
    column_size = DELTA / BARS_NUM
    # number == MAX would otherwise floor to index BARS_NUM, one past the last
    # column, so it is placed in the last column (BARS_NUM - 1) instead.
    column = BARS_NUM - 1 if number == MAX else floor(
        (number - MIN) / column_size)
    # Round the number down to a multiple of column_size.
    range_from = number - number % column_size
    range_to = range_from + column_size
def mapper1(customer, product):
    """Pass the ``(customer, product)`` pair through unchanged."""
    pair = (customer, product)
    return pair


def reducer1(customer, productsList):
    """Emit ``(sorted_pair, 1)`` for every 2-combination of ``productsList``.

    Each pair is sorted and turned into a tuple, so the same two products
    always produce the same key regardless of their order in the list.
    ``customer`` is not used.
    """
    return [
        (tuple(sorted(combo)), 1)
        for combo in itertools.combinations(productsList, 2)
    ]


# First pass: product pairs per customer.
result_mapred1 = list(simul.map_red(data, mapper1, reducer1))
print("result_mapred1:\n")
for item in result_mapred1:
    print(item)


def mapper2(products_pair, amount):
    """Pass the ``(products_pair, amount)`` pair through unchanged."""
    passthrough = (products_pair, amount)
    return passthrough


def reducer2(products_pair, amountsList):
    """Return the product pair together with the sum of its amounts."""
    total = sum(amountsList)
    return products_pair, total


# Second pass: feed the first pass's output back in and total each pair.
result_mapred2 = list(simul.map_red(result_mapred1, mapper2, reducer2))
# -*- coding: UTF-8 -*-
from mredu import simul

l = [('a', 1), ('b', 2), ('a', 3), ('c', 9), ('b', 6)]


def _sum_by_key(k, v):
    """Reducer: pair the key with the sum of its values."""
    return (k, sum(v))


def _unroll(k, v):
    """Mapper: emit the pair ``(k, 1)`` exactly ``v`` times."""
    return [(k, 1)] * v


# Reducer-only job.
print('Sum all values by key...')
print('-' * 50)
process = simul.map_red(l, reducer=_sum_by_key)
simul.run(process, sep=',')

print()

# Mapper-only job.
print('Unrolls key by value...')
print('-' * 50)
process = simul.map_red(l, mapper=_unroll)
simul.run(process, sep=',')
# -*- coding: utf-8 -*-
from mredu import simul
import re

data = list(simul.input_file('quijote.txt'))

# Matches every character that is neither a lowercase Spanish letter nor
# whitespace; compiled once because the mapper runs for every input line.
_NON_LETTER = re.compile(r'[^a-záéíóúüñç\s]')


def mapper(k, v):
    """Turn one line of text into a list of ``(word, 1)`` pairs.

    The line is lowercased, every character that is not a lowercase Spanish
    letter or whitespace is removed, and the remainder is split into words.

    Args:
        k: Key of the input record; not used.
        v: The text of the line.

    Returns:
        A list of ``(word, 1)`` tuples, or ``None`` when the line contains no
        words after cleaning.
    """
    # str.split() with no argument splits on any run of whitespace and drops
    # leading/trailing whitespace. The previous split on runs of spaces only
    # left words separated by tabs (or other whitespace kept by the regex)
    # fused into a single token.
    words = _NON_LETTER.sub('', v.lower()).split()
    if not words:
        return None
    return [(word, 1) for word in words]


def reducer(k, vList):
    """Return ``(k, total)`` where ``total`` is the sum of ``vList``."""
    return (k, sum(vList))


result_map = list(simul.map_red(data, mapper, reducer))

# Print the 50 items with the highest counts, highest first.
print("top 50\n")
for item in sorted(result_map, key=lambda item: item[1], reverse=True)[:50]:
    print("\t", item)
# -*- coding: UTF-8 -*-
from mredu.simul import map_red, input_file, run
import re

stopwords = [
    u'así', u'para', u'sus', u'una', u'ni', u'porque', u'sin', u'tan', u'al',
    u'si', u'me', u'un', u'más', u'es', u'del', u'lo', u'las', u'le', u'mas',
    u'por', u'su', u'con', u'los', u'se', u'no', u'en', u'el', u'la', u'a',
    u'y', u'de', u'que', u'muy', u'qué', u'como', u'mi', u'o', u'aquel', u'ya',
    u'pues', u'cuando', u'cual', u'pero', u'este', u'esto', u'aquí',
    u'aquella', u'aquello'
]

# Set view of the list above for constant-time membership tests; the list
# itself is kept so existing users of `stopwords` are unaffected.
_STOPWORD_SET = frozenset(stopwords)


def mymap(_, v):
    """Turn one line of text into ``(word, 1)`` pairs, skipping stopwords.

    The text is split on non-word characters, empty pieces are dropped and
    each word is lowercased.

    Args:
        _: Key of the input record; not used.
        v: The text of the line.

    Returns:
        A list of ``(lowercased_word, 1)`` tuples for every word that is not
        in ``stopwords``.
    """
    # Lowercase BEFORE the stopword test: the stopword list is all lowercase,
    # so testing the original-case token let capitalised stopwords ("De",
    # "La", ...) through, and they were then emitted as "de", "la", ...
    lowered = (
        piece.lower()
        for piece in re.split(r'\W', v, flags=re.UNICODE)
        if piece != ''
    )
    return [(word, 1) for word in lowered if word not in _STOPWORD_SET]


def myred(k, vs):
    """Return the key together with the number of values collected for it."""
    return k, len(vs)


if __name__ == '__main__':
    process = map_red(input_file('data/quijote.txt'), mymap, myred)
    run(process)