return out def custom_reduce(data): d = dict() for key, value in data: if key not in d: d[key] = value else: d[key] += value out = [] for key in d: out.append((key, d[key])) return out ## Same calls mr_1 = MapReduce(verbose=False) mr_1.map_one(custom_map, values_1) mr_1.map_one(custom_map, values_2) mr_1.map_one(custom_map, values_3) print(mr_1.reduce(custom_reduce)) mr_2 = MapReduce(verbose=False) mr_2.map(custom_map, all_values, 3) print(mr_2.reduce(custom_reduce)) mr_3 = MapReduce(verbose=True) print(mr_3.map_reduce(custom_map, custom_reduce, all_values, 3))
def _load_key_value_records(path):
    """Load the JSON object at *path* and flatten it into message records.

    The file is expected to hold a mapping whose values are iterables; every
    ``(key, element)`` combination becomes one ``{"k": key, "v": element}``
    dict, in the mapping's iteration order.
    """
    with open(path, 'r', encoding='utf-8') as infile:
        json_data = json.load(infile)
    return [
        {"k": key, "v": value}
        for key in json_data
        for value in json_data[key]
    ]


def main():
    """Run the MPI job: rank 0 distributes work, every other rank processes it.

    Master (rank 0):
      1. Mapping phase - flattens ``application/output/adjacency_list.json``
         into records and passes them to ``transmit_data`` with the map tag,
         re-submitting whatever it returns until the result is falsy.
      2. Reduction phase - does the same, once, for every file matching
         ``application/output/map/*.json`` with the reduce tag.
      3. Stop phase - sends an empty record with the stop tag to every
         worker and waits for those sends to complete.

    Workers (rank > 0): receive records from the master in a loop, dispatch
    on the message tag to ``MapReduce.map`` / ``MapReduce.reduce``, send the
    record back with the confirmation tag after each message, and call
    ``store_values()`` once the stop tag has been received.
    """
    comm = MPI.COMM_WORLD
    nr_nodes = comm.Get_size()
    rank = comm.Get_rank()

    # Message tags shared by the master and the workers.
    map_phase = 10
    reduce_phase = 20
    confirmation = 30
    stop_phase = 100

    master = 0  # master node

    if rank == master:
        # mapping phase
        data_queue = _load_key_value_records(
            'application/output/adjacency_list.json')
        # transmit_data returns the records still to be sent; keep going
        # until nothing is left.
        temp_data = transmit_data(data_queue, nr_nodes, comm, map_phase)
        while temp_data:
            temp_data = transmit_data(temp_data, nr_nodes, comm, map_phase)

        # reduction phase
        for file in glob.glob('application/output/map/*.json'):
            data_queue = _load_key_value_records(file)
            # NOTE(review): unlike the mapping phase, the value returned by
            # transmit_data is not re-submitted here (the retry loop was
            # disabled in the original) - confirm that this is intended.
            transmit_data(data_queue, nr_nodes, comm, reduce_phase)

        # stopping phase
        # Keep the request handles: a non-blocking send must be completed
        # before this process moves on towards MPI finalisation, otherwise
        # delivery of the stop message is not guaranteed.
        stop_requests = [
            comm.isend({"k": "", "v": ""}, dest=dest_rank, tag=stop_phase)
            for dest_rank in range(1, nr_nodes)
        ]
        for request in stop_requests:
            request.wait()
        print(f"[{rank}] - TERMINATED ")

    # worker nodes
    else:
        mr = MapReduce(rank)
        is_terminated = False
        while not is_terminated:
            status = MPI.Status()
            data = comm.recv(source=master, tag=MPI.ANY_TAG, status=status)
            tag = status.Get_tag()

            if tag == map_phase:
                mr.map(data["k"], data["v"])
            elif tag == reduce_phase:
                mr.reduce(data["k"], data["v"])
            elif tag == stop_phase:
                print(f"[{rank}] - TERMINATED")
                is_terminated = True
            else:
                print(f"[{rank}] - INCORRECT TAG RECEIVED")

            # Echo the record back so the master knows it was handled.
            # NOTE(review): this request is never waited on; presumably
            # transmit_data receives these confirmations - verify, and
            # consider completing the request once that is confirmed.
            comm.isend(data, dest=master, tag=confirmation)

        mr.store_values()