def compute(lst):
    """Run a map/reduce pass over ``lst`` with one process per input slice.

    ``lst`` is cut into consecutive slices of ``len(lst) / num_map_nodes``
    items (at least one item each); every slice is handed to its own ``map``
    process together with a shared emissions list.  Once all map processes
    have finished, one ``reduce`` process is started per key and the values
    those processes put into the shared result list are summed.

    NOTE(review): ``map`` and ``reduce`` are called with signatures that do
    not match the builtins, so they are presumably this module's own worker
    functions -- confirm they are defined at module level.

    Args:
        lst: Sliceable sequence of input items.

    Returns:
        The sum of the shared result list filled by the reduce processes,
        or 0 when there is nothing to reduce (empty input or no emissions).
    """
    # An empty input produces no emissions, and the key range below is read
    # from the last emission, so bail out before spawning any process.
    if len(lst) == 0:
        return 0

    num_map_nodes = get_num_map_nodes()

    # A single manager serves every shared list; the context manager shuts
    # its server process down as soon as the result has been read.
    with Manager() as manager:
        emissions = manager.list()
        start_time = time.time()
        print("Up to Permutation:", str(time.time() - start_time) + " seconds")

        len_lst = len(lst)
        print("Size of S: ", len_lst)
        len_sublist = max(1, int(len_lst / float(num_map_nodes)))

        # Map stage: one process per slice.  The final slice is simply
        # whatever is left, which slicing past the end already gives us.
        jobs = []
        for num_p, start in enumerate(range(0, len_lst, len_sublist)):
            sliced_list = lst[start:start + len_sublist]
            print(len(sliced_list))
            p = Process(target=map, args=(num_p, sliced_list, emissions))
            jobs.append(p)
            p.start()

        print("Wait for Catch Up\n")
        for p in jobs:
            p.join()

        # Copy the emissions out of the proxy in one round trip; iterating
        # the proxy directly costs one IPC call per element per key.
        emitted = emissions[:]
        print(len(emitted))
        print("Up to Mapping Stage:", str(time.time() - start_time) + " seconds")

        # ---------------------------------------------------
        # Shuffle/Reduce
        if not emitted:
            # The mappers emitted nothing, so there is no key range to read.
            print("Input Size: ", len_lst)
            return 0

        # The key range is taken from fields 1 and 2 of the last emission
        # (presumably the lower/upper key bounds -- TODO confirm); the "+ 5"
        # margin is kept from the original code.
        last = emitted[-1]
        result_lst = manager.list()
        jobs = []
        for key in range(last[1], last[2] + 5):
            # One 1 per emission whose first field equals this key.
            key_list = [1 for x in emitted if x[0] == key]
            q = Process(target=reduce, args=(key, key_list, result_lst))
            jobs.append(q)
            q.start()

        print("Wait for Catch Up\n")
        for q in jobs:
            q.join()

        print("Up to Reducing Stage:", str(time.time() - start_time) + " seconds")
        print("Input Size: ", len_lst)
        # Read the results before the manager (and its proxies) goes away.
        return sum(result_lst[:])
def compute(lst, size_param=0, option='not_fixed'):
    """Run a map -> combine -> reduce pipeline over ``lst`` using processes.

    ``lst`` is cut into consecutive slices, each mapped by its own process
    into its own shared emissions list.  For every emissions list, one
    ``combine`` process per key writes into a dedicated shared list; once
    all combiners are done, one ``reduce`` process per combined list adds to
    a shared result list, whose sum is returned.

    NOTE(review): ``map``, ``combine`` and ``reduce`` are presumably this
    module's own worker functions (the call signatures do not match the
    builtins) -- confirm they are defined at module level.

    Args:
        lst: Sliceable sequence of input items.
        size_param: Forwarded to ``get_num_map_nodes`` when ``option`` is
            ``'not_fixed'``; ignored otherwise.
        option: ``'not_fixed'`` to ask ``get_num_map_nodes`` for the number
            of map nodes, ``'fixed'`` to use 5.

    Returns:
        The sum of the shared result list, or ``None`` (after printing a
        usage message) when ``option`` is not one of the accepted values.
    """
    # Only the node-count selection is guarded: a NameError raised further
    # down is a real bug and must not be reported as a usage error.
    try:
        if option == 'not_fixed':
            num_map_nodes = get_num_map_nodes(size_param)
        elif option == 'fixed':
            num_map_nodes = 5
        else:
            raise NameError('second parameter [option] should be either `not_fixed` or `fixed` ... see documentation')
    except NameError as e:
        print('Usage Error:', e)
        return None

    # A single manager serves every shared list; the context manager shuts
    # its server process down as soon as the result has been read.
    with Manager() as manager:
        start_time = time.time()
        print("Up to Permutation:", str(time.time() - start_time) + " seconds")

        len_lst = len(lst)
        print("Size of S: ", len_lst)
        len_sublist = max(1, int(len_lst / float(num_map_nodes)))

        # Map stage: split the input into multiple slices, each sent to one
        # map process with its own emissions list.
        list_of_emissions = []
        jobs = []
        for num_p, start in enumerate(range(0, len_lst, len_sublist)):
            node_emissions = manager.list()
            list_of_emissions.append(node_emissions)
            sliced_list = lst[start:start + len_sublist]
            p = Process(target=map, args=(num_p, sliced_list, node_emissions))
            jobs.append(p)
            p.start()

        print("Wait for Catch Up\n")
        for p in jobs:
            p.join()
        print("Up to Mapping Stage:", str(time.time() - start_time) + " seconds")

        # ---------------------------------------------------
        # Combine step: remember which key each combined list belongs to so
        # the matching reducer receives that key.
        keyed_combined_lists = []
        jobs = []
        for emissions in list_of_emissions:
            if len(emissions) == 0:
                # This map node emitted nothing; there is no key range to read.
                continue
            # The key range is taken from fields 1 and 2 of the last emission
            # (presumably the lower/upper key bounds -- TODO confirm); the
            # "+ 5" margin is kept from the original code.
            last = emissions[-1]
            for key in range(last[1], last[2] + 5):
                combined_list = manager.list()
                keyed_combined_lists.append((key, combined_list))
                q = Process(target=combine, args=(key, emissions, combined_list))
                # Track the combiner so the join loop below really waits for
                # it before any reducer reads its output.
                jobs.append(q)
                q.start()

        print("Wait for Catch Up\n")
        for q in jobs:
            q.join()

        # REDUCE --------------------------------------------------------
        result_lst = manager.list()
        jobs = []
        for key, combined_list in keyed_combined_lists:
            r = Process(target=reduce, args=(key, combined_list, result_lst))
            jobs.append(r)
            r.start()

        print("Wait for Catch Up\n")
        for r in jobs:
            r.join()

        print("Up to Reducing Stage:", str(time.time() - start_time) + " seconds")
        print("Length of List of List: ", len_lst)
        # Read the results before the manager (and its proxies) goes away.
        return sum(result_lst[:])
def compute(lst, size_param=0, option="not_fixed"):
    """Run a map/reduce pass over ``lst`` with reducers per map node and key.

    ``lst`` is cut into consecutive slices, each mapped by its own process
    into its own shared emissions list.  For every emissions list, one
    ``reduce`` process is started per key; the values those processes put
    into the shared result list are summed and returned.

    NOTE(review): ``map`` and ``reduce`` are presumably this module's own
    worker functions (the call signatures do not match the builtins) --
    confirm they are defined at module level.

    Args:
        lst: Sliceable sequence of input items.
        size_param: Forwarded to ``get_num_map_nodes`` when ``option`` is
            ``"not_fixed"``; ignored otherwise.
        option: ``"not_fixed"`` to ask ``get_num_map_nodes`` for the number
            of map nodes, ``"fixed"`` to use 5.

    Returns:
        The sum of the shared result list, or ``None`` (after printing a
        usage message) when ``option`` is not one of the accepted values.
    """
    # Only the node-count selection is guarded: a NameError raised further
    # down is a real bug and must not be reported as a usage error.
    try:
        if option == "not_fixed":
            num_map_nodes = get_num_map_nodes(size_param)
        elif option == "fixed":
            num_map_nodes = 5
        else:
            raise NameError("second parameter [option] should be either `not_fixed` or `fixed` ... see documentation")
    except NameError as e:
        print("Usage Error:", e)
        return None

    # A single manager serves every shared list; the context manager shuts
    # its server process down as soon as the result has been read.
    with Manager() as manager:
        start_time = time.time()

        len_lst = len(lst)
        print("Size of S: ", len_lst)
        len_sublist = max(1, int(len_lst / float(num_map_nodes)))

        # Map stage: split the input into multiple slices, each sent to one
        # map process; list_of_emissions[n] holds what map process n emitted.
        list_of_emissions = []
        jobs = []
        for num_p, start in enumerate(range(0, len_lst, len_sublist)):
            node_emissions = manager.list()
            list_of_emissions.append(node_emissions)
            sliced_list = lst[start:start + len_sublist]
            p = Process(target=map, args=(num_p, sliced_list, node_emissions))
            jobs.append(p)
            p.start()

        print("Wait for Catch Up\n")
        for p in jobs:
            p.join()
        print("Up to Mapping Stage:", str(time.time() - start_time) + " seconds")

        # Shuffle/Reduce: a separate set of reducers per map node (the
        # "step five" optimization mentioned in the original comments).
        result_lst = manager.list()
        jobs = []
        for emissions in list_of_emissions:
            # Copy the emissions out of the proxy in one round trip;
            # iterating the proxy costs one IPC call per element per key.
            emitted = emissions[:]
            if not emitted:
                # This map node emitted nothing; there is no key range to read.
                continue
            # The key range is taken from fields 1 and 2 of the last emission
            # (presumably the lower/upper key bounds -- TODO confirm); the
            # "+ 5" margin is kept from the original code.
            last = emitted[-1]
            for key in range(last[1], last[2] + 5):
                # One 1 per emission whose first field equals this key.
                key_list = [1 for x in emitted if x[0] == key]
                q = Process(target=reduce, args=(key, key_list, result_lst))
                jobs.append(q)
                q.start()

        print("Wait for Catch Up\n")
        for q in jobs:
            q.join()

        print("Up to Reducing Stage:", str(time.time() - start_time) + " seconds")
        print("Length of List of List: ", len_lst)
        # Read the results before the manager (and its proxies) goes away.
        return sum(result_lst[:])