from multiprocessing import Pool, Process, Manager
from simplfunction import simplefunction
from calltomapper import calltomapper

if __name__ == '__main__':
    # Map reduce test is a simple testing module to check functionality of
    # the map-reduce implementation.
    # NOTE(review): haystack, haystackmapper and haystackreducer are not
    # imported by the lines visible here -- confirm they are in scope.
    haystacks = [
        haystack("0", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        haystack("1", [3, 2, 2, 3, 4, 3, 6, 7, 8, 9]),
        haystack("2", [1, 6, -1, 0, 4, 0, 6, 7, 8, 9]),
        haystack("3", [3, 3, 3]),
    ]
    haystackmap = haystackmapper(haystacks)

    print("USING MAP POOL")
    pool = Pool(2)  # if it is a quad-core machine it can be set to 4
    try:
        # One mapper call per needle; each call returns a list of emissions.
        emissions = pool.map(haystackmap.mapper, [[2], [3]])
    finally:
        # Release the worker processes even if a mapper call raises.
        pool.close()
        pool.join()
    print(emissions)
    # sum(..., []) flattens the per-needle lists into one list for the reducer.
    print(haystackreducer(sum(emissions, [])))

    print("USING MAP PROCESS")
    workers = [
        Process(target=simplefunction, args=(1, 2)),
        Process(target=simplefunction, args=(1, 3)),
    ]
    for worker in workers:
        worker.start()
    # Wait for both children explicitly instead of dropping the handles.
    for worker in workers:
        worker.join()
def run():
    """Run an interactive search of a query wav file against the 'db' directory.

    Three answers are read with raw_input: the path of the query wav file
    (asked again until it names an existing file), the number of
    partitions, and the number of partition samples.  Every file found
    under 'db' is wrapped in a haystack, one process per needle is started
    on calltomapper, and the collected emissions are passed to
    haystackreducer.  The percentage match per result key and the elapsed
    time are printed; nothing is returned.

    Raises:
        ValueError: if the partition or sample answer is not an integer.
    """
    # Keep asking until the answer names an existing file.
    while True:
        query = raw_input(
            "Submit .wav file to search against database (Example: button.wav): "
        )
        if os.path.isfile(query):
            break

    # Instantiate Wavsound objects from the wav files
    query_wavsound = wavsound(query)
    print(
        "\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n"
    )
    partition = raw_input(
        "Set number of partitions of the query from 1 to "
        + str(int(len(query_wavsound.get_data()) / 3)) + ": ")
    samples = raw_input("Set number of samples of partitions from 1 to "
                        + partition + " (Recommend < 50): ")

    # Database structure: loaded sounds keyed by path, plus one haystack each.
    t_wavsounds = {}
    haystacks = []
    # Database look up directory
    rootdir = 'db'
    for subdir, __, files in os.walk(rootdir):
        for filename in files:
            # The "/"-joined path is also the key shown in the results.
            path = subdir + "/" + filename
            t_wavsounds[path] = wavsound(path)
            haystacks.append(haystack(path, t_wavsounds[path].get_data()))

    query_needle_factory = needlestorage(query_wavsound, int(partition),
                                         int(samples))
    haystackmap = haystackmapper(haystacks)
    needles = query_needle_factory.get_needles()
    len_needles = len(needles)

    manager = Manager()
    # Shared dict handed to every calltomapper process to collect emissions.
    return_emissions = manager.dict()
    # Jobs is the list of started processes
    jobs = []

    print("Number of Needles:  {0}".format(len_needles))

    # Database query time
    start_time = time.time()

    # Distribute the needles: one process per needle, numbered from 0.
    for pnum, needle in enumerate(needles):
        p = Process(target=calltomapper,
                    args=(haystackmap, needle, pnum, len_needles,
                          return_emissions))
        jobs.append(p)
        p.start()

    for proc in jobs:
        proc.join()

    # Flatten the per-process emission lists into a single list.
    emissions_list = [
        emission
        for chunk in return_emissions.values()
        for emission in chunk
    ]

    print("Search Result:")
    result_dict = haystackreducer(emissions_list)

    # Tabulate % match (wav files with 0% match are excluded from the result)
    for key in result_dict:
        name = str(key)
        padding = (25 - len(name)) * " "
        # 100.0 forces float division: with int / int Python 2 floors the
        # ratio, so every partial match used to be reported as 0.00.
        percent = 100.0 * int(result_dict[key]) / len_needles
        print("{0} :  {1} {2:.2f} % match".format(name, padding, percent))

    # Show search time
    timelapse_parallel = time.time() - start_time
    print(str(timelapse_parallel) + " seconds")
def run():
    """Run an interactive search of a query wav file against the 'db' directory.

    Three answers are read with raw_input: the path of the query wav file
    (asked again until it names an existing file), the number of
    partitions, and the number of partition samples.  Every file found
    under 'db' is wrapped in a haystack, one process per needle is started
    on calltomapper, and the collected emissions are passed to
    haystackreducer.  The percentage match per result key and the elapsed
    time are printed; nothing is returned.

    Raises:
        ValueError: if the partition or sample answer is not an integer.
    """
    # Keep asking until the answer names an existing file.
    while True:
        query = raw_input(
            "Submit .wav file to search against database (Example: button.wav): "
        )
        if os.path.isfile(query):
            break

    # Instantiate Wavsound objects from the wav files
    query_wavsound = wavsound(query)
    print(
        "\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n"
    )
    partition = raw_input(
        "Set number of partitions of the query from 1 to "
        + str(int(len(query_wavsound.get_data()) / 3)) + ": ")
    samples = raw_input("Set number of samples of partitions from 1 to "
                        + partition + " (Recommend < 50): ")

    # Database structure: loaded sounds keyed by path, plus one haystack each.
    t_wavsounds = {}
    haystacks = []
    # Database look up directory
    rootdir = 'db'
    for subdir, __, files in os.walk(rootdir):
        for filename in files:
            # The "/"-joined path is also the key shown in the results.
            path = subdir + "/" + filename
            t_wavsounds[path] = wavsound(path)
            haystacks.append(haystack(path, t_wavsounds[path].get_data()))

    query_needle_factory = needlestorage(query_wavsound, int(partition),
                                         int(samples))
    haystackmap = haystackmapper(haystacks)
    needles = query_needle_factory.get_needles()
    len_needles = len(needles)

    manager = Manager()
    # Shared dict handed to every calltomapper process to collect emissions.
    return_emissions = manager.dict()
    # Jobs is the list of started processes
    jobs = []

    print("Number of Needles:  {0}".format(len_needles))

    # Database query time
    start_time = time.time()

    # Distribute the needles: one process per needle, numbered from 0.
    for pnum, needle in enumerate(needles):
        p = Process(target=calltomapper,
                    args=(haystackmap, needle, pnum, len_needles,
                          return_emissions))
        jobs.append(p)
        p.start()

    for proc in jobs:
        proc.join()

    # Flatten the per-process emission lists into a single list.
    emissions_list = [
        emission
        for chunk in return_emissions.values()
        for emission in chunk
    ]

    print("Search Result:")
    result_dict = haystackreducer(emissions_list)

    # Tabulate % match (wav files with 0% match are excluded from the result)
    for key in result_dict:
        name = str(key)
        padding = (25 - len(name)) * " "
        # 100.0 forces float division: with int / int Python 2 floors the
        # ratio, so every partial match used to be reported as 0.00.
        percent = 100.0 * int(result_dict[key]) / len_needles
        print("{0} :  {1} {2:.2f} % match".format(name, padding, percent))

    # Show search time
    timelapse_parallel = time.time() - start_time
    print(str(timelapse_parallel) + " seconds")