Ejemplo n.º 1
0
from multiprocessing import Pool, Process,  Manager
from simplfunction import simplefunction
from calltomapper import calltomapper

if __name__ == '__main__':
    # Map reduce test: a simple testing script to check the functionality
    # of the map-reduce implementation, first through a worker Pool and then
    # through individually started Process objects.

    # Four small integer haystacks labelled "0" to "3".
    haystacks = [
        haystack("0", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        haystack("1", [3, 2, 2, 3, 4, 3, 6, 7, 8, 9]),
        haystack("2", [1, 6, -1, 0, 4, 0, 6, 7, 8, 9]),
        haystack("3", [3, 3, 3]),
    ]

    haystackmap = haystackmapper(haystacks)

    print("USING MAP POOL")
    pool = Pool(2)  # if it is a quad-core machine it can be set to 4
    try:
        # One mapper call per needle; the needles here are [2] and [3].
        # NOTE(review): Python 2 cannot pickle bound methods by default, so
        # this call presumably relies on Python 3 or a copy_reg hook - confirm.
        emissions = pool.map(haystackmap.mapper, [[2], [3]])
    finally:
        # Release the worker processes even if a mapper call raises.
        # close()/join() is used instead of a `with` block because Pool is
        # not a context manager on Python 2.
        pool.close()
        pool.join()
    print(emissions)
    # Flatten the per-needle emission lists into one list for the reducer
    # (linear, unlike sum(emissions, []) which re-copies the accumulator).
    print(haystackreducer([item for chunk in emissions for item in chunk]))

    print("USING MAP PROCESS")
    # Keep a handle on every child so each one can be joined explicitly.
    workers = [
        Process(target=simplefunction, args=(1, 2)),
        Process(target=simplefunction, args=(1, 3)),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
Ejemplo n.º 2
0
def run():
    """Run an interactive search of a query wav file against the database.

    Takes three user inputs: the query wav file (re-prompting until the
    answer names an existing file), the number of partitions, and the number
    of partition samples. Every file found under the 'db' directory is
    loaded as a haystack, one mapper process is started per needle of the
    query, and the reduced per-file match percentages are printed, followed
    by the elapsed search time in seconds.

    Raises:
        ValueError: if the partition or sample count entered cannot be
            converted with int().
    """
    # Keep asking until the answer names an existing file.
    while True:
        query = raw_input(
            "Submit .wav file to search against database (Example: button.wav): "
        )
        if os.path.isfile(query):
            break

    # Instantiate the Wavsound object for the query file
    query_wavsound = wavsound(query)
    print(
        "\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n"
    )
    partition = raw_input("Set number of partitions of the query from 1 to " +
                          str(int(len(query_wavsound.get_data()) / 3)) + ": ")
    samples = raw_input("Set number of samples of partitions from 1 to " +
                        partition + " (Recommend < 50): ")

    # Database look up directory
    rootdir = 'db'

    # Database structure: one haystack per file found under rootdir, keyed by
    # its "subdir/file" path.
    haystacks = []
    for subdir, __, files in os.walk(rootdir):
        for filename in files:
            path = subdir + "/" + filename
            haystacks.append(haystack(path, wavsound(path).get_data()))

    query_needle_factory = needlestorage(query_wavsound, int(partition),
                                         int(samples))

    haystackmap = haystackmapper(haystacks)

    needles = query_needle_factory.get_needles()
    len_needles = len(needles)

    manager = Manager()

    # Map processes emit key-value pairs to emissions
    return_emissions = manager.dict()

    # Single-argument print(...) reproduces the two-item print statement's
    # output (including its separator space) and parses on Python 2 and 3.
    print("Number of Needles:  " + str(len_needles))

    # Database query time
    start_time = time.time()

    # Distribute the work: one process per needle, numbered from 0.
    # NOTE(review): every needle gets its own process and all are started
    # before any is joined; a bounded Pool may be preferable - confirm.
    jobs = []
    for pnum, needle in enumerate(needles):
        p = Process(target=calltomapper,
                    args=(haystackmap, needle, pnum, len_needles,
                          return_emissions))
        jobs.append(p)
        p.start()

    for proc in jobs:
        proc.join()

    # Flatten return_emissions into a list (linear, unlike sum(..., [])).
    emissions_list = [
        item for chunk in return_emissions.values() for item in chunk
    ]

    print("Search Result:")

    result_dict = haystackreducer(emissions_list)

    # Tabulate % match; only keys present in result_dict are listed.
    for key in result_dict:
        name = str(key)
        # Multiply by 100.0 first: with two ints, Python 2's `/` floors the
        # ratio to 0 before scaling, so partial matches printed as 0.00.
        percent = 100.0 * int(result_dict[key]) / len_needles
        print(" ".join([
            name, ": ", (25 - len(name)) * " ", "{0:.2f}".format(percent),
            "% match"
        ]))

    # Show search time
    timelapse_parallel = time.time() - start_time
    print(str(timelapse_parallel) + " seconds")
Ejemplo n.º 3
0
def run():
    """Search the wav database for a query file chosen interactively.

    Takes three user inputs: the query wav file (asked for repeatedly until
    the path is an existing file), the number of partitions, and the number
    of partition samples. Each file under the 'db' directory becomes a
    haystack; one mapper process per query needle writes into a shared
    manager dict; the reduced result is printed as a per-file match
    percentage together with the elapsed search time in seconds.

    Raises:
        ValueError: if the partition or sample count entered cannot be
            converted with int().
    """
    # Re-prompt until the user supplies a path that exists as a file.
    while True:
        query = raw_input("Submit .wav file to search against database (Example: button.wav): ")
        if os.path.isfile(query):
            break

    # Instantiate the Wavsound object for the query file
    query_wavsound = wavsound(query)
    print("\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n")
    partition = raw_input("Set number of partitions of the query from 1 to " + str(int(len(query_wavsound.get_data())/3))+": ")
    samples = raw_input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ")

    # Database look up directory
    rootdir = 'db'

    # Database structure: a haystack for every file found below rootdir,
    # labelled with its "subdir/file" path.
    haystacks = []
    for subdir, __, files in os.walk(rootdir):
        for entry in files:
            wav_path = subdir + "/" + entry
            haystacks.append(haystack(wav_path, wavsound(wav_path).get_data()))

    query_needle_factory = needlestorage(query_wavsound, int(partition), int(samples))

    haystackmap = haystackmapper(haystacks)

    needles = query_needle_factory.get_needles()
    len_needles = len(needles)

    manager = Manager()

    # Map processes emit key-value pairs to emissions
    return_emissions = manager.dict()

    # A single pre-built string keeps the print statement's exact output
    # (two spaces before the count) while also parsing on Python 3.
    print("Number of Needles:  %s" % len_needles)

    # Database query time
    start_time = time.time()

    # Distribute processes using multiprocessing: one per needle, with the
    # needle's index passed as the process number.
    # NOTE(review): the process count equals the needle count with no upper
    # bound; consider a fixed-size Pool if needle counts get large - confirm.
    jobs = []
    for pnum, needle in enumerate(needles):
        p = Process(target=calltomapper, args=(haystackmap, needle, pnum, len_needles, return_emissions))
        jobs.append(p)
        p.start()

    for proc in jobs:
        proc.join()

    # Flatten return_emissions into a list in one linear pass.
    emissions_list = []
    for chunk in return_emissions.values():
        emissions_list.extend(chunk)

    print("Search Result:")

    result_dict = haystackreducer(emissions_list)

    # Tabulate % match; only keys present in result_dict are listed.
    for key in result_dict:
        label = str(key)
        # Force float arithmetic: int / int floors under Python 2, which
        # turned every partial match into 0.00 before the * 100 was applied.
        percent = int(result_dict[key]) * 100.0 / len_needles
        padding = (25 - len(label)) * " "
        print(" ".join([label, ": ", padding, "{0:.2f}".format(percent), "% match"]))

    # Show search time
    timelapse_parallel = time.time() - start_time
    print("%s seconds" % timelapse_parallel)