# Example 1
from wavsound import wavsound
"""wavread is a testing module to test the functionality of wavsound"""

# Smoke test: load a reference wav file and print its summary representation.
button_wavsound = wavsound('db/button.wav')
print(button_wavsound)

# Load a resampled copy of the same sound for comparison.
beep_wavsound = wavsound('db/buttonresampled.wav')
print(beep_wavsound)
# Compare the number of data samples of the original vs. the resampled file.
print(len(button_wavsound.get_data()))
print(len(beep_wavsound.get_data()), len(button_wavsound.get_data()))
# Print a chunk of the sound data — presumably samples 0..100; confirm
# against wavsound.get_chunk.
print(button_wavsound.get_chunk(0, 100))
def test():
    """Exercise the MapReduce search pipeline against a synthetic database.

    Builds a database of ``db_size`` identical haystacks split across
    ``num_split_db`` sub-databases, extracts needles from the query sound,
    maps each (needle, sub-database) pair in its own process, then reduces
    the emitted key-value pairs into per-key match results.
    """
    button_wavsound = wavsound("button.wav")

    haystackss = []  # split database into list of smaller databases
    keynames = []
    db_size = 300  # Set Database Size
    num_split_db = 2  # Set number of split databases
    size_split_db = int(db_size / num_split_db)

    for i in range(num_split_db):
        haystackss.append([])

    # Populate the sub-databases: entry i goes into bucket i // size_split_db.
    # (The original kept a separate `counter` that always equalled i.)
    for i in range(db_size):
        split_db_key = int(i / size_split_db)
        keynames.append(i)
        haystackss[split_db_key].append(haystack(i, button_wavsound.get_data()))

    button_needle_factory = needlestorage(button_wavsound, 1000, 50)

    print("USING MAP PROCESS and Manager")

    needles = button_needle_factory.get_needles()
    print(needles[0])

    # Manager dict collects emissions from all worker processes.
    manager = Manager()
    return_emissions = manager.dict()
    jobs = []
    pnum = 0  # process number, used as the worker's slot in return_emissions

    # number of needles not size of each needle
    len_needles = len(needles)
    print("Number of Needles: ", len_needles)
    start_time = time.time()

    # Fan out one process per (needle, sub-database) pair.
    for needle in needles:
        for haystacks in haystackss:
            p = Process(
                target=calltomapper,
                args=(haystacks, needle, pnum, len_needles * num_split_db, return_emissions),
            )
            jobs.append(p)
            p.start()
            pnum += 1
    print(time.time() - start_time)

    for proc in jobs:
        proc.join()  # wait for each process to end completely

    print(time.time() - start_time)
    # Flatten the per-process emission lists into one list for the reducer.
    emissions_list = sum(return_emissions.values(), [])
    print("Reduce Result:")
    print(haystackreducer(emissions_list, keynames))
    print("Done")
    print(time.time() - start_time)

    # NOTE(review): a Pool-based implementation was kept here as a
    # triple-quoted string; it was slower than the Process method:
    #
    #   pool = Pool(2)  # if it is a quad-core machine it can be set to 4
    #   emissions = pool.map(haystackmap.mapper, button_needle_factory.get_needles())
    #   print(emissions)
    #   print(haystackreducer(sum(emissions, [])))
    #
    # A serial (no-optimization) variant of the module smoke test also
    # followed, inside another triple-quoted string that was terminated
    # early by the `"""` inside its own text — a SyntaxError.  Both are
    # preserved as ordinary comments instead:
    #
    #   from wavsound import wavsound
    #   button_wavsound = wavsound('db/button.wav')
    #   print(button_wavsound)
    #   beep_wavsound = wavsound('db/buttonresampled.wav')
    #   print(beep_wavsound)
    #   print(len(button_wavsound.get_data()))
    #   print(len(beep_wavsound.get_data()), len(button_wavsound.get_data()))
    #   print(button_wavsound.get_chunk(0, 100))
def run(query, sample_length, samples, rootdir, max_split):
    """Search the wav repository under *rootdir* for matches to *query*.

    Args:
        query: path to the query .wav file.
        sample_length: length of each needle sampled from the query.
        samples: number of needles to sample (coerced with int()).
        rootdir: root directory of the wav repository to scan.
        max_split: number of sub-repositories the database is split into.

    Returns:
        A list of ``[filename, percent_match]`` string pairs, best match
        first; files with zero matches are excluded.
    """

    # Instantiate Wavsound objects from the wav files
    t_wavsounds = {}
    query_wavsound = wavsound(query)

    # Repository structure: split repository into a list of smaller ones.
    haystackss = []
    key_names = []

    # Repository splitting parameter (1 to number of repository entries).
    db_size_per_split = 100

    for i in range(max_split):
        haystackss.append([])

    # Read files in the DB.
    counter = 0
    for subdir, __, files in os.walk(rootdir):
        for file in files:
            key_names.append(subdir + "/" + file)
            # BUG FIX: clamp to max_split - 1.  haystackss has exactly
            # max_split buckets (indices 0..max_split-1), so the original
            # min(max_split, ...) raised IndexError once counter reached
            # max_split * db_size_per_split.
            split_db_key = min(max_split - 1, int(counter / db_size_per_split))
            t_wavsounds[subdir + "/" + file] = wavsound(subdir + "/" + file)
            # Store every 16th sample to keep the haystacks small.
            haystackss[split_db_key].append(
                haystack(subdir + "/" + file,
                         t_wavsounds[subdir + "/" + file].get_data()[::16]))
            counter += 1

    query_needle_factory = needlestorage(query_wavsound, sample_length,
                                         int(samples))

    # Get segments of the query data as needles.
    needles = query_needle_factory.get_needles()
    query_needle_factory.clear_needles()

    # MAP --------------------------------------------------

    # Manager list collects key-value emissions from all map processes.
    manager = Manager()
    return_emissions = manager.list()

    jobs = []  # list of map processes
    pnum = 0  # process number

    # Distribute one process per (needle, non-empty sub-repository) pair.
    len_needles = len(needles)
    for needle in needles:
        for haystacks in haystackss:
            if haystacks != []:
                p = Process(target=calltomapper,
                            args=(haystacks, needle, pnum,
                                  len_needles * len(haystackss),
                                  return_emissions))
                jobs.append(p)
                p.start()
                pnum += 1

    for proc in jobs:
        proc.join()

    # SHUFFLE/REDUCE ------------------------------------------

    jobs = []  # reuse the job list for the reduce processes

    # Separate manager tracks the reduce results.
    manager_2 = Manager()
    result_dict = manager_2.dict()

    for key in key_names:
        # Shuffle: one token per emission belonging to this key.
        key_list = [1 for x in return_emissions if x[0] == key]
        print(key, key_list)
        q = Process(target=calltoreducer, args=(key_list, key, result_dict))
        jobs.append(q)
        q.start()

    for proc in jobs:
        proc.join()

    result_lst = []
    print(len(needles), "is length of needles")
    if len(result_dict.items()) != 0:
        # Best matches first; report each as a percentage of needles matched.
        for key, value in sorted(result_dict.items(),
                                 key=lambda pair: pair[1],
                                 reverse=True):
            if value > 0:
                result_lst.append(
                    [str(key), str((int(value) / len(needles) * 100))])

    return result_lst
def test():
    """Exercise the MapReduce search pipeline against a synthetic database.

    Duplicate of the earlier ``test`` snippet: builds ``db_size`` identical
    haystacks split across ``num_split_db`` sub-databases, maps each
    (needle, sub-database) pair in its own process, and reduces the
    emissions into per-key results.
    """
    button_wavsound = wavsound('button.wav')

    haystackss = []  # split database into list of smaller databases
    keynames = []
    db_size = 300  # Set Database Size
    num_split_db = 2  # Set number of split databases
    size_split_db = int(db_size / num_split_db)

    for i in range(num_split_db):
        haystackss.append([])

    # Populate the sub-databases: entry i goes into bucket i // size_split_db.
    # (The original kept a separate `counter` that always equalled i.)
    for i in range(db_size):
        split_db_key = int(i / size_split_db)
        keynames.append(i)
        haystackss[split_db_key].append(haystack(i,
                                                 button_wavsound.get_data()))

    button_needle_factory = needlestorage(button_wavsound, 1000, 50)

    print("USING MAP PROCESS and Manager")

    needles = button_needle_factory.get_needles()
    print(needles[0])

    # Manager dict collects emissions from all worker processes.
    manager = Manager()
    return_emissions = manager.dict()
    jobs = []
    pnum = 0  # process number

    # number of needles not size of each needle
    len_needles = len(needles)
    print("Number of Needles: ", len_needles)
    start_time = time.time()

    # Fan out one process per (needle, sub-database) pair.
    for needle in needles:
        for haystacks in haystackss:
            p = Process(target=calltomapper,
                        args=(haystacks, needle, pnum,
                              len_needles * num_split_db, return_emissions))
            jobs.append(p)
            p.start()
            pnum += 1
    print(time.time() - start_time)

    for proc in jobs:
        proc.join()  # wait for each process to end completely

    print(time.time() - start_time)
    # Flatten the per-process emission lists into one list for the reducer.
    emissions_list = sum(return_emissions.values(), [])
    print("Reduce Result:")
    print(haystackreducer(emissions_list, keynames))
    print("Done")
    print(time.time() - start_time)

    # NOTE(review): a Pool-based implementation was kept here as a
    # triple-quoted string; it was slower than the Process method:
    #
    #   pool = Pool(2)  # if it is a quad-core machine it can be set to 4
    #   emissions = pool.map(haystackmap.mapper, button_needle_factory.get_needles())
    #   print(emissions)
    #   print(haystackreducer(sum(emissions, [])))
    #
    # The original ended with a dangling `"""` (intended to introduce a
    # serial variant) that opened an unterminated string swallowing the
    # next def header — a SyntaxError.  It has been removed.
def run():
    """Interactively search the local wav database.

    Prompts for a query .wav file, a partition count, and a sample count,
    then runs the parallel MapReduce search over the ``db`` directory and
    prints per-file percentage matches plus the elapsed search time.

    NOTE: converted from Python 2 (print statements, raw_input) to
    Python 3 to match the rest of the module.
    """

    good_file = 0

    # Re-prompt until an existing file path is supplied.
    while good_file == 0:
        query = input(
            "Submit .wav file to search against database (Example: button.wav): "
        )
        if os.path.isfile(query):
            good_file = 1

    # Instantiate Wavsound objects from the wav files
    t_wavsounds = {}
    query_wavsound = wavsound(query)
    print(
        "\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n"
    )
    partition = input("Set number of partitions of the query from 1 to " +
                      str(int(len(query_wavsound.get_data()) / 3)) + ": ")
    samples = input("Set number of samples of partitions from 1 to " +
                    partition + " (Recommend < 50): ")

    # Database structure
    haystacks = []

    # Database look-up directory
    rootdir = 'db'

    for subdir, __, files in os.walk(rootdir):
        for file in files:
            t_wavsounds[subdir + "/" + file] = wavsound(subdir + "/" + file)
            haystacks.append(
                haystack(subdir + "/" + file,
                         t_wavsounds[subdir + "/" + file].get_data()))

    query_needle_factory = needlestorage(query_wavsound, int(partition),
                                         int(samples))

    haystackmap = haystackmapper(haystacks)

    needles = query_needle_factory.get_needles()
    len_needles = len(needles)

    manager = Manager()

    # Map processes emit key-value pairs to emissions
    return_emissions = manager.dict()

    # Job is a list of processes
    jobs = []

    # Process number
    pnum = 0

    print("Number of Needles: ", len(needles))

    # Database query time
    start_time = time.time()

    # Distribute processes using multiprocessing: one process per needle.
    for needle in needles:
        p = Process(target=calltomapper,
                    args=(haystackmap, needle, pnum, len_needles,
                          return_emissions))
        jobs.append(p)
        p.start()
        pnum += 1

    for proc in jobs:
        proc.join()

    # Flatten return_emissions into a list
    emissions_list = sum(return_emissions.values(), [])

    print("Search Result:")

    result_dict = haystackreducer(emissions_list)

    # Tabulate % match (wav files with 0% match are excluded from the result)
    for key in result_dict:
        print(str(key), ": ", (25 - len(str(key))) * " ",
              str("{0:.2f}".format(
                  int(result_dict[key]) / len(needles) * 100)), "% match")

    # Show search time
    timelapse_parallel = time.time() - start_time
    print(timelapse_parallel, "seconds")
# Example 7
def run(query, sample_length, samples, rootdir, max_split):
    """Search the wav repository under *rootdir* for matches to *query*.

    Duplicate of the earlier parameterized ``run`` snippet.

    Args:
        query: path to the query .wav file.
        sample_length: length of each needle sampled from the query.
        samples: number of needles to sample (coerced with int()).
        rootdir: root directory of the wav repository to scan.
        max_split: number of sub-repositories the database is split into.

    Returns:
        A list of ``[filename, percent_match]`` string pairs, best match
        first; files with zero matches are excluded.
    """

    # Instantiate Wavsound objects from the wav files
    t_wavsounds = {}
    query_wavsound = wavsound(query)

    # Repository structure: split repository into a list of smaller ones.
    haystackss = []
    key_names = []

    # Repository splitting parameter (1 to number of repository entries).
    db_size_per_split = 100

    for i in range(max_split):
        haystackss.append([])

    # Read files in the DB.
    counter = 0
    for subdir, __, files in os.walk(rootdir):
        for file in files:
            key_names.append(subdir + "/" + file)
            # BUG FIX: clamp to max_split - 1.  haystackss has exactly
            # max_split buckets (indices 0..max_split-1), so the original
            # min(max_split, ...) raised IndexError once counter reached
            # max_split * db_size_per_split.
            split_db_key = min(max_split - 1, int(counter / db_size_per_split))
            t_wavsounds[subdir + "/" + file] = wavsound(subdir + "/" + file)
            # Store every 16th sample to keep the haystacks small.
            haystackss[split_db_key].append(
                haystack(subdir + "/" + file,
                         t_wavsounds[subdir + "/" + file].get_data()[::16]))
            counter += 1

    query_needle_factory = needlestorage(query_wavsound, sample_length,
                                         int(samples))

    # Get segments of the query data as needles
    needles = query_needle_factory.get_needles()
    query_needle_factory.clear_needles()

    # MAP --------------------------------------------------

    # Manager list collects key-value emissions from all map processes.
    manager = Manager()
    return_emissions = manager.list()

    jobs = []  # list of map processes
    pnum = 0  # process number

    # Distribute one process per (needle, non-empty sub-repository) pair.
    len_needles = len(needles)
    for needle in needles:
        for haystacks in haystackss:
            if haystacks != []:
                p = Process(target=calltomapper,
                            args=(haystacks, needle, pnum,
                                  len_needles * len(haystackss),
                                  return_emissions))
                jobs.append(p)
                p.start()
                pnum += 1

    for proc in jobs:
        proc.join()

    # SHUFFLE/REDUCE ------------------------------------------

    jobs = []  # reuse the job list for the reduce processes

    # Separate manager tracks the reduce results.
    manager_2 = Manager()
    result_dict = manager_2.dict()

    for key in key_names:
        # Shuffle: one token per emission belonging to this key.
        key_list = [1 for x in return_emissions if x[0] == key]
        print(key, key_list)
        q = Process(target=calltoreducer, args=(key_list, key, result_dict))
        jobs.append(q)
        q.start()

    for proc in jobs:
        proc.join()

    result_lst = []
    print(len(needles), "is length of needles")
    if len(result_dict.items()) != 0:
        # Best matches first; report each as a percentage of needles matched.
        for key, value in sorted(result_dict.items(),
                                 key=lambda pair: pair[1],
                                 reverse=True):
            if value > 0:
                result_lst.append(
                    [str(key),
                     str((int(value) / len(needles) * 100))])

    return result_lst
def run():
    """Interactively search the local wav database.

    Duplicate of the earlier interactive ``run`` snippet: prompts for a
    query .wav file, a partition count, and a sample count, runs the
    parallel MapReduce search over ``db``, and prints percentage matches
    and the elapsed time.

    NOTE: converted from Python 2 (print statements, raw_input) to
    Python 3 to match the rest of the module.
    """

    good_file = 0

    # Re-prompt until an existing file path is supplied.
    while good_file == 0:
        query = input("Submit .wav file to search against database (Example: button.wav): ")
        if os.path.isfile(query):
            good_file = 1

    # Instantiate Wavsound objects from the wav files
    t_wavsounds = {}
    query_wavsound = wavsound(query)
    print("\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n")
    partition = input("Set number of partitions of the query from 1 to " + str(int(len(query_wavsound.get_data()) / 3)) + ": ")
    samples = input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ")

    # Database structure
    haystacks = []

    # Database look-up directory
    rootdir = 'db'

    for subdir, __, files in os.walk(rootdir):
        for file in files:
            t_wavsounds[subdir + "/" + file] = wavsound(subdir + "/" + file)
            haystacks.append(haystack(subdir + "/" + file,
                                      t_wavsounds[subdir + "/" + file].get_data()))

    query_needle_factory = needlestorage(query_wavsound, int(partition), int(samples))

    haystackmap = haystackmapper(haystacks)

    needles = query_needle_factory.get_needles()
    len_needles = len(needles)

    manager = Manager()

    # Map processes emit key-value pairs to emissions
    return_emissions = manager.dict()

    # Job is a list of processes
    jobs = []

    # Process number
    pnum = 0

    print("Number of Needles: ", len(needles))

    # Database query time
    start_time = time.time()

    # Distribute processes using multiprocessing: one process per needle.
    for needle in needles:
        p = Process(target=calltomapper,
                    args=(haystackmap, needle, pnum, len_needles, return_emissions))
        jobs.append(p)
        p.start()
        pnum += 1

    for proc in jobs:
        proc.join()

    # Flatten return_emissions into a list
    emissions_list = sum(return_emissions.values(), [])

    print("Search Result:")

    result_dict = haystackreducer(emissions_list)

    # Tabulate % match (wav files with 0% match are excluded from the result)
    for key in result_dict:
        print(str(key), ": ", (25 - len(str(key))) * " ",
              str("{0:.2f}".format(int(result_dict[key]) / len(needles) * 100)),
              "% match")

    # Show search time
    timelapse_parallel = time.time() - start_time
    print(timelapse_parallel, "seconds")