def testAdd(self):
    # Simple add of two elements
    ht = hash_table.HashTable(5)
    array = [[None], [None], [None], [None], [None]]
    ht.add('Bob', '567-8888')
    ht.add('Ankit', '293-8625')
    if ht._get_hash('Bob') != ht._get_hash('Ankit'):
        array[ht._get_hash('Bob')] = [['Bob', '567-8888']]
        array[ht._get_hash('Ankit')] = [['Ankit', '293-8625']]
    else:
        array[ht._get_hash('Bob')] = [['Bob', '567-8888'], ['Ankit', '293-8625']]
    self.assertEqual(ht.table, array)

    # Add two elements with substitution of a value
    ht = hash_table.HashTable(5)
    array = [[None], [None], [None], [None], [None]]
    ht.add('Bob', '567-8888')
    ht.add('Ankit', '293-8625')
    ht.add('Ankit', '293-6753')
    if ht._get_hash('Bob') != ht._get_hash('Ankit'):
        array[ht._get_hash('Bob')] = [['Bob', '567-8888']]
        array[ht._get_hash('Ankit')] = [['Ankit', '293-6753']]
    else:
        array[ht._get_hash('Bob')] = [['Bob', '567-8888'], ['Ankit', '293-6753']]
    self.assertEqual(ht.table, array)
def testCreate(self):
    ht = hash_table.HashTable(5)
    self.assertEqual(ht._size, 5)
    self.assertEqual(ht.table, [[None], [None], [None], [None], [None]])

    ht = hash_table.HashTable(8)
    self.assertEqual(ht._size, 8)
    self.assertEqual(
        ht.table,
        [[None], [None], [None], [None], [None], [None], [None], [None]])
def setUp(self):
    '''
    Test the HashTable constructor and prepare several tables
    for other tests.
    '''
    # If the algorithm works, tables could be any
    # positive integer length.
    self.size_one_hashtable = hash_table.HashTable(1)
    self.size_three_hashtable = hash_table.HashTable(3)
    self.size_ten_thousand_hashtable = hash_table.HashTable(10000)
def create_blog_post(user_id):
    # Populate the BlogPost table from the data sent in the request body.
    data = request.get_json()
    # Fetch the first user with the given user id.
    user = User.query.filter_by(id=user_id).first()
    # Check whether that user exists.
    if not user:
        return jsonify({"message": "user does not exist"}), 400
    else:
        # Create a HashTable instance to stage the blog post data.
        ht = hash_table.HashTable(10)
        # Store key-value pairs for all the data belonging to the blog post.
        ht.add_key_value("title", data["title"])
        ht.add_key_value("body", data["body"])
        ht.add_key_value("date", now)
        ht.add_key_value("user_id", user_id)
        # Build the record to be added to the database.
        new_blog_post = BlogPost(title=ht.get_value("title"),
                                 body=ht.get_value("body"),
                                 date=ht.get_value("date"),
                                 user_id=ht.get_value("user_id"))
        # Add the new blog post to the database.
        db.session.add(new_blog_post)
        db.session.commit()
        return jsonify({"message": "blog post created"}), 200
def test_hash(self):
    table = hash_table.HashTable()  # create a hash table object
    table.__setitem__("one", 1)
    self.assertEqual(table.size, 1)
    table.push("two", 2)
    self.assertEqual(table.size, 2)
    table.push("three", 3)
    self.assertEqual(table.size, 3)
    table.push("four", 4)
    self.assertEqual(table.size, 4)
    table["four"] = 4
    self.assertEqual(table.size, 4)
    table["five"] = 5
    self.assertEqual(table.size, 5)
    table.push("six", 6)
    self.assertEqual(table.size, 6)
    table.push("seven", 7)
    self.assertEqual(table.size, 7)
    table.push("eight", 8)
    self.assertEqual(table.size, 8)
    self.assertEqual(table.is_empty(), False)
    self.assertEqual(table.__getitem__("two"), 2)
    table["one"] = 123
    self.assertEqual(table.size, 8)
    table.pop("three")
    self.assertEqual(table.size, 7)
    table.pop("four")
    self.assertEqual(table.size, 6)
    table.pop("five")
    self.assertEqual(table.size, 5)
def main():
    h = hash_table.HashTable()
    # The number of items in an empty hash_table
    test(len(h), 0)
    # Adding key-value pairs to the hash table
    h.put(3, 'Brandon')
    h.put(6, 'Odiwuor')
    # The number of items in a hash_table with two items
    test(len(h), 2)
    # Testing the __contains__(self, key) method
    test(3 in h, True)
    test(5 in h, False)
    # Getting a value from the hash_table using a key
    test(h.get(6), 'Odiwuor')
    test(h[3], 'Brandon')
    # Changing the value associated with a particular key
    h[3] = 'Baker'
    test(h[3], 'Baker')
    # Deleting a key-value pair from the hash_table
    del h[6]
    test(len(h), 1)
def match_pitch(filename, dbasename, density=18, radius=1, step=1 / 20):
    matcher = audfprint_match.Matcher()
    hash_tab = hash_table.HashTable(dbasename)
    analyzer = audfprint_analyze.Analyzer()
    analyzer.density = density
    bestScore = 0
    bestMatch = "NOMATCH"
    song = AudioSegment.from_mp3(filename)
    aa = AudioAugmentation()
    for i in range(-radius, radius + 1):
        # Speed up/slow down the sample
        if i != 0:
            octave = i * step
            shifted, _ = aa.pitch_shift(song, octave)
            shifted.export("tmp.mp3", format="mp3")
        matches, _, _ = matcher.match_file(
            analyzer, hash_tab, filename if i == 0 else "tmp.mp3")
        if len(matches) == 0:
            continue
        songid, score = matches[0][:2]
        songname = hash_tab.names[songid]
        if score > bestScore:
            bestScore = score
            bestMatch = songname
    return bestMatch
def create(event, context):
    s3.Bucket(BUCKET_NAME).download_file('_test-fprint.afpt',
                                         '/tmp/_test-fprint.afpt')
    analyzer = audfprint_analyze.Analyzer()
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2  # integer hop size
    analyzer.shifts = 1
    # analyzer.exact_count = True
    analyzer.density = 20.0
    analyzer.target_sr = 11025
    analyzer.verbose = False

    # hashbits=20, depth=100, maxtime=16384
    hash_tab = hash_table.HashTable(hashbits=20, depth=100, maxtime=16384)
    hash_tab.params['samplerate'] = analyzer.target_sr
    analyzer.ingest(hash_tab, '/tmp/_test-fprint.afpt')

    if hash_tab and hash_tab.dirty:
        hash_tab.save('/tmp/_test-db.pklz')
        s3.Bucket(BUCKET_NAME).upload_file('/tmp/_test-db.pklz', '_test-db.pklz')

    body = {
        "message": "Go Serverless v1.0! Your function executed successfully!",
        "input": event
    }
    response = {"statusCode": 200, "body": json.dumps(body)}
    return response
def local_tester():
    test_fn = '/Users/dpwe/Downloads/carol11k.wav'
    test_ht = hash_table.HashTable()
    test_analyzer = Analyzer()
    test_analyzer.ingest(test_ht, test_fn)
    test_ht.save('httest.pklz')
def test_search(self):
    test_table = ht.HashTable(13)
    test_table.put(10, 30)
    test_table.put(1, 20)
    # Normal search
    self.assertEqual(test_table.search(10), 30)
    # Search for a non-existent key
    self.assertEqual(test_table.search(2), None)
def location_hashmap():
    """This returns the location and address hashmaps.

    Space Complexity: O(n)
    Time Complexity: O(n)

    Returns:
        hash_table.HashTable: location hashtable, address hashtable
    """
    name_hash = hash_table.HashTable()
    address_hash = hash_table.HashTable()
    full_address = parse_distance_name_data()
    for i, j in enumerate(full_address):
        name_hash.add(j[1], i)
        address_hash.add(j[2], i)
    return name_hash, address_hash
def do_cmd(cmd, analyzer, hash_tab, filename_iter, matcher, outdir, type,
           report, skip_existing=False, strip_prefix=None):
    """ Breaks out the core part of running the command.
        This is just the single-core version. """
    if cmd == 'merge' or cmd == 'newmerge':
        # files are other hash tables, merge them in
        for filename in filename_iter:
            hash_tab2 = hash_table.HashTable(filename)
            if "samplerate" in hash_tab.params:
                assert hash_tab.params["samplerate"] == hash_tab2.params["samplerate"]
            else:
                # "newmerge" fails to set up the samplerate param
                hash_tab.params["samplerate"] = hash_tab2.params["samplerate"]
            hash_tab.merge(hash_tab2)

    elif cmd == 'precompute':
        # just precompute fingerprints, single core
        for filename in filename_iter:
            report(file_precompute(analyzer, filename, outdir, type,
                                   skip_existing=skip_existing,
                                   strip_prefix=strip_prefix))

    elif cmd == 'match':
        msgs = []
        # Running query, single-core mode
        for num, filename in enumerate(filename_iter):
            msgs.append(matcher.file_match_to_msgs(analyzer, hash_tab,
                                                   filename, num))
        # report(msgs)
        return msgs

    elif cmd == 'new' or cmd == 'add':
        # Adding files
        tothashes = 0
        ix = 0
        for filename in filename_iter:
            report([time.ctime() + " ingesting #" + str(ix) + ": "
                    + filename + " ..."])
            dur, nhash = analyzer.ingest(hash_tab, filename)
            tothashes += nhash
            ix += 1
        report(["Added " + str(tothashes) + " hashes "
                + "(%.1f" % (tothashes / float(analyzer.soundfiletotaldur))
                + " hashes/sec)"])

    elif cmd == 'remove':
        # Removing files from hash table.
        for filename in filename_iter:
            hash_tab.remove(filename)

    elif cmd == 'list':
        hash_tab.list(lambda x: report([x]))

    else:
        raise ValueError("unrecognized command: " + cmd)
def loadHashTable(words, option):
    hashSize = int(input("Insert the size of the hash: "))
    hesh = ht.HashTable(hashSize)
    # Insert differently depending on the parameter
    if option == 'linear':
        for key, word in words.items():
            hesh.insert(word, key, 'linear')
    elif option == 'quadratic':
        for key, word in words.items():
            hesh.insert(word, key, 'quadratic')
    return hesh
def match(self):
    matcher = audfprint_match.Matcher()
    matcher.find_time_range = True
    matcher.verbose = 1
    matcher.max_returns = 100

    analyzer = audfprint_analyze.Analyzer()
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    analyzer.shifts = 1
    # analyzer.exact_count = True
    analyzer.density = 20.0
    analyzer.target_sr = 11025

    hash_tab = hash_table.HashTable("./samples.pklz")
    hash_tab.params['samplerate'] = analyzer.target_sr

    qry = "./Samples/viral.afpt"
    rslts, dur, nhash = matcher.match_file(analyzer, hash_tab, qry, 0)
    t_hop = analyzer.n_hop / float(analyzer.target_sr)
    qrymsg = qry + (' %.1f ' % dur) + "sec " + str(nhash) + " raw hashes"

    msgrslt = []
    if len(rslts) == 0:
        nhashaligned = 0
        msgrslt.append("NOMATCH " + qrymsg)
    else:
        for (tophitid, nhashaligned, aligntime, nhashraw, rank,
             min_time, max_time) in rslts:
            # msg = ("Matched {:6.1f} s starting at {:6.1f} s in {:s}"
            #        " to time {:6.1f} s in {:s}").format(
            #     (max_time - min_time) * t_hop, min_time * t_hop, qry,
            #     (min_time + aligntime) * t_hop, hash_tab.names[tophitid])
            msg = (
                "Matched {:6.1f} s starting at {:6.1f} s in {:s}"
                " to time {:6.1f} s in {:n}; max {:6.1f} min {:6.1f}"
                " align {:6.1f} hop {:6.1f}"
            ).format(
                (max_time - min_time) * t_hop, min_time * t_hop, qry,
                (min_time + aligntime) * t_hop, tophitid,
                max_time * t_hop, min_time * t_hop, aligntime * t_hop, t_hop)
            msgrslt.append(msg)
    dumper.dump(msgrslt)
def match(event, context):
    s3.Bucket(BUCKET_NAME).download_file('_test-fprint.afpt',
                                         '/tmp/_test-fprint.afpt')
    s3.Bucket(BUCKET_NAME).download_file('_test-db.pklz', '/tmp/_test-db.pklz')
    qry = '/tmp/_test-fprint.afpt'
    hashFile = '/tmp/_test-db.pklz'

    matcher = audfprint_match.Matcher()
    matcher.find_time_range = True
    matcher.verbose = False
    matcher.max_returns = 100
    matcher.exact_count = True
    matcher.max_alignments_per_id = 20

    analyzer = audfprint_analyze.Analyzer()
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    analyzer.shifts = 1
    # analyzer.exact_count = True
    analyzer.density = 20.0
    analyzer.target_sr = 11025
    analyzer.verbose = False

    hash_tab = hash_table.HashTable(hashFile)
    hash_tab.params['samplerate'] = analyzer.target_sr

    rslts, dur, nhash = matcher.match_file(analyzer, hash_tab, qry, 0)
    t_hop = analyzer.n_hop / float(analyzer.target_sr)
    qrymsg = qry + (' %.1f ' % dur) + "sec " + str(nhash) + " raw hashes"

    # Output columns: duration, start, from, time, source, sourceId,
    # nhashaligned, aligntime, nhashraw, rank, min_time, max_time, t_hop
    matches = []
    if len(rslts) == 0:
        nhashaligned = 0
    else:
        for (tophitid, nhashaligned, aligntime, nhashraw, rank,
             min_time, max_time) in rslts:
            msg = (
                "{:f},{:f},{:s},{:f},{:s},{:n},{:n},{:n},{:n},{:n},{:n},{:n},{:f}"
            ).format((max_time - min_time) * t_hop, min_time * t_hop, qry,
                     (min_time + aligntime) * t_hop, hash_tab.names[tophitid],
                     tophitid, nhashaligned, aligntime, nhashraw, rank,
                     min_time, max_time, t_hop)
            matches.append(msg)

    response = {"statusCode": 200, "body": json.dumps(matches)}
    return response
def make_ht_from_list(analyzer, filelist, hashbits, depth, maxtime, pipe=None):
    """ Populate a hash table from a list, used as target for
        multiprocess division.  pipe is a pipe over which to push back
        the result, else return it. """
    # Create new ht instance
    ht = hash_table.HashTable(hashbits=hashbits, depth=depth, maxtime=maxtime)
    # Add in the files
    for filename in filelist:
        hashes = analyzer.wavfile2hashes(filename)
        ht.store(filename, hashes)
    # Pass back to caller
    if pipe:
        pipe.send(ht)
    else:
        return ht
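# The function above can either return its table directly or push it back over a
# pipe, which makes it usable as the target of a worker process.  The following is
# a hypothetical driver sketch, not part of the original code: the interleaved
# split of filelist, the Pipe plumbing, and the use of HashTable.merge() to fold
# the partial tables together are assumptions about how one might parallelize
# ingestion with this helper.
import multiprocessing

def make_ht_multiproc(analyzer, filelist, hashbits, depth, maxtime, ncores=2):
    # One receive/send connection pair per worker.
    pipes = [multiprocessing.Pipe(duplex=False) for _ in range(ncores)]
    # Each worker builds a HashTable over an interleaved slice of the file list.
    procs = [multiprocessing.Process(
                 target=make_ht_from_list,
                 args=(analyzer, filelist[i::ncores], hashbits, depth, maxtime,
                       pipes[i][1]))
             for i in range(ncores)]
    for p in procs:
        p.start()
    # Receive each partial table, then wait for the workers to finish.
    partials = [recv_end.recv() for recv_end, _send_end in pipes]
    for p in procs:
        p.join()
    # Fold all partial tables into the first one.
    total_ht = partials[0]
    for other in partials[1:]:
        total_ht.merge(other)
    return total_ht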
def glob2hashtable(pattern, density=None):
    """ Build a hash table from the files matching a glob pattern """
    ht = hash_table.HashTable()
    filelist = glob.glob(pattern)
    initticks = time.clock()
    totdur = 0.0
    tothashes = 0
    for ix, file in enumerate(filelist):
        print(time.ctime(), "ingesting #", ix, ":", file, "...")
        dur, nhash = ingest(ht, file, density)
        totdur += dur
        tothashes += nhash
    elapsedtime = time.clock() - initticks
    print("Added", tothashes, "(", tothashes / float(totdur),
          "hashes/sec) at", elapsedtime / totdur, "x RT")
    return ht
def regular_matching():
    # get find_peaks from analyze
    analyzer = audfprint_analyze.Analyzer()
    hash_tab = hash_table.HashTable('fpdbase.pklz')
    matcher = audfprint_match.Matcher()
    sampling_seconds = 10
    sampling_interval = 15
    prev_resultID = []
    count = 0
    while True:
        start = time.time()
        twoSecondArray = different_record(sampling_seconds)
        peakLists = analyzer.find_peaks(twoSecondArray, 11025)
        landmarkLists = analyzer.peaks2landmarks(peakLists)
        hashesLists = audfprint_analyze.landmarks2hashes(landmarkLists)
        print(hashesLists)
        hashes_hashes = (((hashesLists[:, 0].astype(np.uint64)) << 32)
                         + hashesLists[:, 1].astype(np.uint64))
        unique_hash_hash = np.sort(np.unique(hashes_hashes))
        unique_hashes = np.hstack([
            (unique_hash_hash >> 32)[:, np.newaxis],
            (unique_hash_hash & ((1 << 32) - 1))[:, np.newaxis]
        ]).astype(np.int32)
        hashes = unique_hashes

        # now the matching
        # for num, filename in enumerate(filename_iter):
        #     count += 1
        #     msgs = matcher.file_match_to_msgs(analyzer, hash_tab, filename, num)
        #     report(msgs)
        # file_match_to_msgs(self, analyzer, ht, qry, number=None)
        # print(matcher.file_match_to_msgs(analyzer, hash_tab, "Some qry name"))
        # rslts, dur, nhash = match_file(matcher, analyzer, hash_tab, "some query", hashesLists)
        message, results = file_match_to_msgs(matcher, analyzer, hash_tab,
                                              "FROM MICROPHONE", hashes)
        print(sampling_seconds, sampling_interval)
        count += 1
        end = time.time() - start
        print(end)
        time.sleep(sampling_interval - (end - sampling_seconds))
        print(count)
def create(event, context):
    day = event.get('date')
    if not day:
        day = (date.today() - timedelta(2)).strftime('%Y%m%d')
    channel = event['channel']

    analyzer = audfprint_analyze.Analyzer()
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    analyzer.shifts = 1
    # analyzer.exact_count = True
    analyzer.density = 20.0
    analyzer.target_sr = 11025
    analyzer.verbose = False

    # hashbits=20, depth=100, maxtime=16384
    # maxtime=262144
    hash_tab = hash_table.HashTable(hashbits=20, depth=100, maxtime=262144)
    hash_tab.params['samplerate'] = analyzer.target_sr

    fingerprints = s3client.list_objects_v2(
        Bucket=BUCKET_NAME, Prefix='tva/{}/{}/'.format(day, channel))['Contents']
    for fingerprint in fingerprints:
        local_path = '/tmp/{}'.format(fingerprint['Key'].split('/').pop())
        s3.Bucket(BUCKET_NAME).download_file(fingerprint['Key'], local_path)
        analyzer.ingest(hash_tab, local_path)
        os.remove(local_path)

    if hash_tab and hash_tab.dirty:
        hash_tab.save('/tmp/{}-{}.pklz'.format(channel, day))
        s3.Bucket(BUCKET_NAME).upload_file(
            '/tmp/{}-{}.pklz'.format(channel, day),
            'hash/{}/{}-{}.pklz'.format(day, channel, day))
        os.remove('/tmp/{}-{}.pklz'.format(channel, day))

    body = {"input": event, "fingerprints": len(fingerprints)}
    response = {"statusCode": 200, "body": json.dumps(body)}
    return response
def test_put(self):
    test_table = ht.HashTable(13)
    # Normal put
    test_table.put(10, 20)
    self.assertEqual(
        test_table.table,
        [[], [], [], [], [], [], [], [], [], [], [(10, 20)], [], []])
    # Replace put
    test_table.put(10, 30)
    self.assertEqual(
        test_table.table,
        [[], [], [], [], [], [], [], [], [], [], [(10, 30)], [], []])
    # Chaining put
    test_table.put(23, 20)
    self.assertEqual(
        test_table.table,
        [[], [], [], [], [], [], [], [], [], [], [(10, 30), (23, 20)], [], []])
def package_hashmap():
    """This returns the package hashmap.

    Space Complexity: O(n)
    Time Complexity: O(n)

    Returns:
        hash_table.HashTable: packages hashtable
    """
    package_hash = hash_table.HashTable()
    packages = parse_packages()
    for i in packages:
        package_hash[i[0]] = package.Package(*i)
    return package_hash
def create_post(user_id):
    data = request.get_json()
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return jsonify({'message': "user doesn't exist"}), 400
    ht = hash_table.HashTable(10)
    ht.add_key_value("title", data["title"])
    ht.add_key_value("body", data["body"])
    ht.add_key_value("date", now)
    ht.add_key_value("user_id", user_id)
    new_post = BlogPost(title=ht.get_value('title'),
                        body=ht.get_value('body'),
                        date=ht.get_value('date'),
                        user_id=ht.get_value('user_id'))
    db.session.add(new_post)
    db.session.commit()
    return jsonify({'message': "a new post created"}), 200
def test_on_a_long_list_of_words(self):
    # This section heavily styled after:
    # https://github.com/jbbrokaw/
    #     data-structures/blob/master/test_hashtable.py
    word_list_location = '/usr/share/dict/words'
    # Before beginning to iterate through a file using a while loop,
    # initialize each_word so that it won't fail immediately:
    each_word = "Non-null value."
    # First, figure out the ideal size for maximizing the performance
    # of the resulting hash table.
    # Figure out how many words there are:
    word_count = 0
    with io.open(word_list_location) as file_full_of_words:
        # Terminate at end of file:
        while each_word != "":
            each_word = file_full_of_words.readline().strip()
            word_count += 1
    # According to The Powers That Be, we must now multiply the expected
    # size of the hash table by one point six to divine the value of
    # the ideal size of the hash table for performance purposes.
    # Note that HashTable size must be integerized BEFORE construction.
    calculated_hashtable_size = int(word_count * 1.6)
    big_huge_hashtable = hash_table.HashTable(calculated_hashtable_size)
    # Reset the sentinel so the next while loop actually runs:
    each_word = "Non-null value."
    with io.open(word_list_location) as file_full_of_words:
        while each_word != "":
            each_word = file_full_of_words.readline().strip()
            # Make keys and values identical to ease testing this monster:
            big_huge_hashtable.set(each_word, each_word)
    # Now that the table is compiled, ensure the hasher
    # relates words to the file as expected.
    each_word = "Non-null value."
    with io.open(word_list_location) as file_full_of_words:
        while each_word != "":
            each_word = file_full_of_words.readline().strip()
            assert big_huge_hashtable.get(each_word) == each_word
def create_blog_post(user_id):
    data = request.get_json()
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return jsonify({"message": "User does not exist!"}), 400
    ht = hash_table.HashTable(10)
    ht.add_key_value("title", data["title"])
    ht.add_key_value("body", data["body"])
    ht.add_key_value("date", now)
    ht.add_key_value("user_id", user_id)
    new_blog_post = BlogPost(title=ht.get_value("title"),
                             body=ht.get_value("body"),
                             date=ht.get_value("date"),
                             user_id=ht.get_value("user_id"))
    db.session.add(new_blog_post)
    db.session.commit()
    return jsonify({"message": "new blog post created"}), 200
def test_delete(self):
    test_table = ht.HashTable(13)
    test_table.put(10, 20)
    self.assertEqual(
        test_table.table,
        [[], [], [], [], [], [], [], [], [], [], [(10, 20)], [], []])
    test_table.put(3, 7)
    self.assertEqual(
        test_table.table,
        [[], [], [], [(3, 7)], [], [], [], [], [], [], [(10, 20)], [], []])
    # Normal delete
    test_table.delete(10)
    self.assertEqual(
        test_table.table,
        [[], [], [], [(3, 7)], [], [], [], [], [], [], [], [], []])
    # Delete a non-existent key
    test_table.delete(2)
    self.assertEqual(
        test_table.table,
        [[], [], [], [(3, 7)], [], [], [], [], [], [], [], [], []])
def glob2hashtable(pattern, density=20.0):
    """ Build a hash table from the files matching a glob pattern """
    global g2h_analyzer
    if g2h_analyzer is None:
        g2h_analyzer = Analyzer(density=density)

    ht = hash_table.HashTable()
    filelist = glob.glob(pattern)
    initticks = time.clock()
    totdur = 0.0
    tothashes = 0
    for ix, file_ in enumerate(filelist):
        # print(time.ctime(), "ingesting #", ix, ":", file_, "...")
        dur, nhash = g2h_analyzer.ingest(ht, file_)
        totdur += dur
        tothashes += nhash
    elapsedtime = time.clock() - initticks
    # print("Added", tothashes, "(", tothashes / totdur, "hashes/sec) at ",
    #       elapsedtime / totdur, "x RT")
    return ht
def fingerprint_filename(filename, dbasename):
    matcher = audfprint_match.Matcher()
    matcher.window = 2
    matcher.threshcount = 5
    matcher.max_returns = 1
    matcher.search_depth = 100
    matcher.sort_by_time = False
    matcher.exact_count = False
    matcher.illustrate = False
    matcher.illustrate_hpf = False
    matcher.verbose = 1
    matcher.find_time_range = False
    matcher.time_quantile = 0.05

    analyzer = audfprint_analyze.Analyzer()
    # Read parameters from command line/docopts
    analyzer.density = 20
    analyzer.maxpksperframe = 5
    analyzer.maxpairsperpeak = 3
    analyzer.f_sd = 30.0
    analyzer.shifts = 0
    # fixed - 512 pt FFT with 256 pt hop at 11025 Hz
    analyzer.target_sr = 11025
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    # set default value for shifts depending on mode
    if analyzer.shifts == 0:
        # Default shift is 4 for match, otherwise 1
        analyzer.shifts = 4
    analyzer.fail_on_error = True

    hashtable = hash_table.HashTable(dbasename)
    results, dur, nhash = matcher.match_file(analyzer, hashtable, filename, 0)
    try:
        (first_hit, nhashaligned, aligntime, nhashraw, rank,
         min_time, max_time) = results[0]
        filename = hashtable.names[first_hit]
        return nhashraw, filename
    except IndexError:
        # No match found in the reference database.
        return None, None
def create_blog_post(user_id):
    """Create a new blog post and add it to the database.

    Return a success message if the operation is done.

    Args:
        user_id (int): user id

    Returns:
        JSON: success message
    """
    data = request.get_json()
    # Check if the user is in the database
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return jsonify({"message": "user does not exist!"}), 400
    # Create an instance of a HashTable
    ht = hash_table.HashTable(10)
    # Create a blog post
    ht.add_key_value("title", data["title"])
    ht.add_key_value("body", data["body"])
    ht.add_key_value("date", now)
    ht.add_key_value("user_id", user_id)
    # Add the blog post to the database
    new_blog_post = BlogPost(
        title=ht.get_value("title"),
        body=ht.get_value("body"),
        date=ht.get_value("date"),
        user_id=ht.get_value("user_id"),
    )
    db.session.add(new_blog_post)
    db.session.commit()
    return jsonify({"message": "new blog post created"}), 200
import hash_table
import numpy as np
import csv
import time
import sys

# The first argument of the HashTable constructor is size, which defines the length of m.
# The method argument specifies which hash function is used to map a key to a slot.
# The available methods are: DivisionMethod, MultiplicationMethod and UniversalMethod.
# The collisionType argument selects the technique used for handling collisions.
# The available options are: Chaining and OpenAddressing.
# The probeType argument defines the probe sequence when open addressing is used.
# The available options are: Linear, Quadratic, and DoubleHashing.
hash_table = hash_table.HashTable(20000,
                                  method="MultiplicationMethod",
                                  collisionType="Chaining",
                                  probeType="Linear")

# The two datasets are Video_Games.csv and disney-voice-actors.csv.
# To change the dataset, replace the filename in the open() call below
# with the other dataset.
with open('datasets/Video_Games.csv', mode='r', encoding="utf8") as csv_file:
    csv_reader = csv.DictReader(csv_file)
    line_count = 1
    startime = time.time()
    for row in csv_reader:
        if row is not None:
            if line_count == 0:
                line_count += 1
            # The key is set to provide a random key value between 0 and 3500
            # for each Name element.
            # When using Video_Games.csv, the value variable should be set to Name.
            # When using disney-voice-actors.csv, the value variable should be
            # set to voice-actor.
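# Hypothetical sketch, not part of the original script: the comments above name
# alternative constructor options, so an open-addressing table with double-hashing
# probing might be built as shown below.  The argument names are taken directly
# from those comments and are assumptions about the HashTable API rather than
# verified behaviour.  A fresh module alias is used because the script above
# rebinds the name hash_table to a HashTable instance.
import hash_table as ht_module

open_addressing_table = ht_module.HashTable(20000,
                                            method="DivisionMethod",
                                            collisionType="OpenAddressing",
                                            probeType="DoubleHashing")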
def main(argv):
    """ Main routine for the command-line interface to audfprint """
    # Other globals set from command line
    args = docopt.docopt(USAGE, version=__version__, argv=argv[1:])

    # Figure out which command was chosen
    poss_cmds = [
        'new', 'add', 'precompute', 'merge', 'newmerge', 'match', 'list',
        'remove'
    ]
    cmdlist = [cmdname for cmdname in poss_cmds if args[cmdname]]
    if len(cmdlist) != 1:
        raise ValueError("must specify exactly one command")
    # The actual command as a str
    cmd = cmdlist[0]

    # Setup output function
    report = setup_reporter(args)

    # Keep track of wall time
    initticks = time.clock()

    # Command line sanity.
    if args["--maxtimebits"]:
        args["--maxtimebits"] = int(args["--maxtimebits"])
    else:
        args["--maxtimebits"] = hash_table._bitsfor(int(args["--maxtime"]))

    # Setup the analyzer if we're using one (i.e., unless "merge")
    analyzer = setup_analyzer(args) if cmd not in (
        "merge", "newmerge", "list", "remove") else None

    precomp_type = 'hashes'

    # Set up the hash table, if we're using one (i.e., unless "precompute")
    if cmd != "precompute":
        # For everything other than precompute, we need a database name
        # Check we have one
        dbasename = args['--dbase']
        if not dbasename:
            raise ValueError("dbase name must be provided if not precompute")
        if cmd == "new" or cmd == "newmerge":
            # Check that the output directory can be created before we start
            ensure_dir(os.path.split(dbasename)[0])
            # Create a new hash table
            hash_tab = hash_table.HashTable(
                hashbits=int(args['--hashbits']),
                depth=int(args['--bucketsize']),
                maxtime=(1 << int(args['--maxtimebits'])))
            # Set its samplerate param
            if analyzer:
                hash_tab.params['samplerate'] = analyzer.target_sr
        else:
            # Load existing hash table file (add, match, merge)
            if args['--verbose']:
                report([time.ctime() + " Reading hash table " + dbasename])
            hash_tab = hash_table.HashTable(dbasename)
            if analyzer and 'samplerate' in hash_tab.params \
                    and hash_tab.params['samplerate'] != analyzer.target_sr:
                # analyzer.target_sr = hash_tab.params['samplerate']
                print("db samplerate overridden to ", analyzer.target_sr)
    else:
        # The command IS precompute
        # dummy empty hash table
        hash_tab = None
        if args['--precompute-peaks']:
            precomp_type = 'peaks'

    # Create a matcher
    matcher = setup_matcher(args) if cmd == 'match' else None

    filename_iter = filename_list_iterator(
        args['<file>'], args['--wavdir'], args['--wavext'], args['--list'])

    #######################
    # Run the main command
    #######################
    # How many processors to use (multiprocessing)
    ncores = int(args['--ncores'])
    if ncores > 1 and cmd not in ("merge", "newmerge", "list", "remove"):
        # merge/newmerge/list/remove are always single-thread processes
        do_cmd_multiproc(cmd, analyzer, hash_tab, filename_iter, matcher,
                         args['--precompdir'], precomp_type, report,
                         skip_existing=args['--skip-existing'],
                         ncores=ncores)
    else:
        do_cmd(cmd, analyzer, hash_tab, filename_iter, matcher,
               args['--precompdir'], precomp_type, report,
               skip_existing=args['--skip-existing'])

    elapsedtime = time.clock() - initticks
    if analyzer and analyzer.soundfiletotaldur > 0.:
        print("Processed "
              + "%d files (%.1f s total dur) in %.1f s sec = %.3f x RT"
              % (analyzer.soundfilecount, analyzer.soundfiletotaldur,
                 elapsedtime, (elapsedtime / analyzer.soundfiletotaldur)))

    # Save the hash table file if it has been modified
    if hash_tab and hash_tab.dirty:
        # We already created the directory, if "new".
        hash_tab.save(dbasename)