def create_fill_one_partial_db(filelist=None, outputdb=None):
    """
    Main function called by each worker process: fill one partial db
    with the jumpcodes of every file in `filelist`.
    """
    # make sure we got the params
    assert filelist is not None and outputdb is not None, \
        "internal arg passing error...!"
    # imported here, inside the worker, because of the local num_hash_tables count
    import cover_hash_table as CHT
    # other imports
    import quick_query_test as QQT  # should be replaced in the future
    import fingerprint_hash as FH
    # create output db, including PRAGMA
    conn = sqlite3.connect(outputdb)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
    conn.execute('PRAGMA cache_size = 1000000;')  # default=2000, page_size=1024
    CHT.init_db(conn)
    # iterate over files
    cnt_tid_added = 0
    for filepath in filelist:
        # get btchroma
        btchroma = QQT.get_cpressed_btchroma(filepath, compression=COMPRESSION)
        if btchroma is None:
            continue
        # get tid from filepath (faster than querying the h5 file, less robust)
        tid = os.path.split(os.path.splitext(filepath)[0])[1]
        # get jumps
        landmarks = FH.get_landmarks(btchroma, decay=DECAY,
                                     max_per_frame=MAX_PER_FRAME)
        jumps = FH.get_jumps(landmarks, win=WIN)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        # encode and add them
        jumpcodes = map(lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
                        cjumps)
        CHT.add_jumpcodes(conn, tid, jumpcodes, normalize=NORMALIZE, commit=False)
        cnt_tid_added += 1
        # commit every 1000 tracks
        if cnt_tid_added % 1000 == 0:
            conn.commit()
        # debug
        if cnt_tid_added % 500 == 0:
            print 'We added %d tids to the hash table(s) of %s.' % (cnt_tid_added,
                                                                    outputdb)
    # commit the last (partial) batch, then index
    conn.commit()
    CHT.reindex(conn)
    # close connection
    conn.close()
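
# A minimal sketch of how create_fill_one_partial_db() might be driven from a
# pool of worker processes, one partial db per chunk of files. The names
# `_fill_db_worker` and `fill_partial_dbs_sketch` are hypothetical helpers,
# not part of the original module; only the (filelist, outputdb) signature
# above is assumed.
def _fill_db_worker(args):
    """Unpack (filelist, outputdb) so the target works with Pool.map."""
    filelist, outputdb = args
    create_fill_one_partial_db(filelist=filelist, outputdb=outputdb)

def fill_partial_dbs_sketch(all_files, out_prefix, nprocesses=4):
    """Split the file list into nprocesses chunks, fill one db per chunk."""
    import multiprocessing
    chunks = [all_files[k::nprocesses] for k in range(nprocesses)]
    args = [(chunk, '%s_%d.db' % (out_prefix, k))
            for k, chunk in enumerate(chunks)]
    pool = multiprocessing.Pool(processes=nprocesses)
    pool.map(_fill_db_worker, args)
    pool.close()
    pool.join()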
cnt_tid_added = 0
for tid in all_tids:
    # skip everything if we reuse an already-filled db
    if use_existing_db:
        break
    # get path
    filepath = PC.path_from_tid(maindir, tid)
    # get btchroma
    btchroma = QQT.get_cpressed_btchroma(filepath, compression=COMPRESSION)
    if btchroma is None:
        continue
    # get jumps
    jumps = get_jumps1(btchroma)
    cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
    # encode and add them
    jumpcodes = map(lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
                    cjumps)
    add_jumpcodes(conn, tid, jumpcodes, normalize=NORMALIZE, commit=False)
    cnt_tid_added += 1
    # commit every 1000 tracks
    if cnt_tid_added % 1000 == 0:
        conn.commit()
    # debug
    if cnt_tid_added % 500 == 0:
        print 'We added %d tids to the hash table(s).' % cnt_tid_added
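
# `get_jumps1` is called above but defined elsewhere in the original code.
# A plausible reconstruction, assuming it simply chains the landmark and jump
# extraction used in create_fill_one_partial_db() with the same module-level
# DECAY / MAX_PER_FRAME / WIN parameters:
def get_jumps1(btchroma):
    """Landmarks -> jumps, using the module-level extraction parameters."""
    import fingerprint_hash as FH
    landmarks = FH.get_landmarks(btchroma, decay=DECAY,
                                 max_per_frame=MAX_PER_FRAME)
    return FH.get_jumps(landmarks, win=WIN)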
def one_exp(maindir, clique_tid, verbose=0):
    """
    Performs one experiment:
    - select two cliques (cover groups)
    - from the first, pick a query track and its correct (cover) answer
    - from the second, pick a random wrong answer
    - compute hashes / jumps for the three tracks
    - return 1 if we retrieve the cover first, 0 otherwise, None on failure
    """
    # select cliques
    cliques = sorted(clique_tid.keys())
    np.random.shuffle(cliques)
    cl = cliques[0]
    other_cl = cliques[1]
    # select tracks
    tids = sorted(clique_tid[cl])
    np.random.shuffle(tids)
    query = tids[0]
    good_ans = tids[1]
    len_other_tids = len(clique_tid[other_cl])
    bad_ans = clique_tid[other_cl][np.random.randint(len_other_tids)]
    # create in-memory hash table, init
    conn = sqlite3.connect(':memory:')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')
    conn.execute('PRAGMA page_size = 4096;')
    conn.execute('PRAGMA cache_size = 250000;')
    CHT.init_db(conn)
    # timing
    if verbose > 0:
        t1 = time.time()
    # compression (still testing)
    for cid, compression in enumerate(COMPRESSION):
        # get btchromas
        query_path = path_from_tid(maindir, query)
        query_btc = get_cpressed_btchroma(query_path, compression=compression)
        good_ans_path = path_from_tid(maindir, good_ans)
        good_ans_btc = get_cpressed_btchroma(good_ans_path,
                                             compression=compression)
        bad_ans_path = path_from_tid(maindir, bad_ans)
        bad_ans_btc = get_cpressed_btchroma(bad_ans_path,
                                            compression=compression)
        if query_btc is None or good_ans_btc is None or bad_ans_btc is None:
            conn.close()
            return None
        # get hashes (jumps) for the query
        jumps = get_jumps(query_btc, verbose=verbose)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        jumpcodes = map(lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
                        cjumps)
        if len(jumpcodes) == 0:
            print 'query has no jumpcode!'
            conn.close()
            return None
        CHT.add_jumpcodes(conn, query, jumpcodes, normalize=NORMALIZE,
                          commit=False)
        # debug
        if verbose > 0:
            res = conn.execute("SELECT Count(tidid) FROM hashes1")
            print 'query added %d jumps' % res.fetchone()[0]
        # good answer
        jumps = get_jumps(good_ans_btc)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN,
                                       verbose=verbose)
        jumpcodes = map(lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
                        cjumps)
        CHT.add_jumpcodes(conn, good_ans, jumpcodes, normalize=NORMALIZE,
                          commit=False)
        # bad answer
        jumps = get_jumps(bad_ans_btc)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        jumpcodes = map(lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
                        cjumps)
        CHT.add_jumpcodes(conn, bad_ans, jumpcodes, normalize=NORMALIZE,
                          commit=False)
    conn.commit()
    # indices
    conn.execute("CREATE INDEX tmp_idx1 ON hashes1 ('jumpcode', 'weight')")
    conn.execute("CREATE INDEX tmp_idx2 ON hashes1 ('tidid')")
    conn.commit()
    # timing
    if verbose > 0:
        print 'Extracted/added jumps and indexed the db in %f seconds.' % (
            time.time() - t1)
    # the query's jumpcodes are looked up in the db via from_tid,
    # so we do not pass them explicitly
    jumps = None
    weights = None
    # do the actual query
    tids = CHT.select_matches(conn, jumps, weights,
                              weight_margin=WEIGHT_MARGIN,
                              from_tid=query, verbose=verbose)
    assert len(tids) < 4
    for t in tids:
        assert t in (query, bad_ans, good_ans)
    tids = np.array(tids)
    tids = tids[np.where(tids != query)]
    # close connection
    conn.close()
    # check result
    if len(tids) == 0:
        print '(no matches)'
        return 0
    if tids[0] == good_ans:
        if verbose > 0:
            print 'We got it right!'
        return 1
    assert tids[0] == bad_ans
    if verbose > 0:
        print 'We got it wrong :('
    return 0  # 0 = error
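
# A minimal driver sketch for one_exp(): run it many times and report the
# fraction of correct retrievals. The clique_tid loading is assumed done
# elsewhere (e.g. from the SHS train list); `run_exps_sketch` and the default
# `nexps` are hypothetical, not part of the original module.
def run_exps_sketch(maindir, clique_tid, nexps=1000):
    """Average one_exp() over nexps trials, skipping failed (None) trials."""
    results = []
    for _ in range(nexps):
        res = one_exp(maindir, clique_tid)
        if res is not None:  # None = missing btchroma or empty jumpcodes
            results.append(res)
    if len(results) == 0:
        print 'no valid experiment!'
        return
    print 'accuracy: %f (%d valid experiments)' % (np.mean(results),
                                                   len(results))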