import os
import sqlite3

# The constants used below (COMPRESSION, DECAY, MAX_PER_FRAME, WIN, LEVELS,
# NORMALIZE) are assumed to be defined at module level in the original file.


def create_fill_one_partial_db(filelist=None, outputdb=None):
    """
    Main function called by each process: fills one partial hash
    database from the given list of files.
    """
    # make sure we got the params
    assert filelist is not None and outputdb is not None, \
        "internal arg passing error!"
    # imported here, in each process, possibly because of the local
    # num_hash_tables count
    import cover_hash_table as CHT
    # other imports
    import quick_query_test as QQT  # should be replaced in the future
    import fingerprint_hash as FH
    # create output db, including PRAGMAs that trade safety for speed
    conn = sqlite3.connect(outputdb)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
    conn.execute('PRAGMA cache_size = 1000000;')  # default=2000, page_size=1024
    CHT.init_db(conn)
    # iterate over files
    cnt_tid_added = 0
    for filepath in filelist:
        # get the (compressed) beat-aligned chroma matrix
        btchroma = QQT.get_cpressed_btchroma(filepath, compression=COMPRESSION)
        if btchroma is None:
            continue
        # get tid from the filepath (faster than querying the h5 file,
        # but less robust)
        tid = os.path.split(os.path.splitext(filepath)[0])[1]
        # extract landmarks, jumps, and composed jumps
        landmarks = FH.get_landmarks(btchroma,
                                     decay=DECAY,
                                     max_per_frame=MAX_PER_FRAME)
        jumps = FH.get_jumps(landmarks, win=WIN)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        # encode the composed jumps and add them to the hash table
        jumpcodes = [FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN)
                     for cj in cjumps]
        CHT.add_jumpcodes(conn,
                          tid,
                          jumpcodes,
                          normalize=NORMALIZE,
                          commit=False)
        cnt_tid_added += 1
        # commit every 1000 tracks
        if cnt_tid_added % 1000 == 0:
            conn.commit()
        # debug
        if cnt_tid_added % 500 == 0:
            print('We added %d tids to the hash table(s) of %s.' % (
                cnt_tid_added, outputdb))
    # build the indices
    CHT.reindex(conn)
    # close the connection
    conn.close()
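
# The docstring above says this function is called by each process; below is a
# minimal driving sketch under that assumption. It is not part of the original
# module: build_partial_dbs, all_h5_files, num_workers, and the 'partial_%d.db'
# naming are hypothetical, chosen only for illustration.
def build_partial_dbs(all_h5_files, num_workers=4):
    import multiprocessing
    # one chunk of files and one output db per worker
    chunks = [all_h5_files[i::num_workers] for i in range(num_workers)]
    args = [(chunk, 'partial_%d.db' % i) for i, chunk in enumerate(chunks)]
    pool = multiprocessing.Pool(processes=num_workers)
    try:
        # each worker builds its own partial hash database
        pool.starmap(create_fill_one_partial_db, args)
    finally:
        pool.close()
        pool.join()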
import time

import numpy as np

# As with the previous function, module-level names (CHT, FH, get_jumps,
# get_cpressed_btchroma, path_from_tid, COMPRESSION, LEVELS, WIN, NORMALIZE,
# WEIGHT_MARGIN) are assumed to be defined elsewhere in the original module.


def one_exp(maindir, clique_tid, verbose=0):
    """
    Performs one experiment:
      - select a clique and take two of its covers (query + correct answer)
      - select a random song from another clique (wrong answer)
      - compute hashes / jumps for all three
      - return 1 if the cover is retrieved correctly, 0 otherwise
    """
    # select two random cliques
    cliques = sorted(clique_tid.keys())
    np.random.shuffle(cliques)
    cl = cliques[0]
    other_cl = cliques[1]
    # select tracks: query and correct answer from the same clique
    tids = sorted(clique_tid[cl])
    np.random.shuffle(tids)
    query = tids[0]
    good_ans = tids[1]
    len_other_tids = len(clique_tid[other_cl])
    bad_ans = clique_tid[other_cl][np.random.randint(len_other_tids)]
    # create and initialize an in-memory hash table
    conn = sqlite3.connect(':memory:')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')
    conn.execute('PRAGMA page_size = 4096;')
    conn.execute('PRAGMA cache_size = 250000;')
    CHT.init_db(conn)
    # verbose
    if verbose > 0:
        t1 = time.time()
    # compression (still testing)
    for cid, compression in enumerate(COMPRESSION):
        # get the beat-aligned chroma matrices
        query_path = path_from_tid(maindir, query)
        query_btc = get_cpressed_btchroma(query_path, compression=compression)
        good_ans_path = path_from_tid(maindir, good_ans)
        good_ans_btc = get_cpressed_btchroma(good_ans_path,
                                             compression=compression)
        bad_ans_path = path_from_tid(maindir, bad_ans)
        bad_ans_btc = get_cpressed_btchroma(bad_ans_path,
                                            compression=compression)
        if query_btc is None or good_ans_btc is None or bad_ans_btc is None:
            conn.close()
            return None
        # get hashes (jumps) for the query
        jumps = get_jumps(query_btc, verbose=verbose)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        jumpcodes = [FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN)
                     for cj in cjumps]
        if len(jumpcodes) == 0:
            print('query has no jumpcode!')
            conn.close()
            return None
        #assert cid == 0
        CHT.add_jumpcodes(conn,
                          query,
                          jumpcodes,
                          normalize=NORMALIZE,
                          commit=False)
        # debug
        if verbose > 0:
            res = conn.execute("SELECT Count(tidid) FROM hashes1")
            print('query added %d jumps' % res.fetchone()[0])
        # correct answer
        jumps = get_jumps(good_ans_btc)
        cjumps = FH.get_composed_jumps(jumps,
                                       levels=LEVELS,
                                       win=WIN,
                                       verbose=verbose)
        jumpcodes = [FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN)
                     for cj in cjumps]
        CHT.add_jumpcodes(conn,
                          good_ans,
                          jumpcodes,
                          normalize=NORMALIZE,
                          commit=False)
        # wrong answer
        jumps = get_jumps(bad_ans_btc)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        jumpcodes = [FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN)
                     for cj in cjumps]
        CHT.add_jumpcodes(conn,
                          bad_ans,
                          jumpcodes,
                          normalize=NORMALIZE,
                          commit=False)
    conn.commit()
    # build temporary indices
    conn.execute("CREATE INDEX tmp_idx1 ON hashes1 ('jumpcode', 'weight')")
    conn.execute("CREATE INDEX tmp_idx2 ON hashes1 ('tidid')")
    conn.commit()
    # verbose
    if verbose > 0:
        print('Extracted/added jumps and indexed the db in %f seconds.' % (
            time.time() - t1))
    # previous approach: fetch the query's jumpcodes and weights explicitly
    #q = "SELECT jumpcode, weight FROM hashes WHERE tid='" + query + "'"
    #res = conn.execute(q)
    #res = res.fetchall()
    #jumps = map(lambda x: x[0], res)
    #weights = map(lambda x: x[1], res)
    jumps = None
    weights = None
    # do the actual query
    tids = CHT.select_matches(conn,
                              jumps,
                              weights,
                              weight_margin=WEIGHT_MARGIN,
                              from_tid=query,
                              verbose=verbose)
    #assert tids[0] == query
    assert len(tids) < 4
    for t in tids:
        assert t in (query, bad_ans, good_ans)
    # drop the query itself from the matches
    tids = np.array(tids)
    tids = tids[np.where(tids != query)]
    # close connection
    conn.close()
    # check result
    if len(tids) == 0:
        print('(no matches)')
        return 0
    if tids[0] == good_ans:
        if verbose > 0:
            print('We got it right!')
        return 1
    assert tids[0] == bad_ans
    if verbose > 0:
        print('We got it wrong :(')
    # DONE
    return 0  # 0 = error
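
# A minimal sketch (not in the original file) of how one_exp could be driven:
# run it repeatedly and report the fraction of correct retrievals. Runs where
# one_exp returns None (a missing feature matrix) are skipped;
# estimate_accuracy and n_runs are hypothetical names.
def estimate_accuracy(maindir, clique_tid, n_runs=100):
    results = []
    for _ in range(n_runs):
        res = one_exp(maindir, clique_tid)
        if res is not None:  # None means a btchroma could not be loaded
            results.append(res)
    if not results:
        return None
    # one_exp returns 1 on a correct retrieval, 0 otherwise
    return sum(results) / float(len(results))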