Esempio n. 1
0
def _lookup_song_metadata(track_id, local=False):
    """
        Return a dict with "artist", "release" and "title" for track_id, or
        None when no usable metadata record is available.

        With local=True the record is read from the module-level _local_bigeval
        table. Otherwise it comes from fp.metadata_for_track_id, where the title
        is stored under the "track" key.
    """
    if local:
        try:
            metad = _local_bigeval[track_id]
        except KeyError:
            return None
        title = metad.get("title", "")
    else:
        metad = fp.metadata_for_track_id(track_id)
        # An empty/None result, or a record without "track", cannot be shown.
        # NOTE(review): the exact "not found" value of metadata_for_track_id is
        # not visible here; both None and an empty mapping are handled.
        if not metad or "track" not in metad:
            return None
        # Read the title without writing it back into the returned record.
        title = metad["track"]
    return {
        "artist": metad.get("artist", ""),
        "release": metad.get("release", ""),
        "title": title
    }


def test_single(filename, local=False, **munge_kwargs):
    """
        Perform a test on a single file. Prints more diagnostic information than usual.

        The file is passed through munge() (with munge_kwargs), fingerprinted,
        and looked up twice: once with fp.best_match_for_query and once with
        fp.query_fp, whose results are listed ordered by their actual match
        count. The munged file is always deleted before returning, even when a
        lookup raises.

        filename     -- path of the audio file to test
        local        -- forwarded to the fp lookups; also selects the
                        _local_bigeval table as the metadata source
        munge_kwargs -- extra keyword arguments forwarded to munge()
    """
    (new_file, _) = munge(filename, **munge_kwargs)
    try:
        query_obj = song.util.codegen(new_file, start=-1, duration=-1)
        code = query_obj[0]["code"]

        s = fp.best_match_for_query(code, local=local)
        if s.TRID is None:
            print("No match")
        else:
            song_metadata = _lookup_song_metadata(s.TRID, local=local)
            if song_metadata is None:
                print("problem getting metadata for " + str(s.TRID))
            else:
                print(str(song_metadata))

        decoded = fp.decode_code_string(code)
        # The decoded string is split on spaces and the token count halved
        print(str(len(decoded.split(" ")) // 2) + " codes in original")

        response = fp.query_fp(decoded, local=local, rows=15)
        if response is None:
            print("response from fp flat was None -- decoded code was " + str(
                decoded))
            return

        print("From FP flat:")
        tracks = {}
        for r in response.results:
            full_id = r["track_id"]
            # Metadata is keyed by the part of the id before the first "-"
            m = _lookup_song_metadata(full_id.split("-")[0], local=local)
            if m is None:
                print("problem getting metadata for " + full_id)
                continue
            actual_match = fp.actual_matches(
                decoded, fp.fp_code_for_track_id(full_id, local=local))
            tracks[full_id] = (m, r["score"], actual_match)

        # Highest actual match first; ties are broken by track id, descending
        ranked = sorted(tracks,
                        key=lambda tid: (tracks[tid][2], tid),
                        reverse=True)
        for trackid in ranked:
            (m, score, actual_match) = tracks[trackid]
            print(trackid + " (" + str(int(score)) + ", " + str(
                actual_match) + ") - " + m["artist"] + " - " + m["title"])
    finally:
        # Never leave the munged temporary file behind
        os.remove(new_file)
Esempio n. 2
0
def _read_list_lines(path, limit):
    """
        Return the first `limit` non-blank lines of the text file at `path`.

        Blank lines (such as the empty string left after a trailing newline)
        are dropped so callers can split every returned line on " ### ".
    """
    with open(path) as handle:
        lines = handle.read().split("\n")
    return [line for line in lines if line.strip()][0:limit]


def _percent(wins, total):
    """
        Return `wins` as a percentage of `total`, or 0.0 when `total` is zero.
    """
    if not total:
        return 0.0
    return (float(wins) / float(total)) * 100.0


def main():
    """
        Command-line entry point: python little_eval.py [database_list | disk] query_list [limit]

        sys.argv[1] is either the literal "disk" (load the saved "disk.pkl"
        index) or a list file whose lines look like "track_id ### file"; each
        listed file is fingerprinted, ingested locally and the index saved to
        "disk.pkl". sys.argv[2] is a list file in the same format; each file is
        munged, fingerprinted and queried. sys.argv[3] caps how many lines of
        each list are used.

        Prints every track id as it is processed and finally one summary line
        with the number of queries whose actual-score winner, original-score
        winner and best match equal the expected track id.
    """
    if not len(sys.argv) == 4:
        print("usage: python little_eval.py [database_list | disk] query_list [limit]")
        sys.exit()

    limit = int(sys.argv[3])
    if sys.argv[1] == "disk":
        fp.local_load("disk.pkl")
    else:
        fp_codes = []
        for line in _read_list_lines(sys.argv[1], limit):
            (track_id, path) = line.split(" ### ")
            print(track_id)
            # TODO - use threaded codegen
            j = codegen(path, start=-1, duration=-1)
            if len(j):
                meta = j[0]["metadata"]
                fp_codes.append({
                    "track_id": track_id,
                    "fp": fp.decode_code_string(j[0]["code"]),
                    "length": str(meta["duration"] * 1000),
                    "codever": str(round(meta["version"], 2)),
                    "artist": meta["artist"],
                    "release": meta["release"],
                    "track": meta["title"]
                })
        fp.ingest(fp_codes, local=True)
        fp.local_save("disk.pkl")

    counter = 0
    actual_win = 0
    original_win = 0
    bm_win = 0
    for line in _read_list_lines(sys.argv[2], limit):
        (track_id, path) = line.split(" ### ")
        print(track_id)
        j = codegen(munge(path))
        if len(j):
            counter += 1
            code = j[0]["code"]
            # Decode once and reuse it for both the query and the scoring
            decoded = fp.decode_code_string(code)
            response = fp.query_fp(decoded, rows=30, local=True, get_data=True)
            (winner_actual, winner_original) = get_winners(decoded, response, elbow=8)
            # Compare on the part of the id before the first "-"
            winner_actual = winner_actual.split("-")[0]
            winner_original = winner_original.split("-")[0]
            best = fp.best_match_for_query(code, local=True)
            if best.TRID == track_id:
                bm_win += 1
            if winner_actual == track_id:
                actual_win += 1
            if winner_original == track_id:
                original_win += 1

    # _percent() keeps the summary printable when no query produced a code
    print("%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%) %d / %d bm (%2.2f%%)" % (
        actual_win, counter, _percent(actual_win, counter),
        original_win, counter, _percent(original_win, counter),
        bm_win, counter, _percent(bm_win, counter)))
Esempio n. 3
0
def _local_song_metadata(track_id):
    """
        Return a dict with "artist", "release" and "title" read from the
        module-level _local_bigeval table, or None when the track (or one of
        those fields) is missing.
    """
    try:
        metad = _local_bigeval[track_id]
        return {"artist": metad["artist"], "release": metad["release"], "title": metad["title"]}
    except KeyError:
        return None


def test_single(filename, local=False, **munge_kwargs):
    """
        Perform a test on a single file. Prints more diagnostic information than usual.

        The file is passed through munge() (with munge_kwargs), fingerprinted,
        and looked up twice: once with fp.best_match_for_query and once with
        fp.query_fp, whose results are listed ordered by their actual match
        count. Metadata always comes from the _local_bigeval table. The munged
        file is always deleted before returning, even when a lookup raises.

        filename     -- path of the audio file to test
        local        -- forwarded to the fp lookups
        munge_kwargs -- extra keyword arguments forwarded to munge()
    """
    (new_file, _) = munge(filename, **munge_kwargs)
    try:
        query_obj = song.util.codegen(new_file, start=-1, duration=-1)
        code = query_obj[0]["code"]

        if local:
            s = fp.best_match_for_query(code, local=local)
        else:
            # Leave the keyword off so the callee's own default applies
            s = fp.best_match_for_query(code)

        if s.TRID is None:
            print("No match")
        else:
            song_metadata = _local_song_metadata(s.TRID)
            if song_metadata is None:
                print("problem getting metadata for " + str(s.TRID))
            else:
                print(str(song_metadata))

        decoded = fp.decode_code_string(code)
        # The decoded string is split on spaces and the token count halved
        print(str(len(decoded.split(" ")) // 2) + " codes in original")

        response = fp.query_fp(decoded, local=local, rows=15)
        if response is None:
            print("response from fp flat was None -- decoded code was " + str(decoded))
            return

        print("From FP flat:")
        tracks = {}
        for r in response.results:
            full_id = r["track_id"]
            # Metadata is keyed by the part of the id before the first "-"
            m = _local_song_metadata(full_id.split("-")[0])
            if m is None:
                print("problem getting metadata for " + full_id)
                continue
            actual_match = fp.actual_matches(decoded, fp.fp_code_for_track_id(full_id, local=local))
            tracks[full_id] = (m, r["score"], actual_match)

        # Highest actual match first; ties are broken by track id, descending
        ranked = sorted(tracks, key=lambda tid: (tracks[tid][2], tid), reverse=True)
        for trackid in ranked:
            (m, score, actual_match) = tracks[trackid]
            print(trackid + " (" + str(int(score)) + ", " + str(actual_match) + ") - " + m["artist"] + " - " + m["title"])
    finally:
        # Never leave the munged temporary file behind
        os.remove(new_file)
Esempio n. 4
0
def magic_matches_list(code_string, elbow=10, local=False):
    """
        Return the metadata of the tracks matching a fingerprint code, best
        match first, or False when the code cannot be decoded or the query
        yields nothing.

        code_string -- fingerprint code, either compressed or already decoded
        elbow       -- forwarded to actual_matches() when scoring each candidate
        local       -- forwarded to query_fp() and metadata_for_track_id()

        Candidates come from query_fp() (30 rows) and are re-scored with
        actual_matches() against the stored code fetched from get_tyrant().
        Results are ordered by that score, descending. Several segments of one
        track ("<track>-<n>" ids) collapse into a single entry, keeping the
        position of the best-scoring segment.
    """
    # DEC strings come in as unicode, so encode them to a UTF-8 byte string
    code_string = code_string.encode("utf8")

    # First see if this is a compressed code (judged by its first character)
    if re.match(r'[A-Za-z\/\+\_\-]', code_string) is not None:
        code_string = decode_code_string(code_string)
        if code_string is None:
            return False

    # Query the FP flat directly.
    response = query_fp(code_string, rows=30, local=local, get_data=True)

    # NOTE(review): other callers in this file check the query result for None,
    # so a missing response is treated like an empty one here.
    if response is None or len(response.results) == 0:
        return False

    trackids = [r["track_id"].encode("utf8") for r in response.results]
    tcodes = get_tyrant().multi_get(trackids)

    # For each result compute the "actual score" (based on the histogram matching)
    actual_scores = {}
    for (i, r) in enumerate(response.results):
        track_code = tcodes[i]
        if track_code is None:
            # Solr gave us back a track id but that track
            # is not in our keystore
            continue
        actual_scores[r["track_id"]] = actual_matches(code_string, track_code, elbow=elbow)

    # Sort by actual score, ties broken by track id, both descending
    sorted_actual_scores = sorted(actual_scores.items(),
                                  key=lambda item: (item[1], item[0]),
                                  reverse=True)

    # Because we split songs up into multiple parts, sometimes the results will have the same track
    # more than once. Keep only the first (best-scoring) occurrence of each track.
    final_results = []
    seen_trids = set()
    for (segment_id, _) in sorted_actual_scores:
        trid = segment_id.split("-")[0]
        if trid in seen_trids:
            continue
        seen_trids.add(trid)
        final_results.append(metadata_for_track_id(trid, local=local))
    return final_results
Esempio n. 5
0
def _load_entries(list_path, limit):
    """
        Read the list file at `list_path` and return up to `limit`
        (track_id, audio_path) pairs, one per non-blank "track_id ### file" line.
    """
    with open(list_path) as list_file:
        raw_lines = list_file.read().split("\n")
    # Skip blank lines (e.g. the empty string after a trailing newline) so the
    # " ### " split below always yields a pair
    wanted = [raw for raw in raw_lines if raw.strip()][0:limit]
    entries = []
    for raw in wanted:
        (track_id, audio_path) = raw.split(" ### ")
        entries.append((track_id, audio_path))
    return entries


def _win_rate(wins, attempts):
    """
        Return the percentage of `attempts` that were `wins`; 0.0 when there
        were no attempts at all.
    """
    if attempts == 0:
        return 0.0
    return (float(wins) / float(attempts)) * 100.0


def main():
    """
        Command-line entry point: python little_eval.py [database_list | disk] query_list [limit]

        sys.argv[1] is either the literal "disk" (load the saved "disk.pkl"
        index) or a list file whose lines look like "track_id ### file"; each
        listed file is fingerprinted, ingested locally and the index saved to
        "disk.pkl". sys.argv[2] is a list file in the same format; each file is
        munged, fingerprinted and queried. sys.argv[3] caps how many lines of
        each list are used.

        Prints every track id as it is processed and finally one summary line
        with the number of queries whose actual-score winner, original-score
        winner and best match equal the expected track id.
    """
    if not len(sys.argv) == 4:
        print("usage: python little_eval.py [database_list | disk] query_list [limit]")
        sys.exit()

    limit = int(sys.argv[3])
    if sys.argv[1] == "disk":
        fp.local_load("disk.pkl")
    else:
        fp_codes = []
        for (track_id, audio_path) in _load_entries(sys.argv[1], limit):
            print(track_id)
            # TODO - use threaded codegen
            j = codegen(audio_path, start=-1, duration=-1)
            if not len(j):
                continue
            meta = j[0]["metadata"]
            fp_codes.append({
                "track_id": track_id,
                "fp": fp.decode_code_string(j[0]["code"]),
                "length": str(meta["duration"] * 1000),
                "codever": str(round(meta["version"], 2)),
                "artist": meta["artist"],
                "release": meta["release"],
                "track": meta["title"]
            })
        fp.ingest(fp_codes, local=True)
        fp.local_save("disk.pkl")

    counter = 0
    actual_win = 0
    original_win = 0
    bm_win = 0
    for (track_id, audio_path) in _load_entries(sys.argv[2], limit):
        print(track_id)
        j = codegen(munge(audio_path))
        if not len(j):
            continue
        counter += 1
        code = j[0]["code"]
        # Decode once and reuse it for both the query and the scoring
        decoded = fp.decode_code_string(code)
        response = fp.query_fp(decoded,
                               rows=30,
                               local=True,
                               get_data=True)
        (winner_actual, winner_original) = get_winners(decoded, response, elbow=8)
        best = fp.best_match_for_query(code, local=True)
        if best.TRID == track_id:
            bm_win += 1
        # Winners are compared on the part of the id before the first "-"
        if winner_actual.split("-")[0] == track_id:
            actual_win += 1
        if winner_original.split("-")[0] == track_id:
            original_win += 1

    # _win_rate() keeps the summary printable when no query produced a code
    print("%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%) %d / %d bm (%2.2f%%)" % (
        actual_win, counter, _win_rate(actual_win, counter),
        original_win, counter, _win_rate(original_win, counter),
        bm_win, counter, _win_rate(bm_win, counter)))