def test_single(filename, local=False, **munge_kwargs):
    """
    Perform a test on a single file.

    Prints more diagnostic information than usual: the metadata of the best
    match (or "No match"), the number of codes in the query, and one line
    per ``fp.query_fp`` result showing its score and its
    ``fp.actual_matches`` count, ordered by that count (highest first).

    Args:
        filename: path handed to ``munge`` to produce the query file.
        local: forwarded as ``local=`` to the ``fp`` calls. When true, track
            metadata is read from ``_local_bigeval``; otherwise it comes
            from ``fp.metadata_for_track_id``.
        **munge_kwargs: passed through to ``munge`` unchanged.

    The file produced by ``munge`` is always removed before returning, even
    when one of the lookups raises.
    """

    def display_metadata(trid):
        # Build the artist/release/title dict for one track id, or return
        # None when no metadata record is available for it.
        if local:
            try:
                metad = _local_bigeval[trid]
            except KeyError:
                metad = None
        else:
            metad = fp.metadata_for_track_id(trid)
            if metad is not None:
                # Expose the "track" field under "title" so both metadata
                # sources are read through the same key below.
                metad["title"] = metad["track"]
        if metad is None:
            return None
        return {
            "artist": metad.get("artist", ""),
            "release": metad.get("release", ""),
            "title": metad.get("title", ""),
        }

    (new_file, _what) = munge(filename, **munge_kwargs)
    try:
        query_obj = song.util.codegen(new_file, start=-1, duration=-1)
        code = query_obj[0]["code"]

        s = fp.best_match_for_query(code, local=local)
        if s.TRID is None:
            print("No match")
        else:
            song_metadata = display_metadata(s.TRID)
            if song_metadata is None:
                print("problem getting metadata for " + str(s.TRID))
            else:
                print(str(song_metadata))

        decoded = fp.decode_code_string(code)
        # Floor division keeps the integer count the Python 2 "/" produced.
        print(str(len(decoded.split(" ")) // 2) + " codes in original")

        response = fp.query_fp(decoded, local=local, rows=15)
        if response is None:
            print("response from fp flat was None -- decoded code was " +
                  str(decoded))
            return

        print("From FP flat:")
        tracks = {}
        scores = {}
        for r in response.results:
            track_id = r["track_id"]
            # Metadata is keyed by the part of the id before the first "-".
            m = display_metadata(track_id.split("-")[0])
            if m is None:
                print("problem getting metadata for " + track_id)
                continue
            actual_match = fp.actual_matches(
                decoded, fp.fp_code_for_track_id(track_id, local=local))
            tracks[track_id] = (m, r["score"], actual_match)
            scores[track_id] = actual_match

        # Highest actual-match count first; ties broken by track id.
        ranked = sorted(scores.items(),
                        key=lambda item: (item[1], item[0]),
                        reverse=True)
        for (trackid, _count) in ranked:
            (m, score, actual_match) = tracks[trackid]
            print(trackid + " (" + str(int(score)) + ", " +
                  str(actual_match) + ") - " + m["artist"] + " - " +
                  m["title"])
    finally:
        # Always clean up the munged file, even if a lookup above failed.
        os.remove(new_file)
def main():
    """
    Command-line entry point for the small evaluation run.

    Usage: python little_eval.py [database_list | disk] query_list [limit]

    * If the first argument is ``disk`` the fingerprint store is loaded with
      ``fp.local_load("disk.pkl")``. Otherwise it names a list file: each
      listed audio file is run through ``codegen``, the results are passed
      to ``fp.ingest(..., local=True)`` and saved with
      ``fp.local_save("disk.pkl")``.
    * Every entry of ``query_list`` is munged, fingerprinted and queried;
      a win is counted whenever the returned track id equals the listed one.
    * ``limit`` caps how many lines are read from each list file.

    List-file lines have the form ``track_id ### filename``. Blank lines are
    skipped. A summary line with the three win rates is printed at the end;
    the rates are reported as 0.00% when no query produced a code.
    """
    if len(sys.argv) != 4:
        print("usage: python little_eval.py [database_list | disk] query_list [limit]")
        sys.exit()

    fp_codes = []
    limit = int(sys.argv[3])

    def read_lines(path):
        # Return the first `limit` lines of `path`, dropping blank ones.
        # A trailing newline leaves an empty final element that would
        # otherwise fail the "track_id ### filename" unpacking.
        with open(path) as handle:
            lines = handle.read().split("\n")[0:limit]
        return [line for line in lines if line.strip()]

    if sys.argv[1] == "disk":
        fp.local_load("disk.pkl")
    else:
        for line in read_lines(sys.argv[1]):
            (track_id, audio_path) = line.split(" ### ")
            print(track_id)
            # TODO - use threaded codegen
            j = codegen(audio_path, start=-1, duration=-1)
            if len(j):
                code_str = fp.decode_code_string(j[0]["code"])
                meta = j[0]["metadata"]
                fp_codes.append({
                    "track_id": track_id,
                    "fp": code_str,
                    "length": str(meta["duration"] * 1000),
                    "codever": str(round(meta["version"], 2)),
                    "artist": meta["artist"],
                    "release": meta["release"],
                    "track": meta["title"],
                })
        fp.ingest(fp_codes, local=True)
        fp.local_save("disk.pkl")

    counter = 0
    actual_win = 0
    original_win = 0
    bm_win = 0
    for line in read_lines(sys.argv[2]):
        (track_id, audio_path) = line.split(" ### ")
        print(track_id)
        j = codegen(munge(audio_path))
        if not len(j):
            continue
        counter += 1
        code = j[0]["code"]
        # Decode once and reuse for both the query and the winner selection.
        decoded = fp.decode_code_string(code)
        response = fp.query_fp(decoded, rows=30, local=True, get_data=True)
        (winner_actual, winner_original) = get_winners(decoded, response,
                                                       elbow=8)
        # Compare on the part of the id before the first "-".
        winner_actual = winner_actual.split("-")[0]
        winner_original = winner_original.split("-")[0]
        best = fp.best_match_for_query(code, local=True)
        if best.TRID == track_id:
            bm_win += 1
        if winner_actual == track_id:
            actual_win += 1
        if winner_original == track_id:
            original_win += 1

    def percent(wins):
        # Guard against dividing by zero when no query produced a code.
        if counter == 0:
            return 0.0
        return (float(wins) / float(counter)) * 100.0

    print("%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%) %d / %d bm (%2.2f%%)" % (
        actual_win, counter, percent(actual_win),
        original_win, counter, percent(original_win),
        bm_win, counter, percent(bm_win)))
def test_single(filename, local=False, **munge_kwargs):
    """
    Perform a test on a single file.

    Prints more diagnostic information than usual: the metadata of the best
    match (or "No match"), the number of codes in the query, and one line
    per ``fp.query_fp`` result showing its score and its
    ``fp.actual_matches`` count, ordered by that count (highest first).

    Args:
        filename: path handed to ``munge`` to produce the query file.
        local: when true it is forwarded as ``local=`` to
            ``fp.best_match_for_query``; it is always forwarded to
            ``fp.query_fp`` and ``fp.fp_code_for_track_id``.
        **munge_kwargs: passed through to ``munge`` unchanged.

    Track metadata is always read from ``_local_bigeval``. The file produced
    by ``munge`` is always removed before returning, even when one of the
    lookups raises.
    """

    def display_metadata(trid):
        # Build the artist/release/title dict for one track id, or return
        # None when `_local_bigeval` has no record for it.
        try:
            metad = _local_bigeval[trid]
        except KeyError:
            return None
        if metad is None:
            return None
        return {
            "artist": metad["artist"],
            "release": metad["release"],
            "title": metad["title"],
        }

    (new_file, _what) = munge(filename, **munge_kwargs)
    try:
        query_obj = song.util.codegen(new_file, start=-1, duration=-1)
        code = query_obj[0]["code"]

        if local:
            s = fp.best_match_for_query(code, local=local)
        else:
            # Non-local queries use best_match_for_query's own default.
            s = fp.best_match_for_query(code)

        if s.TRID is None:
            print("No match")
        else:
            song_metadata = display_metadata(s.TRID)
            if song_metadata is None:
                print("problem getting metadata for " + str(s.TRID))
            else:
                print(str(song_metadata))

        decoded = fp.decode_code_string(code)
        # Floor division keeps the integer count the Python 2 "/" produced.
        print(str(len(decoded.split(" ")) // 2) + " codes in original")

        response = fp.query_fp(decoded, local=local, rows=15)
        if response is None:
            print("response from fp flat was None -- decoded code was " +
                  str(decoded))
            return

        print("From FP flat:")
        tracks = {}
        scores = {}
        for r in response.results:
            track_id = r["track_id"]
            # Metadata is keyed by the part of the id before the first "-".
            m = display_metadata(track_id.split("-")[0])
            if m is None:
                print("problem getting metadata for " + track_id)
                continue
            actual_match = fp.actual_matches(
                decoded, fp.fp_code_for_track_id(track_id, local=local))
            tracks[track_id] = (m, r["score"], actual_match)
            scores[track_id] = actual_match

        # Highest actual-match count first; ties broken by track id.
        ranked = sorted(scores.items(),
                        key=lambda item: (item[1], item[0]),
                        reverse=True)
        for (trackid, _count) in ranked:
            (m, score, actual_match) = tracks[trackid]
            print(trackid + " (" + str(int(score)) + ", " +
                  str(actual_match) + ") - " + m["artist"] + " - " +
                  m["title"])
    finally:
        # Always clean up the munged file, even if a lookup above failed.
        os.remove(new_file)
def magic_matches_list(code_string, elbow=10, local=False):
    """
    Return metadata for the tracks matching ``code_string``, best first.

    The code is queried with ``query_fp``; each result whose stored code can
    be fetched is re-scored with ``actual_matches``, and the results are
    ordered by that score (highest first, ties broken by track id).

    Args:
        code_string: the query code. If its first character matches the
            compressed-code pattern it is first passed through
            ``decode_code_string``.
        elbow: forwarded to ``actual_matches``.
        local: forwarded to ``query_fp`` and ``metadata_for_track_id``.

    Returns:
        ``False`` when the code cannot be decoded or the query returns no
        results. Otherwise a list of ``metadata_for_track_id`` values, one
        per distinct track (the part of the id before the first "-").
    """
    # DEC strings come in as unicode so we have to force them to ASCII
    code_string = code_string.encode("utf8")

    # First see if this is a compressed code
    if re.match(r'[A-Za-z\/\+\_\-]', code_string) is not None:
        code_string = decode_code_string(code_string)
        if code_string is None:
            return False

    # Query the FP flat directly.
    response = query_fp(code_string, rows=30, local=local, get_data=True)
    if len(response.results) == 0:
        return False

    # Fetch the stored codes for every result in a single call.
    trackids = [r["track_id"].encode("utf8") for r in response.results]
    tcodes = get_tyrant().multi_get(trackids)

    # For each result compute the "actual score" (based on the histogram
    # matching)
    actual_scores = {}
    for (i, r) in enumerate(response.results):
        track_code = tcodes[i]
        if track_code is None:
            # Solr gave us back a track id but that track
            # is not in our keystore
            continue
        actual_scores[r["track_id"]] = actual_matches(
            code_string, track_code, elbow=elbow)

    # Sort the actual scores
    ranked = sorted(actual_scores.items(),
                    key=lambda item: (item[1], item[0]),
                    reverse=True)

    # Because we split songs up into multiple parts, sometimes the results
    # will have the same track more than once. Keep only the best-scoring
    # entry for each track so every track is reported (and looked up) once.
    final_results = []
    seen_trids = set()
    for (trid, _score) in ranked:
        base_trid = trid.split("-")[0]
        if base_trid in seen_trids:
            continue
        seen_trids.add(base_trid)
        final_results.append(metadata_for_track_id(base_trid, local=local))
    return final_results
def main():
    """
    Command-line entry point for the small evaluation run.

    Usage: python little_eval.py [database_list | disk] query_list [limit]

    * If the first argument is ``disk`` the fingerprint store is loaded with
      ``fp.local_load("disk.pkl")``. Otherwise it names a list file: each
      listed audio file is run through ``codegen``, the results are passed
      to ``fp.ingest(..., local=True)`` and saved with
      ``fp.local_save("disk.pkl")``.
    * Every entry of ``query_list`` is munged, fingerprinted and queried;
      a win is counted whenever the returned track id equals the listed one.
    * ``limit`` caps how many lines are read from each list file.

    List-file lines have the form ``track_id ### filename``. Blank lines are
    skipped. A summary line with the three win rates is printed at the end;
    the rates are reported as 0.00% when no query produced a code.
    """
    if len(sys.argv) != 4:
        print("usage: python little_eval.py [database_list | disk] query_list [limit]")
        sys.exit()

    fp_codes = []
    limit = int(sys.argv[3])

    def read_lines(path):
        # Return the first `limit` lines of `path`, dropping blank ones.
        # A trailing newline leaves an empty final element that would
        # otherwise fail the "track_id ### filename" unpacking.
        with open(path) as handle:
            lines = handle.read().split("\n")[0:limit]
        return [line for line in lines if line.strip()]

    if sys.argv[1] == "disk":
        fp.local_load("disk.pkl")
    else:
        for line in read_lines(sys.argv[1]):
            (track_id, audio_path) = line.split(" ### ")
            print(track_id)
            # TODO - use threaded codegen
            j = codegen(audio_path, start=-1, duration=-1)
            if len(j):
                code_str = fp.decode_code_string(j[0]["code"])
                meta = j[0]["metadata"]
                fp_codes.append({
                    "track_id": track_id,
                    "fp": code_str,
                    "length": str(meta["duration"] * 1000),
                    "codever": str(round(meta["version"], 2)),
                    "artist": meta["artist"],
                    "release": meta["release"],
                    "track": meta["title"],
                })
        fp.ingest(fp_codes, local=True)
        fp.local_save("disk.pkl")

    counter = 0
    actual_win = 0
    original_win = 0
    bm_win = 0
    for line in read_lines(sys.argv[2]):
        (track_id, audio_path) = line.split(" ### ")
        print(track_id)
        j = codegen(munge(audio_path))
        if not len(j):
            continue
        counter += 1
        code = j[0]["code"]
        # Decode once and reuse for both the query and the winner selection.
        decoded = fp.decode_code_string(code)
        response = fp.query_fp(decoded, rows=30, local=True, get_data=True)
        (winner_actual, winner_original) = get_winners(decoded, response,
                                                       elbow=8)
        # Compare on the part of the id before the first "-".
        winner_actual = winner_actual.split("-")[0]
        winner_original = winner_original.split("-")[0]
        best = fp.best_match_for_query(code, local=True)
        if best.TRID == track_id:
            bm_win += 1
        if winner_actual == track_id:
            actual_win += 1
        if winner_original == track_id:
            original_win += 1

    def percent(wins):
        # Guard against dividing by zero when no query produced a code.
        if counter == 0:
            return 0.0
        return (float(wins) / float(counter)) * 100.0

    print("%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%) %d / %d bm (%2.2f%%)" % (
        actual_win, counter, percent(actual_win),
        original_win, counter, percent(original_win),
        bm_win, counter, percent(bm_win)))