예제 #1
0
def ingest(file):
    """Bulk-load fingerprint rows from a CSV source into the fingerprint store.

    Every row must have exactly seven columns, in this order: track id,
    code version, fingerprint codes, length, artist, release, track.  Rows
    are handed to ``fp.ingest`` in batches of 10000 without committing; the
    last (possibly empty) batch is sent with ``do_commit=True``.  A dot is
    written to stdout after every 1000 rows as a progress indicator.

    Args:
        file: Path of the CSV file to read, or ``"-"`` to read from stdin.

    Raises:
        ValueError: If a row does not have exactly seven columns.
    """
    # Keep hold of the handle so that a real file is closed even when an
    # ingest call fails; stdin is never closed.
    handle = sys.stdin if file == "-" else open(file)
    try:
        ingest_list = []
        size = 0
        for line in csv.reader(handle):
            (trid, codever, codes, length, artist, release, track) = line
            # `now` is not defined in this function; it is expected to exist
            # at module level.
            ingest_list.append({"track_id": trid,
                                "codever": codever,
                                "fp": codes,
                                "length": length,
                                "artist": artist,
                                "release": release,
                                "track": track,
                                "import_date": now,
                                "source": "master"})
            size += 1
            if size % 1000 == 0:
                sys.stdout.write(".")
                sys.stdout.flush()
            if size == 10000:
                # Flush a full batch without committing and start over.
                size = 0
                fp.ingest(ingest_list, do_commit=False, split=False)
                ingest_list = []
        # Send whatever is left and commit everything ingested so far.
        fp.ingest(ingest_list, do_commit=True, split=False)
    finally:
        if handle is not sys.stdin:
            handle.close()
    # Terminate the line of progress dots.
    sys.stdout.write("\n")
예제 #2
0
    def POST(self):
        """Ingest a single fingerprint supplied as request parameters.

        ``length`` and ``codever`` are required; when either is missing a
        ``BadRequest`` is returned.  ``track_id`` defaults to a freshly
        generated id.  ``artist``, ``release`` and ``track`` are copied into
        the ingested record only when they are non-empty.

        Returns:
            A JSON string: ``{"track_id", "status": "ok"}`` on success, or
            ``{"track_id", "ok": False, "error"}`` when a compressed code
            cannot be decoded.
        """
        params = web.input(
            track_id="default", fp_code="", artist=None, release=None, track=None, length=None, codever=None
        )
        track_id = fp.new_track_id() if params.track_id == "default" else params.track_id
        if params.length is None or params.codever is None:
            return web.webapi.BadRequest()

        # A code starting with a letter, "/", "+", "_" or "-" is treated as
        # compressed and has to be decoded first.
        looks_compressed = re.match("[A-Za-z\/\+\_\-]", params.fp_code) is not None
        code_string = params.fp_code
        if looks_compressed:
            code_string = fp.decode_code_string(params.fp_code)
            if code_string is None:
                return json.dumps(
                    {"track_id": track_id, "ok": False, "error": "cannot decode code string %s" % params.fp_code}
                )

        data = {"track_id": track_id, "fp": code_string, "length": params.length, "codever": params.codever}
        # Optional metadata is only stored when a non-empty value was sent.
        for field in ("artist", "release", "track"):
            value = getattr(params, field)
            if value:
                data[field] = value
        fp.ingest(data, do_commit=True, local=False)

        return json.dumps({"track_id": track_id, "status": "ok"})
예제 #3
0
def ingest(params, mp3):
    """Fingerprint an mp3 and ingest the resulting code.

    Args:
        params: Mapping of request fields.  ``length`` and ``codever`` are
            read as required values; ``track_id``, ``artist``, ``release``
            and ``track`` are optional.
        mp3: Audio input passed straight to ``generate_fingerprint``.

    Returns:
        ``False`` when ``generate_fingerprint`` returns a dict (presumably
        an error payload -- TODO confirm) or when ``length`` is missing or
        empty; the tuple ``(False, json_error)`` when a compressed code
        cannot be decoded; ``None`` after a successful ingest.

    Raises:
        KeyError: If ``codever`` is absent from ``params``.
    """
    fp_code = generate_fingerprint(mp3)
    # `fp_code is dict` compared against the type object itself and was
    # never true; isinstance performs the intended type check.
    if isinstance(fp_code, dict):
        return False
    if params.get('track_id', "default") == "default":
        track_id = fp.new_track_id()
    else:
        track_id = params['track_id']
    if not params.get('length', None):
        return False

    # First see if this is a compressed code
    if re.match('[A-Za-z\/\+\_\-]', fp_code) is not None:
        code_string = fp.decode_code_string(fp_code)
        if code_string is None:
            result = json.dumps({"track_id": track_id, "ok": False,
                                 "error": "cannot decode code string %s" % fp_code})
            return False, result
    else:
        code_string = fp_code

    data = {"track_id": track_id,
            "fp": code_string,
            "length": params['length'],
            "codever": params['codever']}
    # Optional metadata is only stored when a non-empty value was given.
    for field in ('artist', 'release', 'track'):
        value = params.get(field)
        if value:
            data[field] = value
    fp.ingest(data, do_commit=True, local=False)
예제 #4
0
 def POST(self):
     """Ingest a JSON list of tracks sent as the raw request body.

     The body is parsed as JSON and handed to ``self.parse_json_dump``; the
     resulting codes are ingested and committed.

     Returns:
         A JSON string carrying the ingested ``track_ids`` and a status.
     """
     tracks = json.loads(web.data())
     # The second value returned by parse_json_dump is not needed here.
     codes, _bigeval, track_ids = self.parse_json_dump(tracks)
     fp.ingest(codes, do_commit=True, local=False)
     reply = {"track_ids":track_ids, "status":"ok"}
     return json.dumps(reply)
예제 #5
0
    def handle(self, *args, **options):
        files = os.listdir(settings.INGESTER_JSON_DIR)
        files = [f for f in files if '.json' in f]

        for f in files:
            print "Ingesting file {0}".format(f)
            codes, bigeval = parse_json_dump(os.path.join(settings.INGESTER_JSON_DIR, f))
            fp.ingest(codes, do_commit=False)
            print "Commiting to database!"
            fp.commit()
            self.backup([f])
예제 #6
0
def main():
    if not len(sys.argv)==4:
        print "usage: python little_eval.py [database_list | disk] query_list [limit]"
        sys.exit()
        
    fp_codes = []
    limit = int(sys.argv[3])
    if sys.argv[1] == "disk":
        fp.local_load("disk.pkl")
    else:
        database_list = open(sys.argv[1]).read().split("\n")[0:limit]
        for line in database_list:
            (track_id, file) = line.split(" ### ")
            print track_id
            # TODO - use threaded codegen
            j = codegen(file, start=-1, duration=-1)
            if len(j):
                code_str = fp.decode_code_string(j[0]["code"])
                meta = j[0]["metadata"]
                l = meta["duration"] * 1000
                a = meta["artist"]
                r = meta["release"]
                t = meta["title"]
                v = meta["version"]
                fp_codes.append({"track_id": track_id, "fp": code_str, "length": str(l), "codever": str(round(v, 2)), "artist": a, "release": r, "track": t})
        fp.ingest(fp_codes, local=True)
        fp.local_save("disk.pkl")

    counter = 0
    actual_win = 0
    original_win = 0
    bm_win = 0
    query_list = open(sys.argv[2]).read().split("\n")[0:limit]
    for line in query_list:
        (track_id, file) = line.split(" ### ")
        print track_id
        j = codegen(munge(file))
        if len(j):
            counter+=1
            response = fp.query_fp(fp.decode_code_string(j[0]["code"]), rows=30, local=True, get_data=True)
            (winner_actual, winner_original) = get_winners(fp.decode_code_string(j[0]["code"]), response, elbow=8)
            winner_actual = winner_actual.split("-")[0]
            winner_original = winner_original.split("-")[0]
            response = fp.best_match_for_query(j[0]["code"], local=True)
            if(response.TRID == track_id):
                bm_win+=1
            if(winner_actual == track_id):
                actual_win+=1
            if(winner_original == track_id):
                original_win+=1
    print "%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%) %d / %d bm (%2.2f%%)" % (actual_win, counter, (float(actual_win)/float(counter))*100.0, \
        original_win, counter, (float(original_win)/float(counter))*100.0, \
        bm_win, counter, (float(bm_win)/float(counter))*100.0)
예제 #7
0
def ingest(request):
    """Fingerprint an uploaded mp3, create its Track row and ingest the code.

    Reads the form fields from ``request.POST`` and the upload from
    ``request.FILES['mp3']``.  ``length`` and ``codever`` are required;
    ``artist``, ``release``, ``track``, ``year`` and ``youtube_code`` are
    optional and stored on the new ``Track``.  The id that is ingested and
    returned is ``track.echoprint_id`` of the row just saved.

    Returns:
        ``HttpResponse`` with status 400 when fingerprinting returns a dict
        (its ``error`` entry is used as the body), when required fields are
        missing, or when a compressed code cannot be decoded; otherwise
        status 200 with a JSON body holding ``track_id``, ``fp`` and
        ``status``.
    """
    params = request.POST
    mp3 = request.FILES['mp3']
    fp_code = generate_fingerprint(mp3)
    # `fp_code is dict` compared against the type object itself and was
    # never true; isinstance performs the intended type check.
    if isinstance(fp_code, dict):
        return HttpResponse(fp_code['error'], status=400)
    if params.get('track_id', "default") == "default":
        track_id = fp.new_track_id()
    else:
        track_id = params['track_id']
    # Validate both required fields before anything is written: a missing
    # codever used to raise only after the Track row had been saved.
    if not params.get('length', None) or params.get('codever') is None:
        return HttpResponse("Invalid data", status=400)

    # First see if this is a compressed code
    if re.match('[A-Za-z\/\+\_\-]', fp_code) is not None:
        code_string = fp.decode_code_string(fp_code)
        if code_string is None:
            result = json.dumps( {"track_id": track_id, "ok": False,
                                  "error": "cannot decode code string %s" % fp_code})
            return HttpResponse(result, status=400)
    else:
        code_string = fp_code

    artist = params.get('artist')
    release = params.get('release')
    title = params.get('track')
    # Track creation
    track = Track(band=artist, release=release,
                  name=title,
                  year=params.get('year'),
                  youtube_code=params.get('youtube_code'))
    track.save()
    # From here on the id of the saved Track replaces the one chosen above.
    track_id = track.echoprint_id
    data = {"track_id": track_id,
            "fp": code_string,
            "length": params['length'],
            "codever": params['codever']}
    if artist:
        data["artist"] = artist
    if release:
        data["release"] = release
    if title:
        data["track"] = title
    fp.ingest(data, do_commit=True, local=False)

    payload = json.dumps({"track_id": track_id, "fp": fp_code,"status": "ok"})
    return HttpResponse(payload, status=200)
예제 #8
0
    def generate_fingerprint_from_list(results, file_list):
        # TODO: os.system is thread safe??
        # TODO: How to test this?
        codes_file = '/tmp/allcodes_%s.json' % (random.randint(1, 10000))
        command = '/home/vagrant/echoprint-codegen/echoprint-codegen -s 10 30 < %s > %s' % (file_list, codes_file)
        os.system(command)
        # Create the Track models
        with open(codes_file, 'r') as data_file:
            data = json.load(data_file)
            for fingerprint in data:
                # check fp doesn't exist in database
                code_string = fingerprint.get('code')
                if code_string:
                    response = fp.best_match_for_query(code_string)
                    if not response.match():
                        label = [v for v in results if v[1] == fingerprint['metadata']['filename']][0][0]
                        youtube_code = fingerprint['metadata']['filename'].replace('.mp3', '').replace('/tmp/', '')
                        year = label.split('-')[0].strip()
                        release = label.split('-')[1].strip()
                        artist = label.split('-')[2].strip()
                        title = label.split('-')[3].strip()
                        fingerprint['metadata']['artist'] = artist
                        fingerprint['metadata']['title'] = title
                        # Track creation
                        Track.sync()
                        track = Track(band=artist, release=release,
                                      name=title,
                                      year=year,
                                      youtube_code=youtube_code)
                        track.save()
                        # Remove all - (due to limitation in fingerprint-server track_id match)
                        fingerprint['metadata']['track_id'] = track.echoprint_id
                    else:
                        # remove duplicate element
                        data.remove(fingerprint)
                        print "This file is duplicated"

        # Overwrite with artist and title
        with open(codes_file, 'w') as data_file:
            data_file.write(json.dumps(data))

        # Fastingest invoke => post all into echo-fingerprint
        codes, _ = parse_json_dump(codes_file)
        fp.ingest(codes)

        FileHandler.delete_file(codes_file)

        return True
예제 #9
0
    def POST(self):
        """Ingest one fingerprint described by the request parameters.

        A request without ``length`` or ``codever`` is answered with
        ``BadRequest``.  When no ``track_id`` is supplied a new one is
        requested from ``fp``.  ``artist``, ``release`` and ``track`` are
        stored only when non-empty.

        Returns:
            A JSON string: ``{"track_id", "status": "ok"}`` after a
            successful ingest, or ``{"track_id", "ok": False, "error"}``
            when a compressed code cannot be decoded.
        """
        params = web.input(track_id="default", fp_code="", artist=None,
                           release=None, track=None, length=None,
                           codever=None)
        if params.track_id != "default":
            track_id = params.track_id
        else:
            track_id = fp.new_track_id()
        if params.length is None or params.codever is None:
            return web.webapi.BadRequest()

        # Codes that start with a letter, "/", "+", "_" or "-" are taken to
        # be compressed and are decoded; anything else is used verbatim.
        raw_code = params.fp_code
        if re.match('[A-Za-z\/\+\_\-]', raw_code) is None:
            code_string = raw_code
        else:
            code_string = fp.decode_code_string(raw_code)
            if code_string is None:
                error = "cannot decode code string %s" % raw_code
                return json.dumps({"track_id": track_id, "ok": False, "error": error})

        data = {"track_id": track_id, "fp": code_string,
                "length": params.length, "codever": params.codever}
        optional = (("artist", params.artist),
                    ("release", params.release),
                    ("track", params.track))
        for key, value in optional:
            if value:
                data[key] = value
        fp.ingest(data, do_commit=True, local=False)

        return json.dumps({"track_id": track_id, "status": "ok"})
예제 #10
0
def process_file(absoluteFilename,c):
    """Record one advertisement file in MySQL and ingest its fingerprint.

    The last path component of ``absoluteFilename`` is split on ``-``; the
    first three parts are used as company, advertisement name and language
    (the language part is cut at its first ``.``).

    Args:
        absoluteFilename: ``/``-separated path of the file being processed.
        c: Sequence whose first element is passed to ``parse_json``; the
            parsed result must provide ``track_id`` and ``length``.

    Database errors are written to ``logfile`` and rolled back instead of
    being raised.  The cursor and connection are always closed.
    """
    # Get filename from absolute path
    filename = absoluteFilename.split('/')[-1].split("-")

    # Get company from filename
    company = filename[0].strip()
    logfile.write(absoluteFilename+'\n')

    # Get reklama name from filename
    name = filename[1].strip()

    # Get language
    language = filename[2].strip().split(".")[0]

    # Add the company to the database
    company_id = addCompanyToDb(company)

    # Get track id
    code = parse_json(c[0])
    track_id = code["track_id"]
    length = code["length"]

    # NOTE(review): connection settings, including the password, are
    # hard-coded here; consider moving them to configuration.
    conn = MySQLdb.connect(host= "localhost",user="******", passwd="ulut123", db="reklama",charset='utf8')
    db = conn.cursor()

    try:
        try:
            # Insert into reklama table
            db.execute("""INSERT INTO reklama(track_id,company_id,name,filename,length,language,date_added,status,company_name) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)""",(track_id,company_id,name,absoluteFilename,length,language.decode("utf-8"),getNowDate(),'Y',company.decode("utf-8")))
            logfile.write("Inserted reklama to database "+track_id+'\n')
            conn.commit()

            # Save fingerprint in Solr
            fp.ingest(code, do_commit=False)
            fp.commit()
        except MySQLdb.Error as e:
            logfile.write("Error %d: %s" % (e.args[0],e.args[1]))
            conn.rollback()
    finally:
        # Release the cursor and connection on every path; they used to be
        # left open.
        db.close()
        conn.close()
예제 #11
0
        fullcodes.append(data)
        trids.append(trid)
        
    return fullcodes,trids

def check_ingests(trids):
    """Mark ads as ingested when their key is present in the pytyrant store.

    For every id the keys ``"<id>-1"`` and ``"<id>-0"`` are looked up in the
    store at 127.0.0.1:1978.  If either exists, the matching ``cuibono_ad``
    row gets ``ingested = 1``; otherwise a line is written to a timestamped
    file under ``logs/``.  All updates are committed together at the end.

    Args:
        trids: Iterable of ad ids.
    """
    dt = datetime.datetime.strftime(datetime.datetime.now(),'%Y%m%d_%H%M%S')
    log = open('logs/'+dt+'.log','w')
    try:
        con = sqlite3.connect(cb_home+'cuibono.db')
        try:
            cur = con.cursor()
            t = pytyrant.PyTyrant.open('127.0.0.1',1978)
            # A set makes each of the membership tests below constant time.
            ty_keys = set(t.keys())
            for trid in trids:
                key = str(trid)
                if key+'-1' in ty_keys or key+'-0' in ty_keys:
                    # Bind the id as a parameter instead of formatting it
                    # into the SQL text.
                    cur.execute('UPDATE cuibono_ad SET ingested = 1 WHERE id = ?', (trid,))
                else:
                    # str() is required here: concatenating an int id with
                    # the message raised TypeError.
                    log.write('ad id '+key+' not ingested correctly.\n')
            con.commit()
        finally:
            con.close()
    finally:
        log.close()


if __name__ == "__main__":
    # Script entry point: ingest the JSON dump named by the first
    # command-line argument, verify the result, then delete the inputs.
    jfile = sys.argv[1]
    fullcodes,trids = parse_json_dump(jfile)
    # Ingest everything without committing, then commit once.
    fp.ingest(fullcodes, do_commit=False)
    fp.commit()
    # Check each id from the dump against the store.
    check_ingests(trids)        
    # Both the dump and the queue file are removed after processing.
    os.remove(jfile)
    os.remove('fingerprint_queue.txt')
예제 #12
0
    S = '<sep>'

    for dir in sys.argv[1:]:
        collect_mp3s(mp3s, dir)

    print 'processing', len(mp3s), 'files'
    dups = open("dedup.dat", "a")
    for count, mp3 in enumerate(mp3s):
        if mp3 in done:
            print '        skipping', mp3
            continue
        print "       ", len(done), count, mp3
        results = codegen(mp3)
        if not results:
            print "can't process", mp3, "skipping"
            continue
        trid, raw_code, ingest_data = results
        response = fp.best_match_for_query(raw_code)
        if response.match():
            print >>dups, 'duplicate', S, mp3, S, 'original', S, get_file(response.TRID)
            print
            print 'duplicate', mp3  
            print '         ', get_file(response.TRID)
            print
        else:
            fp.ingest([ingest_data], do_commit=True)
        done[mp3] = trid
    dups.close()

    
예제 #13
0
        # First see if this is a compressed code
        if re.match('[A-Za-z\/\+\_\-]', params.fp_code) is not None:
           code_string = fp.decode_code_string(params.fp_code)
#           if code_string is None:
               return json.dumps({"track_id":track_id, "ok":False, "error":"cannot decode code string %s" % params.fp_code})
        else:
            code_string = params.fp_code

        data = {"track_id": track_id, 
                "fp": code_string,
                "length": params.length,
                "codever": params.codever }
        if params.artist: data["artist"] = params.artist
        if params.release: data["release"] = params.release
        if params.track: data["track"] = params.track
        fp.ingest(data, do_commit=True, local=False)

        return json.dumps({"track_id":track_id, "status":"ok"})
        
    
class query:
    """web.py handler that looks up the best match for a fingerprint code."""

    def GET(self):
        """Query ``fp`` with the ``fp_code`` parameter and return JSON.

        The reply echoes the submitted code and carries the message, match
        flag, score, query time, track id and total time of the response.
        """
        fp_code = web.input(fp_code="").fp_code
        response = fp.best_match_for_query(fp_code)
        reply = {
            "ok":True,
            "query":fp_code,
            "message":response.message(),
            "match":response.match(),
            "score":response.score,
            "qtime":response.qtime,
            "track_id":response.TRID,
            "total_time":response.total_time,
        }
        return json.dumps(reply)

    def POST(self):
        """Handle POST exactly like GET."""
        return self.GET()

예제 #14
0
    mp3s = []
    S = '<sep>'

    for dir in sys.argv[1:]:
        collect_mp3s(mp3s, dir)

    print 'processing', len(mp3s), 'files'
    dups = open("dedup.dat", "a")
    for count, mp3 in enumerate(mp3s):
        if mp3 in done:
            print '        skipping', mp3
            continue
        print "       ", len(done), count, mp3
        results = codegen(mp3, start=0, duration=60)
        if not results:
            print "can't process", mp3, "skipping"
            continue
        trid, raw_code, ingest_data = results
        response = fp.best_match_for_query(raw_code)
        if response.match():
            print >> dups, 'duplicate', S, mp3, S, 'original', S, get_file(
                response.TRID)
            print
            print 'duplicate', mp3
            print '         ', get_file(response.TRID)
            print
        else:
            fp.ingest([ingest_data], do_commit=True)
        done[mp3] = trid
    dups.close()
예제 #15
0
	    except Exception, e:
	    	print 'Exception:', e    

    return (fullcodes, bigeval)

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print >>sys.stderr, "Usage: %s [-b] [json dump] ..." % sys.argv[0]
        print >>sys.stderr, "       -b: write a file to disk for bigeval"
        sys.exit(1)
    
    write_bigeval = False
    pos = 1
    if sys.argv[1] == "-b":
        write_bigeval = True
        pos = 2
    
    for (i, f) in enumerate(sys.argv[pos:]):
        print "%d/%d %s" % (i+1, len(sys.argv)-pos, f)
        codes, bigeval = parse_json_dump(f)
        fp.ingest(codes, do_commit=False)
        if write_bigeval:
            bename = "bigeval.json"
            if not os.path.exists(bename):
                be = {}
            else:
                be = json.load(open(bename))
            be.update(bigeval)
            json.dump(be, open(bename, "w"))
    fp.commit()
예제 #16
0
def main(argv):
    """Parse the command-line options and run the fingerprint evaluation.

    Either ``-1/--single`` or ``-c/--count`` must be given; otherwise usage
    is printed and the process exits with status 2.  Combining ``--single``
    with ``-p/--print`` is rejected in the same way.

    When ``-L/--local`` names a JSON file, its codes are decoded and
    ingested into the local store, and the global ``_local_bigeval`` is
    reduced to the track ids seen in that file before the test is run.

    Args:
        argv: Option list handed to ``getopt.getopt``
            (NOTE(review): presumably ``sys.argv[1:]`` -- confirm at the
            call site).
    """
    global _local_bigeval
    
    # Defaults for every option that can be overridden below.
    single = None
    how_many = None
    start = 0
    duration = 30
    bitrate = 128
    volume = -1
    lowpass = -1
    encoder = "wav"
    local = None
    diag = False
    channels = 2
    downsample = False
    
    try:
        # NOTE(review): the text between "lowpass=" and "encoder=" below is
        # not valid Python as it stands (it looks like a scrubbed
        # separator) -- TODO restore the original list.
        opts, args = getopt.getopt(argv, "1:c:s:d:b:v:l:L:e:B:pm2h", ["single=","count=","start=","duration=","bitrate=","volume=","lowpass="******"encoder=","print","mono","local=","22kHz","help"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    
    for opt,arg in opts:
        if opt in ("-1","--single"):
            single = arg
        if opt in ("-c","--count"):
            how_many = int(arg)
        if opt in ("-s","--start"):
            start = int(arg)
        if opt in ("-d","--duration"):
            duration = int(arg)
        if opt in ("-b","--bitrate"):
            bitrate = int(arg)
        if opt in ("-v","--volume"):
            volume = int(arg)
        if opt in ("-l","--lowpass"):
            lowpass = int(arg)
        if opt in ("-e","--encoder"):
            encoder = arg
        if opt in ("-L","--local"):
            local = arg
        if opt in ("-p","--print"):
            diag = True
        if opt in ("-m","--mono"):
            channels = 1
        if opt in ("-2","--22kHz"):
            downsample = True
        # NOTE(review): no "binary=" entry is visible in the long option
        # list above, so "--binary" is probably rejected by getopt -- confirm.
        if opt in ("-B","--binary"):
            if not os.path.exists(arg):
                print "Binary %s not found. Exiting." % arg
                sys.exit(2)
            config.CODEGEN_BINARY_OVERRIDE = arg
        # NOTE(review): there is no sys.exit() after usage() here, so
        # processing continues unless usage() itself exits.
        if opt in ("-h","--help"):
            usage()
    
    if (single is None) and (how_many is None):
        usage()
        sys.exit(2)
    if (single is not None) and (diag is not False):
        usage()
        sys.exit(2)
    
    if local is None:
        local = False
    else:
        # ingest
        codes = json.load(open(local,'r'))
        # Invert _local_bigeval so that its values can be looked up by
        # filename and mapped back to their keys.
        _reversed_bigeval = dict( (_local_bigeval[k], k) for k in _local_bigeval)
        code_dict = {}
        tids = {}
        for c in codes:
            fn = c["metadata"]["filename"]
            tid = _reversed_bigeval.get(fn, None)
            # Recorded even when tid is None (filename not in the bigeval).
            tids[tid] = True
            if tid is not None:
                if c.has_key("code"):
                    # Codes of four characters or fewer are skipped.
                    if len(c["code"]) > 4:
                        code_dict[tid] = fp.decode_code_string(c["code"])
                        
        fp.ingest(code_dict, local=True)
        # Keep only the bigeval entries whose key was seen in the codes file.
        lp = {}
        for r in _local_bigeval.keys():
            if tids.has_key(r):
                lp[r] = _local_bigeval[r]
        _local_bigeval = lp
        # From here on `local` is a boolean flag rather than a path.
        local = True
        
    if single is not None:
        test_single(single, local=local, start=start, duration = duration, bitrate = bitrate, volume = volume, lowpass_freq = lowpass, encode_to=encoder, downsample_to_22 = downsample, channels = channels)
    else:
        results = test(how_many, diag = diag, local=local, start=start, duration = duration, bitrate = bitrate, volume = volume, lowpass_freq = lowpass, encode_to=encoder, downsample_to_22 = downsample, channels = channels)
        prf(results)
        dpwe(results)
예제 #17
0
def main(argv):
    """Parse the command-line options, load the data files and run the test.

    Either ``-1/--single`` or ``-c/--count`` must be given.  ``--test`` and
    ``--new`` select the bigeval JSON file and the new-music list; the
    literal value ``none`` (any case) disables the respective file.  The
    module globals ``_local_bigeval``, ``_new_music_files`` and the three
    query counters are set from those files.

    When ``-L/--local`` names a JSON file, its codes are decoded and
    ingested into the local store, and ``_local_bigeval`` is reduced to the
    track ids seen in that file before the test is run.

    Args:
        argv: Option list handed to ``getopt.getopt``
            (NOTE(review): presumably ``sys.argv[1:]`` -- confirm at the
            call site).
    """
    global _local_bigeval, _new_music_files
    global _new_queries, _old_queries, _total_queries
    
    # Defaults for every option that can be overridden below.
    single = None
    how_many = None
    start = 0
    duration = 30
    bitrate = 128
    volume = -1
    lowpass = -1
    decoder = "mpg123"
    encoder = "wav"
    local = None
    diag = False
    channels = 2
    downsample = False
    # Same default assigned a second time.  `decoder` is parsed below but
    # not passed on to test()/test_single() within this function.
    decoder = "mpg123"
    testfile = os.path.join(os.path.dirname(__file__), 'bigeval.json')
    newfile = "new_music"
    no_shuffle = False
    
    try:
        # NOTE(review): the text between "lowpass=" and "encoder=" below is
        # not valid Python as it stands (it looks like a scrubbed
        # separator) -- TODO restore the original list.
        opts, args = getopt.getopt(argv, "1:c:s:d:D:b:v:l:L:e:B:t:n:pm2h", 
            ["single=","count=","start=","duration=", "decoder=","bitrate=","volume=","lowpass="******"encoder=","print","mono","local=", "test=", "new=","22kHz","help","no-shuffle"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    
    for opt,arg in opts:
        if opt in ("-1","--single"):
            single = arg
        if opt in ("-c","--count"):
            how_many = int(arg)
        if opt in ("-s","--start"):
            start = int(arg)
        if opt in ("-d","--duration"):
            duration = int(arg)
        if opt in ("-D","--decoder"):
            decoder = arg
        if opt in ("-b","--bitrate"):
            bitrate = int(arg)
        if opt in ("-v","--volume"):
            volume = int(arg)
        if opt in ("-l","--lowpass"):
            lowpass = int(arg)
        if opt in ("-e","--encoder"):
            encoder = arg
        if opt in ("-L","--local"):
            local = arg
        if opt in ("-p","--print"):
            diag = True
        if opt in ("-m","--mono"):
            channels = 1
        if opt in ("-2","--22kHz"):
            downsample = True
        # NOTE(review): no "binary=" entry is visible in the long option
        # list above, so "--binary" is probably rejected by getopt -- confirm.
        if opt in ("-B","--binary"):
            if not os.path.exists(arg):
                print "Binary %s not found. Exiting." % arg
                sys.exit(2)
            config.CODEGEN_BINARY_OVERRIDE = arg
        if opt in ("-n","--new"):
            newfile = arg
        if opt in ("-t","--test"):
            testfile = arg
        if opt == "--no-shuffle":
            no_shuffle = True
        if opt in ("-h","--help"):
            usage()
            sys.exit(2)
    
    if (single is None) and (how_many is None):
        print >>sys.stderr, "Run in single mode (-1) or say how many files to test (-c)"
        usage()
        sys.exit(2)
    
    if testfile.lower() == "none" and newfile.lower() == "none" and single is None:
        # If both are none, we can't run
        print >>sys.stderr, "Can't run with no datafiles. Skip --test, --new or add -1"
        sys.exit(2)
    if testfile.lower() == "none":
        _local_bigeval = {}
    else:
        if not os.path.exists(testfile):
            print >>sys.stderr, "Cannot find bigeval.json. did you run fastingest with the -b flag?"
            sys.exit(1)
        _local_bigeval = json.load(open(testfile,'r'))
    # A missing new-music file is treated like "none": no new-music queries.
    if newfile.lower() == "none" or not os.path.exists(newfile):
        _new_music_files = []
    else:
        _new_music_files = open(newfile,'r').read().split('\n')

    # Query counts are stored as floats.
    _new_queries = float(len(_new_music_files))
    _old_queries = float(len(_local_bigeval.keys()))
    _total_queries = _new_queries + _old_queries
    
    if local is None:
        local = False
    else:
        # ingest
        codes = json.load(open(local,'r'))
        # Invert _local_bigeval so that its values can be looked up by
        # filename and mapped back to their keys.
        _reversed_bigeval = dict( (_local_bigeval[k], k) for k in _local_bigeval)
        code_dict = {}
        tids = {}
        for c in codes:
            fn = c["metadata"]["filename"]
            tid = _reversed_bigeval.get(fn, None)
            # Recorded even when tid is None (filename not in the bigeval).
            tids[tid] = True
            if tid is not None:
                if c.has_key("code"):
                    # Codes of four characters or fewer are skipped.
                    if len(c["code"]) > 4:
                        code_dict[tid] = fp.decode_code_string(c["code"])
                        
        fp.ingest(code_dict, local=True)
        # Keep only the bigeval entries whose key was seen in the codes file.
        lp = {}
        for r in _local_bigeval.keys():
            if tids.has_key(r):
                lp[r] = _local_bigeval[r]
        _local_bigeval = lp
        # From here on `local` is a boolean flag rather than a path.
        local = True
        
    if single is not None:
        test_single(single, local=local, start=start, duration = duration, bitrate = bitrate, volume = volume, lowpass_freq = lowpass, encode_to=encoder, downsample_to_22 = downsample, channels = channels)
    else:
        results = test(how_many, diag = diag, local=local, no_shuffle=no_shuffle, start=start, duration = duration, bitrate = bitrate, volume = volume, lowpass_freq = lowpass, encode_to=encoder, downsample_to_22 = downsample, channels = channels)
        prf(results)
        dpwe(results)
예제 #18
0
def main():
    if not len(sys.argv) == 4:
        print "usage: python little_eval.py [database_list | disk] query_list [limit]"
        sys.exit()

    fp_codes = []
    limit = int(sys.argv[3])
    if sys.argv[1] == "disk":
        fp.local_load("disk.pkl")
    else:
        database_list = open(sys.argv[1]).read().split("\n")[0:limit]
        for line in database_list:
            (track_id, file) = line.split(" ### ")
            print track_id
            # TODO - use threaded codegen
            j = codegen(file, start=-1, duration=-1)
            if len(j):
                code_str = fp.decode_code_string(j[0]["code"])
                meta = j[0]["metadata"]
                l = meta["duration"] * 1000
                a = meta["artist"]
                r = meta["release"]
                t = meta["title"]
                v = meta["version"]
                fp_codes.append({
                    "track_id": track_id,
                    "fp": code_str,
                    "length": str(l),
                    "codever": str(round(v, 2)),
                    "artist": a,
                    "release": r,
                    "track": t
                })
        fp.ingest(fp_codes, local=True)
        fp.local_save("disk.pkl")

    counter = 0
    actual_win = 0
    original_win = 0
    bm_win = 0
    query_list = open(sys.argv[2]).read().split("\n")[0:limit]
    for line in query_list:
        (track_id, file) = line.split(" ### ")
        print track_id
        j = codegen(munge(file))
        if len(j):
            counter += 1
            response = fp.query_fp(fp.decode_code_string(j[0]["code"]),
                                   rows=30,
                                   local=True,
                                   get_data=True)
            (winner_actual, winner_original) = get_winners(
                fp.decode_code_string(j[0]["code"]), response, elbow=8)
            winner_actual = winner_actual.split("-")[0]
            winner_original = winner_original.split("-")[0]
            response = fp.best_match_for_query(j[0]["code"], local=True)
            if (response.TRID == track_id):
                bm_win += 1
            if (winner_actual == track_id):
                actual_win += 1
            if (winner_original == track_id):
                original_win += 1
    print "%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%) %d / %d bm (%2.2f%%)" % (actual_win, counter, (float(actual_win)/float(counter))*100.0, \
        original_win, counter, (float(original_win)/float(counter))*100.0, \
        bm_win, counter, (float(bm_win)/float(counter))*100.0)
예제 #19
0
        fullcodes.append(data)

    return (fullcodes, bigeval)


if __name__ == "__main__":
    # Command-line entry point: ingest every JSON dump named on the command
    # line and commit once at the end.  With a leading "-b" the bigeval data
    # of each dump is merged into bigeval.json in the current directory.
    if len(sys.argv) < 2:
        print("Usage: %s [-b] [json dump] ..." % sys.argv[0], file=sys.stderr)
        print("       -b: write a file to disk for bigeval", file=sys.stderr)
        sys.exit(1)

    write_bigeval = False
    pos = 1
    if sys.argv[1] == "-b":
        write_bigeval = True
        pos = 2

    for (i, f) in enumerate(sys.argv[pos:]):
        print("%d/%d %s" % (i + 1, len(sys.argv) - pos, f))
        codes, bigeval = parse_json_dump(f)
        fp.ingest(codes, do_commit=False)
        if write_bigeval:
            bename = "bigeval.json"
            if not os.path.exists(bename):
                be = {}
            else:
                # Close the handle explicitly instead of relying on garbage
                # collection.
                with open(bename) as be_in:
                    be = json.load(be_in)
            be.update(bigeval)
            # The with-block guarantees the merged data is flushed to disk
            # before the next dump reads the file again.
            with open(bename, "w") as be_out:
                json.dump(be, be_out)
    fp.commit()
예제 #20
0
def main(argv):
    """Parse the command-line options, load the data files and run the test.

    Either ``-1/--single`` or ``-c/--count`` must be given.  ``--test`` and
    ``--new`` select the bigeval JSON file and the new-music list; the
    literal value ``none`` (any case) disables the respective file.  The
    module globals ``_local_bigeval``, ``_new_music_files`` and the three
    query counters are set from those files.

    When ``-L/--local`` names a JSON file, its codes are decoded and
    ingested into the local store, and ``_local_bigeval`` is reduced to the
    track ids seen in that file before the test is run.

    Args:
        argv: Option list handed to ``getopt.getopt``
            (NOTE(review): presumably ``sys.argv[1:]`` -- confirm at the
            call site).
    """
    global _local_bigeval, _new_music_files
    global _new_queries, _old_queries, _total_queries

    # Defaults for every option that can be overridden below.
    single = None
    how_many = None
    start = 0
    duration = 30
    bitrate = 128
    volume = -1
    lowpass = -1
    decoder = "mpg123"
    encoder = "wav"
    local = None
    diag = False
    channels = 2
    downsample = False
    # Same default assigned a second time.  `decoder` is parsed below but
    # not passed on to test()/test_single() within this function.
    decoder = "mpg123"
    testfile = os.path.join(os.path.dirname(__file__), 'bigeval.json')
    newfile = "new_music"
    no_shuffle = False

    try:
        # NOTE(review): the text between "lowpass=" and "encoder=" below is
        # not valid Python as it stands (it looks like a scrubbed
        # separator) -- TODO restore the original list.
        opts, args = getopt.getopt(argv, "1:c:s:d:D:b:v:l:L:e:B:t:n:pm2h", [
            "single=", "count=", "start=", "duration=", "decoder=", "bitrate=",
            "volume=", "lowpass="******"encoder=", "print", "mono", "local=",
            "test=", "new=", "22kHz", "help", "no-shuffle"
        ])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-1", "--single"):
            single = arg
        if opt in ("-c", "--count"):
            how_many = int(arg)
        if opt in ("-s", "--start"):
            start = int(arg)
        if opt in ("-d", "--duration"):
            duration = int(arg)
        if opt in ("-D", "--decoder"):
            decoder = arg
        if opt in ("-b", "--bitrate"):
            bitrate = int(arg)
        if opt in ("-v", "--volume"):
            volume = int(arg)
        if opt in ("-l", "--lowpass"):
            lowpass = int(arg)
        if opt in ("-e", "--encoder"):
            encoder = arg
        if opt in ("-L", "--local"):
            local = arg
        if opt in ("-p", "--print"):
            diag = True
        if opt in ("-m", "--mono"):
            channels = 1
        if opt in ("-2", "--22kHz"):
            downsample = True
        # NOTE(review): no "binary=" entry is visible in the long option
        # list above, so "--binary" is probably rejected by getopt -- confirm.
        if opt in ("-B", "--binary"):
            if not os.path.exists(arg):
                print "Binary %s not found. Exiting." % arg
                sys.exit(2)
            config.CODEGEN_BINARY_OVERRIDE = arg
        if opt in ("-n", "--new"):
            newfile = arg
        if opt in ("-t", "--test"):
            testfile = arg
        if opt == "--no-shuffle":
            no_shuffle = True
        if opt in ("-h", "--help"):
            usage()
            sys.exit(2)

    if (single is None) and (how_many is None):
        print >> sys.stderr, "Run in single mode (-1) or say how many files to test (-c)"
        usage()
        sys.exit(2)

    if testfile.lower() == "none" and newfile.lower(
    ) == "none" and single is None:
        # If both are none, we can't run
        print >> sys.stderr, "Can't run with no datafiles. Skip --test, --new or add -1"
        sys.exit(2)
    if testfile.lower() == "none":
        _local_bigeval = {}
    else:
        if not os.path.exists(testfile):
            print >> sys.stderr, "Cannot find bigeval.json. did you run fastingest with the -b flag?"
            sys.exit(1)
        _local_bigeval = json.load(open(testfile, 'r'))
    # A missing new-music file is treated like "none": no new-music queries.
    if newfile.lower() == "none" or not os.path.exists(newfile):
        _new_music_files = []
    else:
        _new_music_files = open(newfile, 'r').read().split('\n')

    # Query counts are stored as floats.
    _new_queries = float(len(_new_music_files))
    _old_queries = float(len(_local_bigeval.keys()))
    _total_queries = _new_queries + _old_queries

    if local is None:
        local = False
    else:
        # ingest
        codes = json.load(open(local, 'r'))
        # Invert _local_bigeval so that its values can be looked up by
        # filename and mapped back to their keys.
        _reversed_bigeval = dict(
            (_local_bigeval[k], k) for k in _local_bigeval)
        code_dict = {}
        tids = {}
        for c in codes:
            fn = c["metadata"]["filename"]
            tid = _reversed_bigeval.get(fn, None)
            # Recorded even when tid is None (filename not in the bigeval).
            tids[tid] = True
            if tid is not None:
                if c.has_key("code"):
                    # Codes of four characters or fewer are skipped.
                    if len(c["code"]) > 4:
                        code_dict[tid] = fp.decode_code_string(c["code"])

        fp.ingest(code_dict, local=True)
        # Keep only the bigeval entries whose key was seen in the codes file.
        lp = {}
        for r in _local_bigeval.keys():
            if tids.has_key(r):
                lp[r] = _local_bigeval[r]
        _local_bigeval = lp
        # From here on `local` is a boolean flag rather than a path.
        local = True

    if single is not None:
        test_single(single,
                    local=local,
                    start=start,
                    duration=duration,
                    bitrate=bitrate,
                    volume=volume,
                    lowpass_freq=lowpass,
                    encode_to=encoder,
                    downsample_to_22=downsample,
                    channels=channels)
    else:
        results = test(how_many,
                       diag=diag,
                       local=local,
                       no_shuffle=no_shuffle,
                       start=start,
                       duration=duration,
                       bitrate=bitrate,
                       volume=volume,
                       lowpass_freq=lowpass,
                       encode_to=encoder,
                       downsample_to_22=downsample,
                       channels=channels)
        prf(results)
        dpwe(results)