Beispiel #1
0
def test_perplexity(res_file, playlist_test_file, tag_test_file):
    """Print the perplexity of a stored model on test playlists and tags.

    The model is read from ``res_file`` with ``analyze.loadresult`` and the
    test data is read with ``train_hte.loaddata``.  The value returned by
    ``NeitherPredictor.calc_perplexity`` is printed, nothing is returned.
    """
    # NOTE(review): the ``[-1]`` selector presumably asks for the last stored
    # snapshot -- confirm against ``analyze.loadresult``.
    snapshot = analyze.loadresult(res_file, [-1])[0]
    test_playlists, test_tags = train_hte.loaddata(playlist_test_file,
                                                   tag_test_file)

    # A snapshot is indexed as (Pi, Gt, Tt, A); the predictor takes them in
    # the order (Pi, A, Gt, Tt).
    Pi, Gt, Tt, A = snapshot[0], snapshot[1], snapshot[2], snapshot[3]

    model = predict.NeitherPredictor(Pi, A, Gt, Tt)
    print(model.calc_perplexity(test_playlists, test_tags))
Beispiel #2
0
def analyze(res_file='experiments/small_set_with_tag.txt',
            dataset='yes_small_cross', song=14):
    """Plot every model snapshot stored in ``res_file``, one figure each.

    For each snapshot the rows of ``Gt`` and ``Tt`` are drawn with
    ``drawcircles`` using their first two columns as coordinates, each
    ``Tt`` row is labelled with a name read from the dataset's
    ``tag_hash_weighted.txt`` and each ``Gt`` row with its index.  The
    entries related to one song are then highlighted with scatter markers.

    Args:
        res_file: model file understood by ``mio.load_model``.
        dataset: sub-directory of ``data/`` holding ``tag_hash_weighted.txt``,
            ``playlists.tagged.txt`` and ``tags.3.txt``.
        song: index of the song to highlight; used as a row index into the
            loaded ``tags`` and as a column index into each snapshot's ``A``.
            Defaults to 14, the value that used to be hard-coded.

    The function blocks in ``pylab.show()`` and returns nothing.
    """
    snapshots = mio.load_model(res_file)

    # The lambda value used to be parsed out of ``res_file`` here, but it was
    # never used, and the parse raised IndexError for any file name without a
    # '-L<number>' component -- including the default ``res_file``.

    with open(path.join('data', dataset, 'tag_hash_weighted.txt')) as fin:
        tag_lines = fin.read().strip().splitlines()
    # One label per row of Tt; the label is the third ',\t'-separated field.
    n_tags = snapshots[0].Tt.shape[0]
    tag_names = [tag_lines[i].split(',\t')[2].strip() for i in range(n_tags)]

    playlists, tags = train_hte.loaddata(
        path.join('data', dataset, 'playlists.tagged.txt'),
        path.join('data', dataset, 'tags.3.txt'))

    # Indices of the non-zero entries of the song's tag row; this does not
    # depend on the snapshot, so compute it once.
    song_tags = np.nonzero(tags[song])[0]

    for i in range(len(snapshots)):
        print(i)
        fig = pylab.figure()
        fig.hold()
        pylab.title(res_file + ':' + str(i))
        Pi = snapshots[i].Pi
        Gt = snapshots[i].Gt
        Tt = snapshots[i].Tt
        A = snapshots[i].A

        drawcircles(Gt[:, 0], Gt[:, 1], np.sqrt(Pi), fill=True)
        drawcircles(Tt[:, 0], Tt[:, 1], np.ones(Tt.shape[0]) * 0.05,
                    color='m', edgecolor='none')
        for j in range(Tt.shape[0]):
            pylab.text(Tt[j, 0], Tt[j, 1], tag_names[j], figure=fig)
        for j in range(Gt.shape[0]):
            pylab.text(Gt[j, 0], Gt[j, 1], str(j), color='r', figure=fig)

        # Highlight the song: red diamonds on its tags, green squares (sized
        # by Pi) on the Gt rows with a non-zero entry in the song's A column.
        pylab.scatter(Tt[song_tags, 0], Tt[song_tags, 1],
                      c='r', marker='D', figure=fig)
        song_rows = np.nonzero(A[:, song])[0]
        pylab.scatter(Gt[song_rows, 0], Gt[song_rows, 1],
                      s=Pi[song_rows],
                      c='g', marker='s', figure=fig)

    pylab.show()
Beispiel #3
0
def test_tag_accuracy(res_file, tag_test_file, n):
    """Score the tags predicted by a stored model against a tag test file.

    The model is read from ``res_file`` with ``analyze.loadresult``; the
    reference tags are the second item returned by ``train_hte.loaddata``
    when called without a playlist file.  ``n`` is forwarded unchanged to
    ``Predictor.predict_tag``.

    Returns whatever ``test_tag_acc`` returns for (predicted, reference).
    """
    snapshot = analyze.loadresult(res_file, [-1])[0]
    reference_tags = train_hte.loaddata(None, tag_test_file)[1]

    # A snapshot is indexed as (Pi, Gt, Tt, A); the predictor takes them in
    # the order (Pi, A, Gt, Tt).
    Pi, Gt, Tt, A = snapshot[0], snapshot[1], snapshot[2], snapshot[3]

    model = predict.Predictor(Pi, A, Gt, Tt)
    predicted_tags = model.predict_tag(n)
    return test_tag_acc(predicted_tags, reference_tags)
Beispiel #4
0
def split_tag(tag_file, mode, param):
    """Split a tag file into a train and a test part and save both.

    The two parts are written next to ``tag_file`` as
    ``<basename>_<mode><param>_train.txt`` and
    ``<basename>_<mode><param>_test.txt``, where ``<basename>`` is the file
    name up to its first ``.txt``.

    Args:
        tag_file: path of the tag file, read with ``trainer.loaddata``.
        mode: ``'random_tag'`` (split with ``split_tag_random``) or
            ``'random_song'`` (split with ``split_tag_song``).
        param: forwarded unchanged to the chosen split function and embedded
            in the output file names via ``str(param)``.

    Raises:
        ValueError: if ``mode`` is not one of the supported modes.
    """
    # Fail fast and explicitly: an unknown mode used to fall through both
    # branches and surface later as an UnboundLocalError on ``tag_train``.
    if mode not in ('random_tag', 'random_song'):
        raise ValueError("unknown split mode %r; expected 'random_tag' or "
                         "'random_song'" % (mode,))

    _, tags = trainer.loaddata(None, tag_file)

    if mode == 'random_tag':
        tag_train, tag_test = split_tag_random(tags, param)
    else:
        tag_train, tag_test = split_tag_song(tags, param)

    basename = path.basename(tag_file).split('.txt')[0]
    dirname = path.dirname(tag_file)
    prefix = path.join(dirname, basename + '_' + mode + str(param))

    save_tags(tag_train, prefix + '_train.txt')
    save_tags(tag_test, prefix + '_test.txt')
Beispiel #5
0
def report_all(res_dir, playlist_test_file, tag_test_file):
    """Evaluate every ``res*`` file in ``res_dir`` and summarise perplexities.

    Each regular file in ``res_dir`` whose name starts with ``res`` is loaded
    with ``analyze.loadresult`` and scored on the test data with
    ``NeitherPredictor.calc_perplexity``.  The hyper-parameters are parsed
    from the '-'-separated tokens of the file name: ``L<float>``,
    ``sc<float>`` and ``ss<int>``.  Results that share the same
    (lam, sc, ss) triple are grouped together.

    Returns:
        A numpy array with one row per (lam, sc, ss) triple, sorted by that
        triple, with columns
        ``[lam, sc, ss, mean(r[0]), std(r[0]), mean(r[1]), std(r[1])]``
        where ``r`` is the value returned by ``calc_perplexity``.  The same
        array is also written to ``report.mat`` under the key ``'report'``.

    Raises:
        IndexError: if a file name lacks one of the three tokens.
        ValueError: if a token's numeric part cannot be parsed.
    """
    # Sorted so that the processing order does not depend on os.listdir.
    files = sorted(path.join(res_dir, name) for name in os.listdir(res_dir)
                   if name.startswith('res')
                   and path.isfile(path.join(res_dir, name)))
    playlists, tags = train_hte.loaddata(playlist_test_file, tag_test_file)

    # Zero-pad the tag matrix to 250 columns.
    # NOTE(review): 250 presumably matches the number of tags the models were
    # trained with -- confirm; wider inputs will fail in the assignment below.
    padded = np.zeros((tags.shape[0], 250))
    padded[:, :tags.shape[1]] = tags
    tags = padded

    stat = {}
    for f in files:
        print(f)
        # Parse the file name only: the directory part of the path may itself
        # contain '-' separated pieces starting with 'L', 'sc' or 'ss'.
        tokens = path.basename(f).split('-')
        lam = float([k[1:] for k in tokens if k.startswith('L')][0])
        sc = float([k[2:] for k in tokens if k.startswith('sc')][0])
        ss = int([k[2:] for k in tokens if k.startswith('ss')][0])

        res = analyze.loadresult(f, [-1])[0]
        Pi = res[0]
        Gt = res[1]
        Tt = res[2]
        A = res[3]

        predictor = predict.NeitherPredictor(Pi, A, Gt, Tt)
        r = predictor.calc_perplexity(playlists, tags)
        print('%s %s' % (r, r[0] + lam * r[1]))

        stat.setdefault((lam, sc, ss), []).append(r)

    report = []
    for key in sorted(stat):
        ph = [x[0] for x in stat[key]]
        pl = [x[1] for x in stat[key]]
        report.append([key[0], key[1], key[2],
                       np.mean(ph), np.std(ph), np.mean(pl), np.std(pl)])
    report = np.array(report)
    from scipy import io as sio
    sio.savemat('report.mat', {'report': report})
    return report