def test_perplexity(res_file, playlist_test_file, tag_test_file):
    """Print the perplexity of a stored result on test playlists and tags.

    Args:
        res_file: Result file handed to ``analyze.loadresult``.
        playlist_test_file: Playlist file handed to ``train_hte.loaddata``.
        tag_test_file: Tag file handed to ``train_hte.loaddata``.

    Returns:
        None. Whatever ``calc_perplexity`` returns is printed to stdout.
    """
    # NOTE(review): a function named `analyze` is also defined in this file;
    # `analyze.loadresult` needs `analyze` to be the module here -- confirm.
    # Presumably [-1] requests only the last stored snapshot -- verify
    # against loadresult.
    snapshot = analyze.loadresult(res_file, [-1])[0]
    test_playlists, test_tags = train_hte.loaddata(playlist_test_file,
                                                   tag_test_file)

    # The snapshot is stored positionally as (Pi, Gt, Tt, A); the predictor
    # constructor takes them in the order (Pi, A, Gt, Tt).
    pi, gt, tt, a = snapshot[0], snapshot[1], snapshot[2], snapshot[3]
    model = predict.NeitherPredictor(pi, a, gt, tt)

    print(model.calc_perplexity(test_playlists, test_tags))
def analyze(res_file='experiments/small_set_with_tag.txt',
            dataset='yes_small_cross', song=14):
    """Plot every model stored in ``res_file`` as a labelled 2-D figure.

    For each model returned by ``mio.load_model`` one figure is created that
    shows:

    * circles at the first two columns of ``Gt`` (third ``drawcircles``
      argument is ``sqrt(Pi)``), each annotated with its row index in red;
    * small magenta circles at the first two columns of ``Tt``, each
      annotated with a label read from ``tag_hash_weighted.txt``;
    * red diamonds on the ``Tt`` rows where ``tags[song]`` is non-zero and
      green squares (sized by ``Pi``) on the ``Gt`` rows where
      ``A[:, song]`` is non-zero.

    The previous version also parsed a ``-L<number>`` token out of
    ``res_file`` into a local that was never used; that parse raised
    ``IndexError`` for file names without such a token (including the
    default ``res_file``), so it has been removed.

    Args:
        res_file: Model file passed to ``mio.load_model``.
        dataset: Sub-directory of ``data/`` holding
            ``tag_hash_weighted.txt``, ``playlists.tagged.txt`` and
            ``tags.3.txt``.
        song: Index of the song to highlight (row of ``tags``, column of
            ``A``). Defaults to 14, the value that used to be hard-coded.

    Returns:
        None. Figures are displayed with ``pylab.show()``.
    """
    res = mio.load_model(res_file)

    with open(path.join('data', dataset, 'tag_hash_weighted.txt')) as fin:
        hash_lines = fin.read().strip().splitlines()

    # One label per row of Tt in the first model: the third ',\t'-separated
    # field of the corresponding line of the hash file.
    n_labels = res[0].Tt.shape[0]
    labels = [hash_lines[i].split(',\t')[2].strip() for i in range(n_labels)]

    # Only the tag matrix is needed for plotting; playlists are discarded.
    _, tags = train_hte.loaddata(
        path.join('data', dataset, 'playlists.tagged.txt'),
        path.join('data', dataset, 'tags.3.txt'))

    # Rows of Tt to highlight for `song`; identical for every model.
    song_tag_rows = np.nonzero(tags[song])[0]

    for i, model in enumerate(res):
        print(i)
        fig = pylab.figure()
        # NOTE(review): Figure.hold() was deprecated in Matplotlib 2.0 and
        # removed in 3.0; kept because this file targets the older API.
        fig.hold()
        pylab.title(res_file + ':' + str(i))

        Pi = model.Pi
        Gt = model.Gt
        Tt = model.Tt
        A = model.A

        drawcircles(Gt[:, 0], Gt[:, 1], np.sqrt(Pi), fill=True)
        drawcircles(Tt[:, 0], Tt[:, 1], np.ones(Tt.shape[0]) * 0.05,
                    color='m', edgecolor='none')

        for j in range(Tt.shape[0]):
            pylab.text(Tt[j, 0], Tt[j, 1], labels[j], figure=fig)
        for j in range(Gt.shape[0]):
            pylab.text(Gt[j, 0], Gt[j, 1], str(j), color='r', figure=fig)

        # Highlight the selected song.
        song_gt_rows = np.nonzero(A[:, song])[0]
        pylab.scatter(Tt[song_tag_rows, 0], Tt[song_tag_rows, 1],
                      c='r', marker='D', figure=fig)
        pylab.scatter(Gt[song_gt_rows, 0], Gt[song_gt_rows, 1],
                      s=Pi[song_gt_rows], c='g', marker='s', figure=fig)

    # NOTE(review): the original indentation was lost, so it is unclear
    # whether show() ran once per figure or once at the end; it is called
    # once here, after all figures exist -- confirm.
    pylab.show()
def test_tag_accuracy(res_file, tag_test_file, n):
    """Score predicted tags from a stored result against a tag test file.

    Args:
        res_file: Result file handed to ``analyze.loadresult``.
        tag_test_file: Tag file handed to ``train_hte.loaddata`` (the
            playlist argument is ``None``).
        n: Forwarded unchanged to ``Predictor.predict_tag``.

    Returns:
        Whatever ``test_tag_acc(predicted, truth)`` returns.
    """
    # NOTE(review): a function named `analyze` is also defined in this file;
    # `analyze.loadresult` needs `analyze` to be the module here -- confirm.
    snapshot = analyze.loadresult(res_file, [-1])[0]

    # loaddata returns a (playlists, tags) pair; only the tags are used.
    truth = train_hte.loaddata(None, tag_test_file)[1]

    # Stored order is (Pi, Gt, Tt, A); the constructor wants (Pi, A, Gt, Tt).
    pi, gt, tt, a = snapshot[0], snapshot[1], snapshot[2], snapshot[3]
    model = predict.Predictor(pi, a, gt, tt)

    predicted = model.predict_tag(n)
    return test_tag_acc(predicted, truth)
def split_tag(tag_file, mode, param):
    """Split a tag file into train and test parts and save both.

    The two parts are written next to ``tag_file`` as
    ``<base>_<mode><param>_train.txt`` and ``<base>_<mode><param>_test.txt``,
    where ``<base>`` is the file name up to its first ``'.txt'``.

    Args:
        tag_file: Path of the tag file, loaded via ``trainer.loaddata``.
        mode: ``'random_tag'`` (uses ``split_tag_random``) or
            ``'random_song'`` (uses ``split_tag_song``).
        param: Forwarded to the chosen split function and embedded in the
            output file names via ``str(param)``.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
            Previously an unknown mode loaded the data and then failed with
            ``UnboundLocalError`` when saving.
    """
    # Reject bad modes before doing any I/O.
    if mode not in ('random_tag', 'random_song'):
        raise ValueError(
            "unknown split mode %r; expected 'random_tag' or 'random_song'"
            % (mode,))

    # loaddata returns a (playlists, tags) pair; no playlists are requested.
    _, tags = trainer.loaddata(None, tag_file)

    if mode == 'random_tag':
        tag_train, tag_test = split_tag_random(tags, param)
    else:
        tag_train, tag_test = split_tag_song(tags, param)

    basename = path.basename(tag_file).split('.txt')[0]
    dirname = path.dirname(tag_file)
    prefix = path.join(dirname, basename + '_' + mode + str(param))
    save_tags(tag_train, prefix + '_train.txt')
    save_tags(tag_test, prefix + '_test.txt')
def _parse_run_params(filename):
    """Return ``(lam, sc, ss)`` parsed from the '-'-separated file name.

    The first token starting with ``'L'`` gives ``lam`` (float), the first
    starting with ``'sc'`` gives ``sc`` (float) and the first starting with
    ``'ss'`` gives ``ss`` (int). Only the base name is inspected so that the
    directory part of the path can never be mistaken for a token.

    Raises:
        IndexError: If one of the three tokens is missing.
        ValueError: If a token's remainder is not a valid number.
    """
    tokens = path.basename(filename).split('-')
    lam = float([t[1:] for t in tokens if t.startswith('L')][0])
    sc = float([t[2:] for t in tokens if t.startswith('sc')][0])
    ss = int([t[2:] for t in tokens if t.startswith('ss')][0])
    return lam, sc, ss


def report_all(res_dir, playlist_test_file, tag_test_file, n_tags=250,
               out_file='report.mat'):
    """Evaluate every ``res*`` file in a directory and summarise the results.

    Each regular file in ``res_dir`` whose name starts with ``'res'`` is
    loaded, scored with ``NeitherPredictor.calc_perplexity`` on the test
    data, and grouped by the ``(lam, sc, ss)`` values encoded in its file
    name. For every group the mean and standard deviation of the first and
    second component of the score are computed.

    Args:
        res_dir: Directory containing the result files.
        playlist_test_file: Playlist file handed to ``train_hte.loaddata``.
        tag_test_file: Tag file handed to ``train_hte.loaddata``.
        n_tags: Number of columns the tag matrix is zero-padded to before
            scoring. Defaults to 250, the value that used to be hard-coded
            (presumably the number of tag embeddings in the models --
            TODO confirm).
        out_file: Path of the MATLAB file the report is saved to under the
            key ``'report'``. Defaults to ``'report.mat'``.

    Returns:
        ``numpy`` array with one row per ``(lam, sc, ss)`` group, in sorted
        key order: ``[lam, sc, ss, mean0, std0, mean1, std1]``.

    Raises:
        ValueError: If the tag matrix has more than ``n_tags`` columns.
    """
    from scipy import io as sio

    # NOTE(review): a function named `analyze` is also defined in this file;
    # `analyze.loadresult` needs `analyze` to be the module here -- confirm.
    files = [path.join(res_dir, f) for f in os.listdir(res_dir)
             if f.startswith('res') and path.isfile(path.join(res_dir, f))]

    playlists, tags = train_hte.loaddata(playlist_test_file, tag_test_file)
    if tags.shape[1] > n_tags:
        raise ValueError('tag matrix has %d columns, more than n_tags=%d'
                         % (tags.shape[1], n_tags))
    # Zero-pad the tag matrix on the right to exactly n_tags columns.
    padded = np.zeros((tags.shape[0], n_tags))
    padded[:, :tags.shape[1]] = tags
    tags = padded

    stat = {}
    for f in files:
        print(f)
        lam, sc, ss = _parse_run_params(f)

        res = analyze.loadresult(f, [-1])[0]
        Pi, Gt, Tt, A = res[0], res[1], res[2], res[3]
        predictor = predict.NeitherPredictor(Pi, A, Gt, Tt)
        r = predictor.calc_perplexity(playlists, tags)

        # Same output as the former `print r, r[0]+lam*r[1]` statement.
        print('%s %s' % (r, r[0] + lam * r[1]))
        stat.setdefault((lam, sc, ss), []).append(r)

    report = []
    # Sorted so the row order does not depend on dict iteration order.
    for key in sorted(stat):
        first = [x[0] for x in stat[key]]
        second = [x[1] for x in stat[key]]
        report.append([key[0], key[1], key[2],
                       np.mean(first), np.std(first),
                       np.mean(second), np.std(second)])
    report = np.array(report)

    sio.savemat(out_file, {'report': report})
    return report