# ---- evaluate the trained model on the held-out documents ----

# Learned topic-word matrix; normalise each column (topic) to sum to one.
beta = np.loadtxt(f'{dirpath}/final.beta')
beta = beta / beta.sum(axis=0)

# Run the external inference binary ('inf' task) on the test docs,
# re-seeded with a fresh draw from [0, seed).
s1 = np.random.randint(seed)
cmdtxt = f'{codepath} {s1} inf {tdocs} {dirpath}/final {dirpath}'
os.system(cmdtxt)

# Inferred per-document topic proportions on the test set; normalise rows.
ttheta = np.loadtxt(f'{dirpath}/testfinal.theta')
D = ttheta.shape[0]
ttheta = ttheta / ttheta.sum(axis=1).reshape(D, 1)

# Held-out likelihood under the learned beta and inferred theta.
tlkh = funcs.compute_lkh(tdocs, beta, ttheta)

# Class-label consistency: fit the topic->label map on the training side,
# then score it on the test side.
trccr, tpc_lbl_distn = funcs.classifier_training(trlbls, trtheta, C, M)
tccr = funcs.classifier_test(tlbls, tpc_lbl_distn, ttheta)

# MSE against ground truth. Learned topics are permuted by each topic's
# dominant label so they line up with the ground-truth ordering.
topic_order = np.argsort(np.argmax(tpc_lbl_distn, 1))

gbeta = np.loadtxt(f'{dirpath}/beta.txt')
gbeta = gbeta / gbeta.sum(axis=0)
beta_mse = np.mean((beta[:, topic_order] - gbeta) ** 2.)

gtheta = np.loadtxt(f'{dirpath}/theta.txt')
D = gtheta.shape[0]
gtheta = gtheta / gtheta.sum(axis=1).reshape(D, 1)
theta_mse = np.mean((ttheta[:, topic_order] - gtheta) ** 2.)
# read training likelihood lk = np.loadtxt(path + '/likelihood.dat') bic = lk[-1, 0] lkh = lk[-1, 1] runtime = lk[-1, 3] # inference on test set seed = np.random.randint(seed0) cmdtxt = CODEPATH + '/ptm --task test' + ' --corpus ' + testfile + ' --convergence 1e-4 --seed ' + str( seed) cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final' os.system(cmdtxt) ## compute likelihood on training set Etrlk = funcs.compute_lkh(trainingfile, beta[:, 1:M + 1], theta) ### read test topic proportions theta_test = np.loadtxt(path + '/test.alpha') ## measure class label consistency (ccr_tr, tpc_lbl_distn) = funcs.classifier_training(tr_label_file, theta, C, M) ccr_t = funcs.classifier_test(t_label_file, tpc_lbl_distn, theta_test) ## save useful stuff # results file refile = open('results.dat', 'a') refile.write( str(M) + ', ' + str(bic) + ', ' + str(Etrlk) + ', ' + str(avg_tpcs) + ', ')
# Sparsity statistics from the per-word topic assignments, plus the
# switch-variable variant of the same statistics.
(avg_tpcs, avg_wrds, unq_wrds) = funcs.topic_word_sparsity(path+'/word-assignments.dat',N,M,uswitch)
(uv_avg_tpcs, uv_avg_wrds, uv_unq_wrds) = funcs.switch_topic_word_sparsity(uswitch,vswitch,N,M)

# Read the training trace; the last row holds the final statistics
# (col 0: BIC, col 1: likelihood, col 3: runtime) -- TODO confirm columns.
lk = np.loadtxt(path+'/likelihood.dat')
bic = lk[-1,0]
lkh = lk[-1,1]
runtime = lk[-1,3]

# Inference on the test set via the ptm binary (no explicit seed here,
# unlike the sibling variants of this script).
cmdtxt = './ptm --task test' + ' --corpus ' + testfile + ' --convergence 1e-4'
cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final'
os.system(cmdtxt)

# Likelihood on the TRAINING corpus (the original comment said "held-out",
# but the argument is trainingfile). Beta's first column is skipped.
Etrlk = funcs.compute_lkh(trainingfile, beta[:,1:M+1], theta)

# Test-set topic proportions from the inference run above.
theta_test = np.loadtxt(path+'/test.alpha')

# Class-label consistency: train on training thetas, score on test thetas.
(ccr_tr,tpc_lbl_distn) = funcs.classifier_training(tr_label_file,theta,C,M)
ccr_t = funcs.classifier_test(t_label_file,tpc_lbl_distn,theta_test)

# Second inference pass, this time on the observed portion of the test set.
cmdtxt = './ptm --task test' + ' --corpus ' + obs_tfile + ' --convergence 1e-4'
cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final'
os.system(cmdtxt)

# Topic proportions of the observed set (overwrites theta_test above;
# test.alpha is rewritten by the second run).
theta_test = np.loadtxt(path+'/test.alpha')
# Switch-variable sparsity statistics.
(uv_avg_tpcs, uv_avg_wrds, uv_unq_wrds) = funcs.switch_topic_word_sparsity(uswitch, vswitch, N, M)

# Read the training trace; the last row holds the final statistics
# (col 0: BIC, col 1: likelihood, col 3: runtime) -- TODO confirm columns.
lk = np.loadtxt(path + '/likelihood.dat')
bic = lk[-1, 0]
lkh = lk[-1, 1]
runtime = lk[-1, 3]

# Inference on the test set: shell out to the ptm binary with a fresh
# random seed drawn from [0, seed0).
seed = np.random.randint(seed0)
cmdtxt = CODEPATH + '/ptm --task test' + ' --corpus ' + testfile + ' --convergence 1e-4 --seed ' + str(seed)
cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final'
os.system(cmdtxt)

# Likelihood on the training set; beta's first column is skipped
# (presumably a non-topic column -- verify against how beta is written).
Etrlk = funcs.compute_lkh(trainingfile, beta[:, 1:M + 1], theta)

# Test-set topic proportions produced by the inference run above.
theta_test = np.loadtxt(path + '/test.alpha')

# Class-label consistency: train the topic->label distribution on the
# training thetas, then score it on the test thetas.
(ccr_tr, tpc_lbl_distn) = funcs.classifier_training(tr_label_file, theta, C, M)
ccr_t = funcs.classifier_test(t_label_file, tpc_lbl_distn, theta_test)

# Append one results row (CSV-ish). FIX: the original opened results.dat
# and never closed it, leaking the handle and risking lost buffered output
# on abnormal exit; the 'with' block guarantees flush + close.
with open('results.dat', 'a') as refile:
    refile.write(str(M) + ', ' + str(bic) + ', ' + str(Etrlk) + ', ' + str(avg_tpcs) + ', ')
    refile.write(str(np.mean(avg_wrds)) + ', ' + str(np.sum(unq_wrds)) + ', ' + str(ccr_tr) + ', ' + str(ccr_t) + ', ')
    refile.write(str(runtime) + ', ' + str(uv_avg_tpcs) + ', ' + str(np.mean(uv_avg_wrds)) + ', ' + str(np.sum(uv_unq_wrds)) + '\n')