예제 #1
0
	# Fragment of a larger routine (its header is outside this view). It loads the
	# fitted model files under `dirpath`, runs the external inference program on
	# the test documents, and computes test likelihood, classification rates and
	# mean-squared errors against the beta.txt / theta.txt files.
	# `dirpath`, `codepath`, `seed`, `tdocs`, `trlbls`, `tlbls`, `trtheta`, `C`
	# and `M` are defined before this fragment.

	# load the fitted beta and scale every column so it sums to 1 (sum over axis 0)
	beta = np.loadtxt(dirpath+'/final.beta')
	beta = beta/np.sum(beta,0)

	# test model: run the program at `codepath` in 'inf' mode on `tdocs`.
	# s1 is a random integer in [0, seed) passed as the first argument
	# (presumably the RNG seed of the external program -- confirm).
	s1 = np.random.randint(seed)
	cmdtxt = codepath + ' ' + str(s1) + ' inf ' + tdocs + ' ' + dirpath + '/final ' + dirpath
	#print(cmdtxt)
	# NOTE(review): the command is a shell string built by concatenation and the
	# exit status of os.system is ignored; a failed run is only noticed when the
	# loadtxt below fails or reads a stale file.
	os.system(cmdtxt)

	# read test theta and scale every row so it sums to 1
	# NOTE(review): np.loadtxt returns a 1-D array for a single-row file, in which
	# case the axis-1 sum below raises -- confirm the file always has >1 row.
	ttheta = np.loadtxt(dirpath+'/testfinal.theta')
	D = ttheta.shape[0]
	ttheta = ttheta/(np.sum(ttheta,1).reshape(D,1))

	# compute likelihood on test
	tlkh = funcs.compute_lkh(tdocs, beta, ttheta)

	# compute ccr: classifier_training returns a pair whose second element
	# (tpc_lbl_distn) is then handed to classifier_test together with ttheta
	(trccr,tpc_lbl_distn) = funcs.classifier_training(trlbls,trtheta,C,M)
	tccr = funcs.classifier_test(tlbls,tpc_lbl_distn,ttheta)

	# compute beta-mse, theta-mse
	# gbeta is loaded from beta.txt and column-normalized like beta above
	# (presumably the ground-truth topics -- confirm).
	gbeta = np.loadtxt(dirpath+'/beta.txt')
	gbeta = gbeta/np.sum(gbeta,0)
	# Columns of beta are reordered by argsort(argmax(tpc_lbl_distn, axis 1))
	# before taking the mean squared difference; presumably this aligns the
	# learned topics with the order used in beta.txt -- TODO confirm.
	beta_mse = np.mean((beta[:,np.argsort(np.argmax(tpc_lbl_distn,1))]-gbeta)**2.);

	# gtheta is loaded from theta.txt and row-normalized; D is rebound to its
	# row count here.
	gtheta = np.loadtxt(dirpath+'/theta.txt')
	D = gtheta.shape[0]
	gtheta = gtheta/(np.sum(gtheta,1).reshape(D,1))
	# same column permutation as for beta, applied to the test theta
	theta_mse = np.mean((ttheta[:,np.argsort(np.argmax(tpc_lbl_distn,1))]-gtheta)**2.);	
예제 #2
0
    # Fragment of a larger routine (its header is outside this view).
    # `path`, `seed0`, `CODEPATH`, `testfile`, `trainingfile`, `beta`, `theta`,
    # `M`, `C`, `tr_label_file`, `t_label_file` and `avg_tpcs` are defined
    # before this fragment.

    # read training likelihood
    # Values come from the LAST row of likelihood.dat: column 0 -> bic,
    # column 1 -> lkh, column 3 -> runtime (column meanings are inferred from
    # the variable names -- confirm against the writer of that file).
    lk = np.loadtxt(path + '/likelihood.dat')
    bic = lk[-1, 0]
    lkh = lk[-1, 1]
    runtime = lk[-1, 3]

    # inference on test set
    # seed is a random integer in [0, seed0) forwarded via --seed.
    seed = np.random.randint(seed0)
    cmdtxt = CODEPATH + '/ptm --task test' + ' --corpus ' + testfile + ' --convergence 1e-4 --seed ' + str(
        seed)
    cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final'
    # NOTE(review): shell string built by concatenation; the exit status of
    # os.system is not checked.
    os.system(cmdtxt)

    ## compute likelihood on training set
    # Only columns 1..M of beta are passed (column 0 is skipped).
    Etrlk = funcs.compute_lkh(trainingfile, beta[:, 1:M + 1], theta)

    ### read test topic proportions
    # presumably written by the ptm run above -- confirm
    theta_test = np.loadtxt(path + '/test.alpha')

    ## measure class label consistency
    # classifier_training returns a pair; its second element is reused by
    # classifier_test on the test proportions.
    (ccr_tr,
     tpc_lbl_distn) = funcs.classifier_training(tr_label_file, theta, C, M)
    ccr_t = funcs.classifier_test(t_label_file, tpc_lbl_distn, theta_test)

    ## save useful stuff
    # results file
    # Opened in append mode in the current working directory.
    # NOTE(review): no close() is visible within this fragment -- confirm the
    # handle is closed further down.
    refile = open('results.dat', 'a')
    # first comma-separated fields of the row; no newline is written here
    refile.write(
        str(M) + ', ' + str(bic) + ', ' + str(Etrlk) + ', ' + str(avg_tpcs) +
        ', ')
예제 #3
0
파일: PyRun.py 프로젝트: neoql/ATD
    # Fragment of a larger routine (its header is outside this view).
    # `path`, `N`, `M`, `C`, `uswitch`, `vswitch`, `testfile`, `obs_tfile`,
    # `trainingfile`, `beta`, `theta`, `tr_label_file` and `t_label_file` are
    # defined before this fragment.

    # sparsity statistics: each helper returns a 3-tuple that is unpacked here
    (avg_tpcs, avg_wrds, unq_wrds) = funcs.topic_word_sparsity(path+'/word-assignments.dat',N,M,uswitch)
    (uv_avg_tpcs, uv_avg_wrds, uv_unq_wrds) = funcs.switch_topic_word_sparsity(uswitch,vswitch,N,M)
    
    # read training likelihood
    # Values come from the LAST row of likelihood.dat: column 0 -> bic,
    # column 1 -> lkh, column 3 -> runtime (column meanings are inferred from
    # the variable names -- confirm against the writer of that file).
    lk = np.loadtxt(path+'/likelihood.dat')
    bic = lk[-1,0]
    lkh = lk[-1,1]
    runtime = lk[-1,3]

    # inference on test set
    # './ptm' is resolved relative to the current working directory.
    # NOTE(review): shell string built by concatenation; the exit status of
    # os.system is not checked.
    cmdtxt = './ptm --task test' + ' --corpus ' + testfile + ' --convergence 1e-4'
    cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final' 
    os.system(cmdtxt) 
      
    ## compute likelihood on the training file
    # NOTE(review): the original comment said "held-out set", but the call
    # below passes `trainingfile` -- confirm which corpus is intended.
    # Only columns 1..M of beta are passed (column 0 is skipped).
    Etrlk = funcs.compute_lkh(trainingfile, beta[:,1:M+1], theta)

    ### read test topic proportions
    # presumably written by the ptm run above -- confirm
    theta_test = np.loadtxt(path+'/test.alpha')
    
    ## measure class label consistency
    # classifier_training returns a pair; its second element is reused by
    # classifier_test on the test proportions.
    (ccr_tr,tpc_lbl_distn) = funcs.classifier_training(tr_label_file,theta,C,M)
    ccr_t = funcs.classifier_test(t_label_file,tpc_lbl_distn,theta_test)
    
    ## inference on observed test set
    # Same command as above, with `obs_tfile` as the corpus.
    cmdtxt = './ptm --task test' + ' --corpus ' + obs_tfile + ' --convergence 1e-4'
    cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final' 
    os.system(cmdtxt) 
    
    ## topic proportions of the observed set
    # Rebinds theta_test: the same test.alpha path is read again (presumably
    # overwritten by the second ptm run -- confirm), so the earlier value is
    # no longer available after this line.
    theta_test = np.loadtxt(path+'/test.alpha')
예제 #4
0
파일: PyRun.py 프로젝트: hsoleimani/ATD
    # Fragment of a larger routine (its header is outside this view).
    # `path`, `N`, `M`, `C`, `uswitch`, `vswitch`, `seed0`, `CODEPATH`,
    # `testfile`, `trainingfile`, `beta`, `theta`, `tr_label_file`,
    # `t_label_file`, `avg_tpcs`, `avg_wrds` and `unq_wrds` are defined before
    # this fragment.

    # sparsity statistics: the helper returns a 3-tuple that is unpacked here
    (uv_avg_tpcs, uv_avg_wrds, uv_unq_wrds) = funcs.switch_topic_word_sparsity(uswitch,vswitch,N,M)
    
    # read training likelihood
    # Values come from the LAST row of likelihood.dat: column 0 -> bic,
    # column 1 -> lkh, column 3 -> runtime (column meanings are inferred from
    # the variable names -- confirm against the writer of that file).
    lk = np.loadtxt(path+'/likelihood.dat')
    bic = lk[-1,0]
    lkh = lk[-1,1]
    runtime = lk[-1,3]

    # inference on test set
    # seed is a random integer in [0, seed0) forwarded via --seed.
    # NOTE(review): shell string built by concatenation; the exit status of
    # os.system is not checked.
    seed = np.random.randint(seed0)
    cmdtxt = CODEPATH + '/ptm --task test' + ' --corpus ' + testfile + ' --convergence 1e-4 --seed '+str(seed)
    cmdtxt = cmdtxt + ' --dir ' + path + ' --model ' + path + '/final' 
    os.system(cmdtxt) 
      
    ## compute likelihood on training set
    # Only columns 1..M of beta are passed (column 0 is skipped).
    Etrlk = funcs.compute_lkh(trainingfile, beta[:,1:M+1], theta)

    ### read test topic proportions
    # presumably written by the ptm run above -- confirm
    theta_test = np.loadtxt(path+'/test.alpha')
    
    ## measure class label consistency
    # classifier_training returns a pair; its second element is reused by
    # classifier_test on the test proportions.
    (ccr_tr,tpc_lbl_distn) = funcs.classifier_training(tr_label_file,theta,C,M)
    ccr_t = funcs.classifier_test(t_label_file,tpc_lbl_distn,theta_test)
    
    
    ## save useful stuff
    # results file
    # Appends one comma-separated row to results.dat in the current working
    # directory, in this field order: M, bic, Etrlk, avg_tpcs, mean(avg_wrds),
    # sum(unq_wrds), ccr_tr, ccr_t, runtime, uv_avg_tpcs, mean(uv_avg_wrds),
    # sum(uv_unq_wrds). Only the last write ends the line.
    # NOTE(review): no close() is visible within this fragment -- confirm the
    # handle is closed further down.
    refile = open('results.dat', 'a')
    refile.write(str(M) + ', ' + str(bic) + ', ' + str(Etrlk) + ', ' + str(avg_tpcs) + ', ')
    refile.write(str(np.mean(avg_wrds)) + ', ' + str(np.sum(unq_wrds)) + ', ' + str(ccr_tr) + ', ' + str(ccr_t) + ', ')
    refile.write(str(runtime)+', '+str(uv_avg_tpcs) + ', ' + str(np.mean(uv_avg_wrds)) + ', ' + str(np.sum(uv_unq_wrds)) + '\n')