# --- KOS dataset: topic-word distributions and per-topic top words ---
# NOTE(review): qdata_K2 / qdata_K3 are referenced below but loaded earlier
# in the file (outside this chunk) — confirm those loads precede this block.
qdata_K10 = np.loadtxt('data/kos_q_a1p0_b0p5_K10.dat')
ndata_K19 = np.loadtxt('data/kos_n_a1p0_b0p5_K19.dat')
qdata_K19 = np.loadtxt('data/kos_q_a1p0_b0p5_K19.dat')

# import the doc,vocab indices
kosdata = np.loadtxt('data/kosdata.dat')
doc_idx = kosdata[:, 0]
voc_idx = kosdata[:, 1]

# Read the vocabulary, one word per line.  A context manager guarantees the
# file handle is closed even if readlines() raises (the original leaked it
# on error).
with open('data/vocab.kos.txt', 'r') as f:
    vocabulary = f.readlines()

V = 6906                 # vocabulary size of the KOS corpus
beta = 0.5 * np.ones(V)  # symmetric Dirichlet prior over words

phiK2 = ml.calc_phi(qdata_K2, beta, voc_idx, V)
phiK3 = ml.calc_phi(qdata_K3, beta, voc_idx, V)
phiK10 = ml.calc_phi(qdata_K10, beta, voc_idx, V)
phiK19 = ml.calc_phi(qdata_K19, beta, voc_idx, V)


def _top_words(phi, n=20):
    """Return (probs, indices) of the n most probable words per topic.

    Both arrays have shape (num_topics, n) and are ordered most-probable
    first along axis 1.  Factored out of four copy-pasted sort/argsort
    pairs in the original.
    """
    probs = np.sort(phi, axis=1)[:, -n:][:, ::-1]
    indices = np.argsort(phi, axis=1)[:, -n:][:, ::-1]
    return probs, indices


# get the top 20 words and their probabilities
topK2, topK2_ind = _top_words(phiK2)
topK3, topK3_ind = _top_words(phiK3)
topK10, topK10_ind = _top_words(phiK10)
topK19, topK19_ind = _top_words(phiK19)
# now print the top words and their probabilities to file
# --- classic400 dataset: effect of the beta hyperparameter at K=3 ---
# NOTE(review): qdata_b0p1 is referenced below but loaded earlier in the
# file (outside this chunk) — confirm that load precedes this block.
ndata_b1p0 = np.loadtxt("data/c400_n_a1p0_b1p0_K3.dat")
qdata_b1p0 = np.loadtxt("data/c400_q_a1p0_b1p0_K3.dat")
ndata_b10p0 = np.loadtxt("data/c400_n_a1p0_b10p0_K3.dat")
qdata_b10p0 = np.loadtxt("data/c400_q_a1p0_b10p0_K3.dat")

# import doc,vocab indices from the sparse count matrix
c400data = scipy.io.loadmat("data/classic400.mat")
doc_idx, voc_idx = c400data["classic400"].nonzero()

K = 3
V = 6205  # vocabulary size of classic400

# Symmetric Dirichlet priors for the three beta settings being compared.
beta0p1 = 0.1 * np.ones(V)
beta1p0 = 1.0 * np.ones(V)
beta10p0 = 10.0 * np.ones(V)

phi0p1 = ml.calc_phi(qdata_b0p1, beta0p1, voc_idx, V)
phi1p0 = ml.calc_phi(qdata_b1p0, beta1p0, voc_idx, V)
phi10p0 = ml.calc_phi(qdata_b10p0, beta10p0, voc_idx, V)

# get the top 10 words (of topic 0, beta=0.1), most probable first
top1 = np.sort(phi0p1[0])[-10:][::-1]
# print(x) with a single argument behaves identically under Python 2 and 3;
# the original bare `print x` statements were Python-2-only syntax.
print(top1)

# find max probabilities (the largest word probability in each topic)
phimax0p1 = np.max(phi0p1, axis=1)
phimax1p0 = np.max(phi1p0, axis=1)
phimax10p0 = np.max(phi10p0, axis=1)
print(phimax0p1)
print(phimax1p0)
print(phimax10p0)
# Load the classic400 sparse count matrix and take the (doc, word)
# index pairs of its nonzero entries.
c400data = scipy.io.loadmat('data/classic400.mat')
doc_idx, voc_idx = c400data['classic400'].nonzero()

# Import vocabulary.  Each entry in 'classicwordlist' is a nested
# one-element array wrapping the word string.
vocabulary = [entry[0][0] for entry in c400data['classicwordlist']]

K = 3
V = 6205

# Symmetric Dirichlet priors for the five runs being compared.
beta1 = 10000.0 * np.ones(V)
beta2 = 0.1 * np.ones(V)
beta3 = 0.1 * np.ones(V)
beta4 = 0.1 * np.ones(V)
beta5 = 100.0 * np.ones(V)

# Topic-word distributions for each run.
# NOTE(review): qdata_1 .. qdata_5 appear to be loaded earlier in the
# file (outside this chunk) — confirm those loads precede this block.
phi1 = ml.calc_phi(qdata_1, beta1, voc_idx, V)
phi2 = ml.calc_phi(qdata_2, beta2, voc_idx, V)
phi3 = ml.calc_phi(qdata_3, beta3, voc_idx, V)
phi4 = ml.calc_phi(qdata_4, beta4, voc_idx, V)
phi5 = ml.calc_phi(qdata_5, beta5, voc_idx, V)

# Top-20 word probabilities and their vocabulary indices per topic,
# ordered most probable first along axis 1.
top1 = np.sort(phi1, axis=1)[:, -20:][:, ::-1]
top1_ind = np.argsort(phi1, axis=1)[:, -20:][:, ::-1]
top2 = np.sort(phi2, axis=1)[:, -20:][:, ::-1]
top2_ind = np.argsort(phi2, axis=1)[:, -20:][:, ::-1]
top3 = np.sort(phi3, axis=1)[:, -20:][:, ::-1]
top3_ind = np.argsort(phi3, axis=1)[:, -20:][:, ::-1]
top4 = np.sort(phi4, axis=1)[:, -20:][:, ::-1]
top4_ind = np.argsort(phi4, axis=1)[:, -20:][:, ::-1]
top5 = np.sort(phi5, axis=1)[:, -20:][:, ::-1]