# Location of the held-out test documents.
path_test = '20news-bydate-test'

# Preprocess the test data.
test, test_label = ps.data_perprocess(path_test)

# Build the test word-document matrix from the existing dictionary and
# token ids.
corpus_test = ps.corpus(dictionary, test)
test = ps.word_document(corpus_test, token_id)

# Perplexity on the test matrix for the model stored as rsm_result_5.
result = dsl.load('result/rsm_result_5')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']
# The perplexity is the score used to assess the topic model.
Eppl_CD5 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD5=", Eppl_CD5)

# Same evaluation for the model stored as rsm_result_1.
result = dsl.load('result/rsm_result_1')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']
Eppl_CD1 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD1=", Eppl_CD1)

# Persist the intermediate objects under result/.
dsl.save(text, 'result/text')
dsl.save(train_label, 'result/train_label')
dsl.save(dictionary, 'result/dictionary')
dsl.save(token_id, 'result/token_id')
# Location of the held-out test documents.
path_test = '20news-bydate-test'

# Preprocess the test data.
test, test_label = ps.data_perprocess(path_test)

# Build the test word-document matrix from the existing dictionary and
# token ids.
corpus_test = ps.corpus(dictionary, test)
test = ps.word_document(corpus_test, token_id)

# Perplexity on the test matrix for the model stored as rsm_result_5.
result = dsl.load('result/rsm_result_5')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']
# The perplexity is the score used to assess the topic model.
Eppl_CD5 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD5=", Eppl_CD5)

# Same evaluation for the model stored as rsm_result_1.
result = dsl.load('result/rsm_result_1')
w_vh, w_v, w_h = result['w_vh'], result['w_v'], result['w_h']
Eppl_CD1 = ppl.rsmppl(w_v, w_h, w_vh, test)
print("Eppl_CD1=", Eppl_CD1)

# Persist the intermediate objects under result/.
dsl.save(text, 'result/text')
dsl.save(train_label, 'result/train_label')
dsl.save(dictionary, 'result/dictionary')
dsl.save(token_id, 'result/token_id')
dsl.save(train, 'result/train')
# NOTE(review): this first statement looks like the tail of a loop whose
# header precedes this fragment -- confirm its indentation against the
# full file.
ppl_lda.append(pplt)
ppl_lda = np.array(ppl_lda)
dsl.save(ppl_lda, 'result/ppl_lda')

# Load the rsm_5 model from disk.
result = dsl.load('result/rsm_result_5')
w_vh = result['w_vh']
w_v = result['w_v']
w_h = result['w_h']

# Calculate the ppl of the rsm_5 model, once per entry of sample_id.
ppl_rsm_5 = []
for i in xrange(sample):
    test_sample = test[sample_id[i]]
    pplt = ppl.rsmppl(w_v, w_h, w_vh, test_sample)
    ppl_rsm_5.append(pplt)
ppl_rsm_5 = np.array(ppl_rsm_5)
# Save the rsm_5 perplexities themselves; the ppl_lda array used to be
# written to this path by mistake.
dsl.save(ppl_rsm_5, 'result/ppl_rsm_5')

# Load the rsm_1 model from disk.
result = dsl.load('result/rsm_result_1')
w_vh = result['w_vh']
w_v = result['w_v']
w_h = result['w_h']

# Calculate the ppl of the rsm_1 model.
# NOTE(review): the loop body presumably continues past the end of this
# fragment -- confirm against the full file.
ppl_rsm_1 = []
for i in xrange(sample):
    test_sample = test[sample_id[i]]
# NOTE(review): this first statement looks like the tail of a loop whose
# header precedes this fragment -- confirm its indentation against the
# full file.
ppl_lda.append(pplt)
ppl_lda = np.array(ppl_lda)
dsl.save(ppl_lda, 'result/ppl_lda')

# Load the rsm_5 model from disk.
result = dsl.load('result/rsm_result_5')
w_vh = result['w_vh']
w_v = result['w_v']
w_h = result['w_h']

# Calculate the ppl of the rsm_5 model, once per entry of sample_id.
ppl_rsm_5 = []
for i in xrange(sample):
    test_sample = test[sample_id[i]]
    pplt = ppl.rsmppl(w_v, w_h, w_vh, test_sample)
    ppl_rsm_5.append(pplt)
ppl_rsm_5 = np.array(ppl_rsm_5)
# Save the rsm_5 perplexities themselves; the ppl_lda array used to be
# written to this path by mistake.
dsl.save(ppl_rsm_5, 'result/ppl_rsm_5')

# Load the rsm_1 model from disk.
result = dsl.load('result/rsm_result_1')
w_vh = result['w_vh']
w_v = result['w_v']
w_h = result['w_h']

# Calculate the ppl of the rsm_1 model.
ppl_rsm_1 = []