# build the dictionary and the word-document matrix for the training data
dictionary, token_id = ps.dictionary_count(text_high)
corpus = ps.corpus(dictionary, text)
train = ps.word_document(corpus, token_id)

# hyperparameters
hiddens = 50     # number of hidden (topic) units
batch = 100      # mini-batch size
epochs = 1000    # training epochs
rate = 0.0001    # learning rate
cd_steps = 1     # contrastive-divergence steps, i.e., CD-1

# train the RSM model with CD-1
RSM = rsm_numpy.RSM()
result = RSM.train(train, hiddens, epochs, cd_steps, lr=rate, btsz=batch)
# save the result of the RSM trained with CD-1
dsl.save(result, 'result/rsm_result_1')

# set the number of contrastive-divergence steps to 5, i.e., CD-5
cd_steps = 5
# train the RSM model with CD-5
RSM = rsm_numpy.RSM()
result = RSM.train(train, hiddens, epochs, cd_steps, lr=rate, btsz=batch)
dsl.save(result, 'result/rsm_result_5')

# path of the test data
path_test = '20news-bydate-test'
# preprocess the test data (the repo's helper is named data_perprocess)
test, test_label = ps.data_perprocess(path_test)
# get the test word-document matrix
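# --- sketch (assumption): the statement that builds the test word-document
# matrix is missing from the source at this point; the two calls below
# simply mirror the training pipeline above, reusing the training
# dictionary and token_id so that train and test share one vocabulary ---
corpus_test = ps.corpus(dictionary, test)
test = ps.word_document(corpus_test, token_id)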
# When using LDA we need to cast the matrices to int64
train = np.int64(train)
test = np.int64(test)

'''
Experiment 1: perplexity of LDA and RSM
'''
# train the LDA model
print("-------------------LDA Training Started--------------------")
model = lda.LDA(n_topics=50, n_iter=2000, random_state=1)
model.fit(train)

# get the topic-word distribution and the doc-topic distribution
topic_word = model.components_
doc_topic = model.doc_topic_

# save these data
dsl.save(topic_word, 'result/topic_word')
dsl.save(model, 'result/lda_model')
dsl.save(doc_topic, 'result/doc_topic')
print("-------------------LDA Model Has Been Saved--------------------")

# sample held-out documents from the test data
sample = 50
sample_id = np.random.randint(test.shape[0], size=(50, sample))
dsl.save(sample_id, 'result/sample_id')

# Since the doc-topic distribution differs for each document, it has to be
# inferred for every held-out test document.
# calculate the perplexity of the LDA model
ppl_lda = []
for i in range(sample):
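    # --- sketch (assumption): the body of this loop is missing from the
    # source; below is the standard held-out perplexity
    #     ppl = exp(-sum_d log p(w_d) / sum_d N_d)
    # using lda's transform() to infer doc-topic mixtures for unseen
    # documents; treating each column of sample_id as one draw of 50
    # held-out documents is also an assumption ---
    docs = test[sample_id[:, i], :]                      # held-out documents for draw i
    doc_topic_test = model.transform(docs)               # infer theta for unseen docs
    word_prob = np.dot(doc_topic_test, topic_word)       # p(w | d) = theta . phi
    log_lik = np.sum(docs * np.log(word_prob + 1e-12))   # log-likelihood of the counts
    ppl_lda.append(np.exp(-log_lik / docs.sum()))        # per-word perplexity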