#build the dictionary and the training word-document matrix
dictionary, token_id = ps.dictionary_count(text_high)
corpus = ps.corpus(dictionary, text)
train = ps.word_document(corpus, token_id)
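# For orientation: the word-document matrix built above is a plain
# bag-of-words count matrix, one row per document and one column per
# vocabulary term. A self-contained toy sketch of the same structure
# (hypothetical data, not part of the pipeline):
from collections import Counter
toy_docs = [["cat", "sat"], ["cat", "cat", "dog"]]
toy_vocab = {"cat": 0, "sat": 1, "dog": 2}
toy_matrix = np.zeros((len(toy_docs), len(toy_vocab)), dtype=np.int64)
for d, doc in enumerate(toy_docs):
    for tok, cnt in Counter(doc).items():
        toy_matrix[d, toy_vocab[tok]] = cnt
# toy_matrix -> [[1, 1, 0], [2, 0, 1]]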

# hyperparameters
hiddens = 50    # number of hidden (topic) units
batch = 100     # mini-batch size
epochs = 1000   # training epochs
rate = 0.0001   # learning rate
iter = 1        # Gibbs steps per contrastive-divergence update (CD-1)

#train the RSM model with CD-1 (iter=1)
RSM = rsm_numpy.RSM()
result = RSM.train(train, hiddens, epochs, iter, lr=rate, btsz=batch)
#save the result of the CD-1 RSM
dsl.save(result, 'result/rsm_result_1')

#set iterations=5, i.e., CD-5
iter = 5

#train the RSM model with CD-5 (iter=5)
RSM = rsm_numpy.RSM()
result = RSM.train(train, hiddens, epochs, iter, lr=rate, btsz=batch)
dsl.save(result, 'result/rsm_result_5')
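# Under the hood, CD-k for the replicated softmax alternates k Gibbs steps
# between softmax visible units (word counts) and binary hidden units, with
# the hidden bias scaled by the document length D (Salakhutdinov & Hinton's
# replicated softmax model). The sketch below is schematic numpy, not the
# rsm_numpy internals; W, vbias, hbias and the helpers are illustrative names.
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def softmax_rows(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def cd_k_sketch(v0, W, vbias, hbias, k):
    # v0: (n_docs, vocab) word counts
    D = v0.sum(axis=1, keepdims=True)                 # words per document
    ph0 = sigmoid(v0.dot(W) + D * hbias)              # positive-phase hidden probs
    h = (np.random.rand(*ph0.shape) < ph0).astype(float)
    for _ in range(k):                                # k Gibbs steps
        pv = softmax_rows(h.dot(W.T) + vbias)         # softmax over the vocabulary
        v = np.array([np.random.multinomial(int(d), p)    # redraw D words per doc
                      for d, p in zip(D.ravel(), pv)], dtype=float)
        ph = sigmoid(v.dot(W) + D * hbias)
        h = (np.random.rand(*ph.shape) < ph).astype(float)
    dW = v0.T.dot(ph0) - v.T.dot(ph)                  # positive minus negative statistics
    return dW, (v0 - v).sum(axis=0), (ph0 - ph).sum(axis=0)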

#path of test data
path_test = '20news-bydate-test'

#preprocess the test data
test, test_label = ps.data_perprocess(path_test)
#get test word-document matrix
corpus_test = ps.corpus(dictionary, test)
test = ps.word_document(corpus_test, token_id)
# LDA expects integer word counts, so cast the matrices to int64
train = np.int64(train)
test = np.int64(test)
'''
Experiment1: perplexity of LDA and RSM
'''
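# Both models are scored below with perplexity: the exponentiated negative
# average per-word log-likelihood over held-out words,
# ppl = exp(-sum_d log p(w_d) / sum_d N_d). A minimal helper under that
# definition (the names are illustrative, not part of lda or rsm_numpy):
def perplexity(log_probs, n_words):
    # log_probs: log-likelihood of the held-out words of each document
    # n_words:   number of held-out words in each document
    return np.exp(-np.sum(log_probs) / np.sum(n_words))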

#train the LDA model
print("-------------------LDA GET Training--------------------")
model = lda.LDA(n_topics=50, n_iter=2000, random_state=1)
model.fit(train)
#get the topic_word distribution and doc_topic distribution.
topic_word = model.components_
doc_topic = model.doc_topic_
#save these results
dsl.save(topic_word, 'result/topic_word')
dsl.save(model, 'result/lda_model')
dsl.save(doc_topic, 'result/doc_topic')
print("-------------------LDA Model Has Been Saved--------------------")

#sample held-out documents from the test data
sample = 50
sample_id = np.random.randint(test.shape[0], size=(50, sample))
dsl.save(sample_id, 'result/sample_id')

#Since the doc-topic distribution is document-specific, it has to be
#inferred separately for every test document
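# The lda package exposes model.transform() to run inference on unseen
# documents with the learned topics held fixed; a minimal sketch on the
# first test document:
doc_topic_0 = model.transform(test[:1])   # shape (1, n_topics)
p_w = doc_topic_0.dot(topic_word)         # p(word | doc); rows sum to 1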

#calculate the perplexity of the LDA model
ppl_lda = []
for i in range(sample):