# Second query: preprocess the raw title and map it to a bag-of-words
# vector using the shared journals dictionary.
test_doc2 = custom_preprocess('Material and physics')
test_doc_bow2 = journals_dictionary.doc2bow(test_doc2)
print(test_doc_bow2)

# Print the result of indexing the LSI model with the query vector.
print(lsi[test_doc_bow2])

# Score the LSI model with the 'c_v' coherence measure.
# NOTE(review): 'c_v' expects `texts` to be tokenized documents (a list of
# token lists); confirm that journals['Full title'] holds tokens rather
# than raw title strings.
lsi_cm = CoherenceModel(
    model=lsi,
    corpus=journals_corpus,
    dictionary=journals_dictionary,
    texts=journals['Full title'],
    coherence='c_v',
)
LSI_cm = lsi_cm.get_coherence()
# Bare expression: displays the score when run as a notebook cell.
LSI_cm

from gensim.models.hdpmodel import HdpModel

# Fit an HDP model on the journals corpus.
hdp = HdpModel(corpus=journals_corpus, id2word=journals_dictionary)

# Print each topic returned by print_topics(), one per line.
hdp_topics = hdp.print_topics()
for topic in hdp_topics:
    print(topic)

# First query: preprocess, convert to bag-of-words, then print the result
# of indexing the HDP model with that vector.
test_doc = custom_preprocess('Journal of medicines and herbs')
test_doc_bow = journals_dictionary.doc2bow(test_doc)
print(test_doc_bow)

print(hdp[test_doc_bow])

# Second query: same preprocessing and bag-of-words conversion.
test_doc2 = custom_preprocess('Material and physics')
test_doc_bow2 = journals_dictionary.doc2bow(test_doc2)
print(test_doc_bow2)
# Example no. 2
# 0
# Number of folds for cross-validation.
kfolds = 10
# NOTE(review): if `cross_validation` is scikit-learn's legacy module, it was
# removed in 0.20 in favour of model_selection.KFold(n_splits=...) — confirm.
kf = cross_validation.KFold(count1, n_folds=kfolds)

# Split every line of each input on whitespace and collect the token lists.
corpora_documents.extend(line.split() for line in f)
label_level.extend(line.split() for line in f2)
corpora_documents = array(corpora_documents)
label_level = array(label_level)

# Build the dictionary and the bag-of-words vector corpus.
dictionary = corpora.Dictionary(corpora_documents)
#dictionary.save('dictionary.dict')
corpus = [dictionary.doc2bow(doc) for doc in corpora_documents]

# Re-weight the bag-of-words corpus with TF-IDF.
tfidf = models.TfidfModel(corpus)
corpus_tfidf = tfidf[corpus]

# Fit HDP on the TF-IDF corpus, transform the corpus with it, and build a
# similarity index over the transformed corpus.
hdp = HdpModel(corpus_tfidf, id2word=dictionary)
corpus_hdp = hdp[corpus_tfidf]
index = similarities.MatrixSimilarity(corpus_hdp)

print(hdp.print_topics(num_topics=20, num_words=10))







# Example no. 3
# 0
def fit_model(corpus, id2word, num_topics=20):
    """Train an HDP topic model on ``corpus`` and return it.

    Args:
        corpus: Corpus passed to ``HdpModel`` as ``corpus``.
        id2word: Mapping passed to ``HdpModel`` as ``id2word``.
        num_topics: Forwarded only to ``print_topics``; it is not passed
            to the ``HdpModel`` constructor.

    Returns:
        The constructed ``HdpModel`` instance.
    """
    # Train the model.
    model = HdpModel(corpus=corpus, id2word=id2word)
    # The return value is discarded; presumably the call is made for its
    # reporting side effect — confirm against the gensim version in use.
    model.print_topics(num_topics)
    return model