Beispiel #1
0
 def get_keywords(self, pageText, count):
         """Extract keywords from a single page of text.

         The text is tokenized with ``getTokenizedDocs``, converted to a
         bag-of-words representation and fed into a fresh ``TFIDF`` instance;
         the words chosen by ``TFIDF.selectImportantWords_tf`` are returned.

         Args:
             pageText: The raw text of the page (passed as a one-document
                 corpus to ``getTokenizedDocs``).
             count: Forwarded unchanged to ``selectImportantWords_tf``;
                 presumably the number of keywords wanted -- confirm against
                 the ``TFIDF`` implementation.

         Returns:
             A list of vocabulary words, one per entry returned by
             ``selectImportantWords_tf``, in the same order.
         """
         tfidf_model = TFIDF()
         tokenized_docs = getTokenizedDocs([pageText])
         bags_of_words = [tfidf_model.doc2bow(doc) for doc in tokenized_docs]
         tfidf_model.buildVocabIndex(bags_of_words)
         selected = tfidf_model.selectImportantWords_tf(count)
         # Materialize the keys: on Python 3 ``keys()`` returns a view that
         # does not support positional indexing, which the lookup below needs.
         vocabulary = list(tfidf_model.index.keys())
         # NOTE(review): element [1] of each selected entry is used as a
         # position into the key list; this assumes the key order of
         # ``index`` matches the word ids -- confirm against TFIDF.
         return [vocabulary[entry[1]] for entry in selected]