def test_code_words():
    """Print five sampled reviews next to their coded words.

    For each review returned by ``ReviewsDAL.sampling(5)`` this prints a
    numbered banner, the raw sentence text (``r.sent.raw``), a blank
    separator, and the result of ``WordCoder.code`` applied to
    ``r.sent.words``.

    This is a manual smoke check: it only prints and asserts nothing.
    """
    dal = ReviewsDAL()
    try:
        wordcoder = WordCoder()
        for number, r in enumerate(dal.sampling(5), 1):
            # Single-argument print(...) parses the same way as a Python 2
            # print statement and as a Python 3 function call, so the output
            # is unchanged on either interpreter.
            print("****************** [{}]".format(number))
            print(r.sent.raw)
            print("\n")
            print(wordcoder.code(r.sent.words))
    finally:
        # Release the DAL even if sampling or coding raises; the script
        # entry point of this module closes its DAL the same way.
        dal.close()
# NOTE(review): the line below has lost its original line breaks and
# indentation and fuses two separate units. It must be re-flowed before it
# can run; the exact nesting cannot be recovered from this line alone.
#
# 1. Everything up to and including `print tags` reads like the body of
#    `print_topics(sentence)` (the name called later on this line), but no
#    `def` header is visible -- TODO confirm and restore it.
#    What these statements do:
#      - code the sentence's words with `wordcoder.code`;
#      - turn the coded words into a bag-of-words via `dictionary.doc2bow`;
#      - index `lda_model` with that bag-of-words and sort the resulting
#        pairs in place by their second element, largest first;
#      - wrap each (topic_id, topic_percentage) pair as
#        `MixTopic(topic_mapping[topic_id])`, weight it by the percentage,
#        use the first one to seed `tags` and merge later ones with
#        `tags.add`;
#      - normalize `tags` and print it.
#    The `index` from `enumerate(topic_distribution)` is never used.
#    `wordcoder`, `dictionary`, `lda_model` and `topic_mapping` are not
#    defined on this line -- presumably module-level globals; verify.
#    `tags` stays None when the topic distribution is empty, so
#    `tags.normalize()` would raise AttributeError in that case (assuming it
#    runs after the loop -- TODO confirm).
#
# 2. The `if __name__ == "__main__":` section opens a ReviewsDAL, samples 10
#    reviews, prints a numbered banner per review, splits `review.sent.raw`
#    with `sent_tokenizer.tokenize`, calls `print_topics` on each piece, and
#    finally closes the DAL. The close is not guarded by try/finally, so it
#    is skipped if anything before it raises.
coded_words = wordcoder.code(sentence.words) bow = dictionary.doc2bow(coded_words) topic_distribution = lda_model[bow] topic_distribution.sort(key=lambda t: t[1], reverse=True) tags = None for index, (topic_id, topic_percentage) in enumerate(topic_distribution): mt = MixTopic(topic_mapping[topic_id]) mt.weight(topic_percentage) if tags is None: tags = mt else: tags.add(mt) tags.normalize() print tags if __name__ == "__main__": dal = ReviewsDAL() review_stream = dal.sampling(10) for index,review in enumerate( review_stream): print "*********** [{}] ***********".format(index+1) for sentence in sent_tokenizer.tokenize(review.sent.raw): print_topics(sentence) dal.close()