예제 #1
0
def test_code_words():
    """Print a handful of sampled reviews next to their coded words.

    Requests a sample of 5 from ``ReviewsDAL.sampling`` and, for each
    returned item, prints a numbered banner, the raw text held in
    ``r.sent.raw``, a blank spacer, and the result of passing
    ``r.sent.words`` through ``WordCoder.code``.

    This is a manual smoke test: it makes no assertions and returns
    nothing; the output is meant to be inspected by eye.
    """
    dal = ReviewsDAL()
    try:
        wordcoder = WordCoder()

        # Start counting at 1 so the banner shows a human-friendly index.
        for index, r in enumerate(dal.sampling(5), 1):
            # Single-argument parenthesised prints behave the same under the
            # Python 2 print statement and the Python 3 print function.
            print("****************** [{}]".format(index))
            print(r.sent.raw)
            print("\n")
            print(wordcoder.code(r.sent.words))
    finally:
        # Release the data-access handle even if sampling or coding fails.
        # NOTE(review): relies on ReviewsDAL.close(), which the script entry
        # point in this source already calls -- confirm it is safe here too.
        dal.close()
예제 #2
0
    coded_words = wordcoder.code(sentence.words)
    bow = dictionary.doc2bow(coded_words)

    topic_distribution = lda_model[bow]
    topic_distribution.sort(key=lambda t: t[1], reverse=True)

    tags = None
    for index, (topic_id, topic_percentage) in enumerate(topic_distribution):
        mt = MixTopic(topic_mapping[topic_id])
        mt.weight(topic_percentage)

        if tags is None:
            tags = mt
        else:
            tags.add(mt)

    tags.normalize()
    print tags

if __name__ == "__main__":
    # Script entry point: request a sample of 10 reviews, split each review's
    # raw text into sentences with sent_tokenizer, and hand every sentence to
    # print_topics.
    dal = ReviewsDAL()
    try:
        review_stream = dal.sampling(10)

        # Number the banners from 1 for readability.
        for index, review in enumerate(review_stream, 1):
            # Single-argument parenthesised print: identical output under the
            # Python 2 print statement and the Python 3 print function.
            print("*********** [{}] ***********".format(index))

            for sentence in sent_tokenizer.tokenize(review.sent.raw):
                print_topics(sentence)
    finally:
        # Always release the data-access handle, including when tokenising or
        # topic printing raises part-way through the sample.
        dal.close()