Esempio n. 1
0
        # Report one topic: its index, its highest-weighted words, and each of
        # those words' share of the topic's total weight.
        print("Topic %d:" % (topic_idx))
        # Indices of the n_top_words largest weights, largest first. Computed
        # once and reused for both the word line and the weight line.
        top_ids = topic.argsort()[:-args.n_top_words - 1:-1]
        print(" ".join([tf_feature_names[i] for i in top_ids]))
        # The normaliser does not depend on the word being printed, so it is
        # evaluated once instead of once per word.
        topic_total = sum(topic)
        for i in top_ids:
            print("%.4f" % (topic[i] / topic_total), end=' ')
        print()

else:
    # Map every vocabulary term to its position in tf_feature_names.
    word2id = {word: idx for idx, word in enumerate(tf_feature_names)}

    # Encode each document as a list of vocabulary ids, keyed by its position
    # in the corpus. Whitespace-separated tokens that are not in the
    # vocabulary are dropped.
    docs = {
        doc_idx: [word2id[w] for w in doc.split() if w in word2id]
        for doc_idx, doc in enumerate(corpus)
    }

    print("Begin training.")
    model = LDA_GIBBS(docs, args.topic, args.iter, tf_feature_names)
    model.learn()
    print(model.get_perplexity())
    # NOTE(review): model.phi is presumably one row of word weights per
    # topic -- confirm against LDA_GIBBS. The weights are printed as stored,
    # without normalisation.
    for t_id, t in enumerate(model.phi):
        print("Topic %d:" % (t_id))
        # Indices of the n_top_words largest weights, largest first; computed
        # once and shared by the word line and the weight line.
        top_ids = t.argsort()[:-args.n_top_words - 1:-1]
        print(" ".join([tf_feature_names[i] for i in top_ids]))
        for i in top_ids:
            print("%.4f" % t[i], end=' ')
        print()