Example #1
0
# --- Training-data scaffolding for the 8 news categories ---------------------
# NOTE(review): `countline`, `corpora`, and `M` are defined elsewhere in the
# project; this chunk cannot run stand-alone.
ARTICLE_NAME = ["Computer", "Entertainment", "Sports", "Science", "Economy",
                "World", "Politics", "Society"]
# Per-category raw article text, filled in later by the model-building step.
ARTICLE = {name: "" for name in ARTICLE_NAME}
ret = []
# One placeholder list per category; each is sized below from its article file.
data_train = [[] for _ in range(len(ARTICLE_NAME))]
train_num = 0   # index of the category currently being sized
train_sum = 0   # total number of article lines across all categories


for n in ARTICLE_NAME:
    data_range = countline('/Users/Soma/Onedrive/News_Dataset/article' + n + '.txt')
    train_sum += data_range
    # Pre-size with [None] * n instead of appending None one element at a time.
    data_train[train_num].extend([None] * data_range)
    train_num += 1

# Flat placeholder vectors, one slot per training line.
label_train = [None] * train_sum
article_data = [None] * train_sum
num = 0
num3 = 0
dictionary = corpora.Dictionary.load_from_text('test_dic4.txt')

# Smoke-test the MeCab tokenizer wrapper on a sample (Japanese) headline.
M.pp(M.isMecab("香川ループ弾「衝撃」と賞賛"))
test = [[] for _ in range(6)]
test[0].extend(["a", "b", "c"])
test[1].extend(["d", "e"])
print(test)
        # NOTE(review): this chunk is truncated — the enclosing function header
        # (presumably `def cleate_lda_model():`, which is called in __main__
        # below) and the opening of this dict literal are not visible here.
        "Sports": "",
        "Science": "",
        "Economy": "",
        "World": "",
        "Politics": "",
        "Society": "",
    }
    # Word -> integer-id dictionary built by an earlier preprocessing step.
    dictionary = corpora.Dictionary.load_from_text("noun_dic.txt")

    # Train and persist one 30-topic LDA model per news category.
    for n in ARTICLE_NAME:
        # NOTE(review): Python-2 print *statement* — this file mixes py2/py3
        # print syntax; confirm the target interpreter version.
        print "\n" + n + " LDA modl cleate..\n"
        f = codecs.open("/Users/Soma/Onedrive/News_Dataset/article" + n + ".txt", "r")
        ARTICLE[n] = f.readlines()
        f.close()
        # NOTE(review): rebinds the module-level `data_train` but the result is
        # never used below — the model trains on the pre-built MmCorpus instead.
        # Verify whether this bag-of-words pass is dead code.
        data_train = [dictionary.doc2bow(M.isMecab(j)) for j in ARTICLE[n]]
        tfidf_corpus = gensim.corpora.MmCorpus("news_noun_" + n + ".mm")
        lda = models.LdaModel(corpus=tfidf_corpus, id2word=dictionary, num_topics=30)
        lda.save("model_" + n + ".lda")


if __name__ == "__main__":
    # Build all per-category LDA models, then dump the topics of two of them.
    # print converted from Python-2 statement form to call form: identical
    # output for a single argument on both Python 2 and 3, and consistent
    # with the call-form print used earlier in this file.
    print("cleate_dic..")
    # CD.cleate_dic()
    cleate_lda_model()
    lda = models.LdaModel.load("model_Sports.lda")
    lda2 = models.LdaModel.load("model_Computer.lda")
    # print_topics(k) shows the top topics of each trained model.
    for n in range(30):
        M.pp(lda.print_topics(n + 1))
    for n in range(30):
        M.pp(lda2.print_topics(n + 1))