def q04_topic_modelling(path, n_top_words=20):
    """Return one summary string per LDA topic listing its top words.

    Args:
        path: Passed through unchanged to ``q02_count_vectorizer_for_LDA``
            and ``q03_LDA``.
        n_top_words: Number of highest-weighted words to list per topic.
            Values below 1 produce topics with no words.

    Returns:
        A list with one ``'Topic <idx>: <word> <word> ...'`` string for each
        row of the array returned by ``q03_LDA``, words ordered from the
        highest to the lowest weight.
    """
    # The second return value is indexed by column position below;
    # presumably the vectorizer's feature names -- confirm against q02.
    _, feature_names = q02_count_vectorizer_for_LDA(path)
    topic_word = q03_LDA(path)

    # Clamp so that 0 or a negative request cannot turn into a slice that
    # selects (almost) the whole vocabulary.
    n_words = max(n_top_words, 0)

    message = []
    for topic_idx, topic in enumerate(topic_word):
        # argsort is ascending: reverse it, then keep the first n_words.
        top_indices = topic.argsort()[::-1][:n_words]
        words = ' '.join(feature_names[i] for i in top_indices)
        # Accumulate every topic instead of overwriting the result each pass.
        message.append('Topic ' + str(topic_idx) + ': ' + words)
    return message
Example #2
0
def q03_LDA(path):
    """Fit a 20-component LDA model and return its ``components_`` array.

    Args:
        path: Passed through unchanged to ``q02_count_vectorizer_for_LDA``.

    Returns:
        The fitted model's ``components_`` attribute (per scikit-learn's
        documentation, one row of word weights per topic).
    """
    # Only the first return value is fitted on; the second is not needed here.
    matrix, _ = q02_count_vectorizer_for_LDA(path)

    # ``n_components`` replaces the old ``n_topics`` keyword, which newer
    # scikit-learn releases no longer accept.
    model = LatentDirichletAllocation(
        n_components=20,
        random_state=1,  # fixed seed so repeated runs give the same topics
        learning_method='batch',
        max_iter=500,
    )
    model.fit(matrix)
    return model.components_
Example #3
0
def q04_topic_modelling(path, n_top_words=20):
    """Build a ``'Topic <idx>: <words>'`` summary line for every LDA topic.

    Args:
        path: Passed through unchanged to ``q02_count_vectorizer_for_LDA``
            and ``q03_LDA``.
        n_top_words: Number of highest-weighted words to include per topic.
            Values below 1 produce topics with no words.

    Returns:
        A list of strings, one per row of the array returned by ``q03_LDA``,
        each listing that row's top words from highest to lowest weight,
        separated by single spaces.
    """
    # The second return value is indexed by column position below;
    # presumably the vectorizer's feature names -- confirm against q02.
    _, feature_names = q02_count_vectorizer_for_LDA(path)
    topic_word = q03_LDA(path)

    # Guard against 0/negative requests, which would otherwise be read as a
    # slice bound and select far more words than asked for.
    n_words = max(n_top_words, 0)

    message = []
    for topic_idx, weights in enumerate(topic_word):
        # Descending order of weight, truncated to exactly n_words entries
        # (the earlier ``[:-n:-1]`` slice returned one word too few).
        top_indices = weights.argsort()[::-1][:n_words]
        top_words = [feature_names[i] for i in top_indices]
        message.append('Topic ' + str(topic_idx) + ': ' + ' '.join(top_words))
    return message
Example #4
0
def q04_topic_modelling(path, n_top_words=20):
    """Return a ``'Topic <idx>: <words>'`` string for every LDA topic.

    Args:
        path: Passed through unchanged to ``q03_LDA`` and
            ``q02_count_vectorizer_for_LDA``.
        n_top_words: Number of highest-weighted words to include per topic.
            Values below 1 produce topics with no words.

    Returns:
        A list of strings, one per row of the array returned by ``q03_LDA``,
        each listing that row's top words from highest to lowest weight,
        separated by single spaces.
    """
    topic_word = q03_LDA(path)

    # Loop-invariant: fetch the vocabulary once instead of re-running the
    # vectorizer for every topic. The second return value is presumably the
    # feature names -- confirm against q02.
    _, feature_names = q02_count_vectorizer_for_LDA(path)
    vocabulary = np.array(feature_names)

    # Guard against 0/negative requests, which would otherwise be read as a
    # slice bound and select far more words than asked for.
    n_words = max(n_top_words, 0)

    topics = []
    for i, topic_dist in enumerate(topic_word):
        # argsort is ascending: reverse it, then keep exactly n_words indices
        # (the earlier ``[:-n:-1]`` slice returned one word too few).
        top_indices = np.argsort(topic_dist)[::-1][:n_words]
        topics.append('Topic {}: {}'.format(i, ' '.join(vocabulary[top_indices])))
    return topics
Example #5
0
def q03_LDA(path):
    """Fit a 20-component LDA model and return its ``components_`` array.

    Args:
        path: Passed through unchanged to ``q02_count_vectorizer_for_LDA``.

    Returns:
        The ``components_`` attribute of the fitted
        ``LatentDirichletAllocation`` model.
    """
    # Only the first return value is used for fitting; presumably it is the
    # term-count matrix -- confirm against q02.
    vectorized = q02_count_vectorizer_for_LDA(path)
    term_counts = vectorized[0]

    # Batch learning with a fixed seed, capped at 500 iterations.
    model = LatentDirichletAllocation(
        n_components=20,
        max_iter=500,
        learning_method='batch',
        random_state=1,
    )
    model.fit(term_counts)

    topic_word = model.components_
    return topic_word