예제 #1
0
# --+ pivot the long table: index on (term_sort, topic_n), then move the
#     innermost level (topic_n) into the columns
df.set_index(["term_sort", "topic_n"], inplace=True)
df = df.unstack()
# ----+ sidewaystable
# For every topic, append its term column followed by a column holding the
# matching weights rendered as "( weight )".
df_h = pd.DataFrame()
term_cols = df["term"]
weight_cols = df["weight"]
for topic_idx in range(8):
    weight_labels = pd.Series(["( %s )" % w for w in weight_cols[topic_idx]])
    df_h = pd.concat([df_h, term_cols[topic_idx], weight_labels], axis=1)
# ----+ write data to file
out_parts = (
    "scripts",
    "analysis",
    "topicModeling",
    ".output",
    "8t_term_topic.tex",
)
out_f = os.path.join(*out_parts)
df_h.to_latex(out_f, index=True)
# --+ get transformed corpus as per the lda model
transf_corpus = lda_8.get_document_topics(corpus)
# ----+ rearrange data on document-topic pairs probabilities
# One [doc_id, topic_n, prob] row per entry of each document, taken from the
# first two fields of the entry. The counter is named `doc_id` rather than
# `id` so the builtin `id()` is not shadowed for the rest of the script.
doc_topic_m = [
    [doc_id, topic[0], topic[1]]
    for doc_id, doc in enumerate(transf_corpus)
    for topic in doc
]
# ----+ get a df with named columns
# Naming the columns in the constructor (instead of renaming 0/1/2 afterwards)
# keeps the three columns present even when no rows were collected.
new_names = ["doc_id", "topic_n", "prob"]
df = pd.DataFrame(doc_topic_m, columns=new_names)
# ----+ dominant topic
예제 #2
0
# --+ apply the column names prepared earlier in the script (`cols`)
df.rename(columns=cols, inplace=True)
# --+ pivot the long table: index on (term_sort, topic_n), then move the
#     innermost level (topic_n) into the columns
df.set_index(['term_sort', 'topic_n'], inplace=True)
df = df.unstack()
# ----+ sidewaystable
# For every topic, append its term column followed by a column holding the
# matching weights rendered as "( weight )".
df_h = pd.DataFrame()
term_cols = df['term']
weight_cols = df['weight']
for topic_idx in range(9):
    weight_labels = pd.Series(['( %s )' % w for w in weight_cols[topic_idx]])
    df_h = pd.concat([df_h, term_cols[topic_idx], weight_labels], axis=1)
# ----+ write data to file
out_parts = ('analysis', 'topicModeling', '.output', '9t_term_topic.tex')
out_f = os.path.join(*out_parts)
df_h.to_latex(out_f, index=True)
# --+ get transformed corpus as per the lda model
transf_corpus = lda_9.get_document_topics(corpus)
# ----+ rearrange data on document-topic pairs probabilities
# One [doc_id, topic_n, prob] row per entry of each document, taken from the
# first two fields of the entry. The counter is named `doc_id` rather than
# `id` so the builtin `id()` is not shadowed for the rest of the script.
doc_topic_m = [
    [doc_id, topic[0], topic[1]]
    for doc_id, doc in enumerate(transf_corpus)
    for topic in doc
]
# ----+ get a df with named columns
# Naming the columns in the constructor (instead of renaming 0/1/2 afterwards)
# keeps the three columns present even when no rows were collected.
new_names = ['doc_id', 'topic_n', 'prob']
df = pd.DataFrame(doc_topic_m, columns=new_names)
# ----+ dominant topic