# --+ reshape the term-topic table and export it as a LaTeX table
# index on (term rank, topic) so that unstack() can pivot the innermost
# index level (topic_n) into the columns
df.set_index(["term_sort", "topic_n"], inplace=True)
df = df.unstack()
# ----+ sidewaystable
# lay the topics side by side: for each topic, one column of terms followed
# by one column of weights rendered as "( weight )"
df_h = pd.DataFrame()
for i in range(8):
    terms = df["term"][i]
    # NOTE(review): this Series gets a fresh 0-based index, whereas `terms`
    # keeps the term_sort index; concat(axis=1) aligns on the index, so the
    # two only line up if term_sort is also 0-based -- confirm.
    weights = pd.Series(["( %s )" % w for w in df["weight"][i]])
    df_h = pd.concat([df_h, terms, weights], axis=1)
# ----+ write data to file
out_f = os.path.join(
    "scripts", "analysis", "topicModeling", ".output", "8t_term_topic.tex"
)
df_h.to_latex(out_f, index=True)

# --+ get transformed corpus as per the lda model
# presumably a gensim LDA model: each document is then expected to yield
# (topic_n, prob) pairs -- verify against where lda_8 is built
transf_corpus = lda_8.get_document_topics(corpus)
# ----+ rearrange data on document-topic pairs probabilities
# one [doc_id, topic_n, prob] row per (document, topic) pair; the loop
# variable is named doc_id so the builtin id() is not shadowed at module
# level
doc_topic_m = [
    [doc_id, topic_n, topic_prob]
    for doc_id, doc in enumerate(transf_corpus)
    for topic_n, topic_prob in doc
]
# ----+ get a df
# column names are set at construction so that an empty corpus still
# produces a frame with the expected columns
df = pd.DataFrame(doc_topic_m, columns=["doc_id", "topic_n", "prob"])
# ----+ dominant topic
# ----+ rename columns
# finishes the rename prepared before this section (`cols` is built there)
df.rename(columns=cols, inplace=True)

# --+ reshape the term-topic table and export it as a LaTeX table
# index on (term rank, topic) so that unstack() can pivot the innermost
# index level (topic_n) into the columns
df.set_index(['term_sort', 'topic_n'], inplace=True)
df = df.unstack()
# ----+ sidewaystable
# lay the topics side by side: for each topic, one column of terms followed
# by one column of weights rendered as '( weight )'
df_h = pd.DataFrame()
for i in range(9):
    terms = df['term'][i]
    # NOTE(review): this Series gets a fresh 0-based index, whereas `terms`
    # keeps the term_sort index; concat(axis=1) aligns on the index, so the
    # two only line up if term_sort is also 0-based -- confirm.
    weights = pd.Series(['( %s )' % w for w in df['weight'][i]])
    df_h = pd.concat([df_h, terms, weights], axis=1)
# ----+ write data to file
out_f = os.path.join('analysis', 'topicModeling', '.output',
                     '9t_term_topic.tex')
df_h.to_latex(out_f, index=True)

# --+ get transformed corpus as per the lda model
# presumably a gensim LDA model: each document is then expected to yield
# (topic_n, prob) pairs -- verify against where lda_9 is built
transf_corpus = lda_9.get_document_topics(corpus)
# ----+ rearrange data on document-topic pairs probabilities
# one [doc_id, topic_n, prob] row per (document, topic) pair; the loop
# variable is named doc_id so the builtin id() is not shadowed at module
# level
doc_topic_m = [
    [doc_id, topic_n, topic_prob]
    for doc_id, doc in enumerate(transf_corpus)
    for topic_n, topic_prob in doc
]
# ----+ get a df
# column names are set at construction so that an empty corpus still
# produces a frame with the expected columns
df = pd.DataFrame(doc_topic_m, columns=['doc_id', 'topic_n', 'prob'])
# ----+ dominant topic