Example #1
def visualize_topics(self):
    topics = self.get_topics()
    visualize_semantic_netwrok(topics, [],
                               visualize_method='plotly',
                               filename="../outputs/lsi_out.html",
                               title='Latent Semantic Indexing')
Example #2
lda = LDAModel(config)
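# count the topics with non-zero weight in this document's row of the theta (document-topic) matrix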
num_topics = sum(lda.get_theta_matrix()[doc_id, :] != 0)
# visualize

lda_config_file = "/home/rohola/codes/topical_language_generation/configs/generated_fake_alexa_lda_config.json"

config = LDAConfig.from_json_file(lda_config_file)
config.num_topics = num_topics

# save the generated text to disk
if not os.path.isdir(config.dataset_dir):
    os.mkdir(config.dataset_dir)
with open(os.path.join(config.dataset_dir, "generated_text.txt"),
          'w') as file_writer:
    file_writer.write(all_text)

lda = LDAModel(config, build=True)
all_topic_tokens = lda.get_all_topic_tokens(num_words=15)

# clean up words
topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw]
               for tw in all_topic_tokens]

for tw in topic_words:
    print(tw)

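# load the plot settings and render the topic semantic network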
plot_config = PlotConfig.from_json_file("configs/lda_plot_config.json")

fig = visualize_semantic_netwrok(plot_config, topic_words)
Example #3
state = np.random.RandomState(random_seed)
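# fit a gensim LDA model on the corpus, seeded for reproducibility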
lda = LdaModel(corpus=corpus,
               id2word=dictionary,
               num_topics=config.num_topics,
               random_state=state,
               update_every=1,
               passes=10,
               alpha=config.alpha,
               eta='auto')

topic_words = lda.show_topics(config.num_topics_to_show,
                              num_words=config.num_words,
                              formatted=False)
topic_words = [j for (i, j) in topic_words]

for topic in topic_words:
    for word, p in topic:
        print(word)
    print('\n')

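# choose the plotly renderer (2-D or 3-D) based on the configured dimension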
visualize_method = ""
if config.dimension == 2:
    visualize_method = 'plotly'
elif config.dimension == 3:
    visualize_method = 'plotly3d'
else:
    raise ("Wrong dimension, can accept only 2 or 3")

topic_modeling_semantic_network.visualize_semantic_netwrok(
    config, topic_words, visualize_method=visualize_method)
#config_file = "configs/alexa_lda_config.json"
#config_file = "configs/nytimes_lda_config.json"
#config_file = "configs/anes_lda_config.json"
config_file = "configs/congress_lda_config.json"

lda = LDAModel(config_file=config_file)

lda._start()

all_topic_tokens = lda.get_all_topic_tokens(num_words=15)

# clean up words
topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw]
               for tw in all_topic_tokens]
for topic in topic_words:
    print(topic)


# TODO: remove dataclass and replace it with VisualizationConfig class
@dataclass
class config:
    dimension: int = 2
    threshold: float = 0.00001
    node_size: float = 30
    color_scale: str = "Viridis"
    title: str = "LDA"
    out_file_name: str = lda.config.cached_dir + "/lda_viz.html"


visualize_semantic_netwrok(config, topic_words)
Example #5
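            # rebuild the LDA model with the hyperparameters selected in the Streamlit session state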
            session_state.config.num_topics = session_state.num_topics
            session_state.config.alpha = session_state.alpha
            lda = LDAModel(session_state.config, build=True)
            all_topic_tokens = lda.get_all_topic_tokens(num_words=15)

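            # debug check: print the largest entry of the psi matrix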
            a = lda.get_psi_matrix()
            print("first time", a.max())

            # clean up words
            session_state.topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw]
                                         for tw in all_topic_tokens]
            plot_config = PlotConfig.from_json_file(
                "configs/lda_plot_config.json")

            fig = visualize_semantic_netwrok(plot_config,
                                             session_state.topic_words,
                                             auto_open=False)
            st.plotly_chart(fig)
            session_state.fig = fig
        elif session_state.topic_model == "lsi":
            session_state.config = get_draft_config(session_state.topic_model,
                                                    session_state.dataset)
            session_state.config.num_topics = session_state.num_topics
            lsi = LSIModel(session_state.config, build=True)

            tw = lsi.get_topic_words(num_words=10)
            topic_words = [t[1] for t in tw]
            # clean up words
            session_state.topic_words = [[(t[0].strip('Ġ'), t[1]) for t in tw]
                                         for tw in topic_words]
Example #6
def visualize_topics(self):
    topics, word_based_on_topic = self.get_topics()
    visualize_semantic_netwrok(topics, word_based_on_topic,
                               visualize_method='plotly',
                               filename="../outputs/lda_out.html",
                               title="Latent Dirichlet Allocation")