Esempio n. 1
0
 def plot_arun_metric(self, min_num_topics=10, max_num_topics=50, iterations=10):
     """Plot the Arun et al. metric per topic count and export the values.

     The values are obtained from ``self.topic_model.arun_metric``. They are
     drawn on the current pyplot figure, saved as ``output/arun.png``, and
     passed to ``save_topic_number_metrics_data`` for ``output/arun.tsv``.

     Args:
         min_num_topics: Lowest topic count on the x-axis (inclusive).
         max_num_topics: Highest topic count on the x-axis (inclusive).
         iterations: Forwarded unchanged to ``arun_metric``.
     """
     divergences = self.topic_model.arun_metric(min_num_topics, max_num_topics, iterations)

     # Clear the current figure so this curve is drawn on an empty canvas.
     plt.clf()
     topic_counts = range(min_num_topics, max_num_topics + 1)
     plt.plot(topic_counts, divergences)
     plt.title("Arun et al. metric")
     plt.xlabel("number of topics")
     plt.ylabel("symmetric KL divergence")
     plt.savefig("output/arun.png")

     # The export helper receives the range endpoints, not the expanded x-axis.
     topic_bounds = (min_num_topics, max_num_topics)
     save_topic_number_metrics_data("output/arun.tsv", range_=topic_bounds, data=divergences, metric_type="arun")
Esempio n. 2
0
 def plot_greene_metric(self, min_num_topics=10, max_num_topics=20, tao=10, step=5, top_n_words=10):
     """Plot the Greene et al. stability metric and export the values.

     The values are obtained from ``self.topic_model.greene_metric``. They are
     drawn on the current pyplot figure, saved as ``output/greene.png``, and
     passed to ``save_topic_number_metrics_data`` for ``output/greene.tsv``.

     Args:
         min_num_topics: Lowest topic count on the x-axis (inclusive).
         max_num_topics: Upper bound of the x-axis (included when reachable
             from ``min_num_topics`` in increments of ``step``).
         tao: Forwarded unchanged to ``greene_metric``.
         step: Increment between plotted topic counts; also forwarded to
             ``greene_metric``.
         top_n_words: Forwarded unchanged to ``greene_metric``.
     """
     metric_kwargs = {
         "min_num_topics": min_num_topics,
         "max_num_topics": max_num_topics,
         "step": step,
         "top_n_words": top_n_words,
         "tao": tao,
     }
     stability_scores = self.topic_model.greene_metric(**metric_kwargs)

     # Clear the current figure so this curve is drawn on an empty canvas.
     plt.clf()
     topic_counts = np.arange(min_num_topics, max_num_topics + 1, step)
     plt.plot(topic_counts, stability_scores)
     plt.title("Greene et al. metric")
     plt.xlabel("number of topics")
     plt.ylabel("stability")
     plt.savefig("output/greene.png")

     # NOTE(review): `step` shapes the plotted x-axis but is not passed to the
     # export helper -- confirm the helper does not need it.
     topic_bounds = (min_num_topics, max_num_topics)
     save_topic_number_metrics_data(
         "output/greene.tsv", range_=topic_bounds, data=stability_scores, metric_type="greene"
     )
Esempio n. 3
0
 def plot_brunet_metric(self, min_num_topics=10, max_num_topics=50, iterations=10):
     """Plot the Brunet et al. metric per topic count and export the values.

     The values are obtained from ``self.topic_model.brunet_metric``. They
     are drawn on the current pyplot figure, saved as ``output/brunet.png``,
     and passed to ``save_topic_number_metrics_data`` for
     ``output/brunet.tsv``.

     Args:
         min_num_topics: Lowest topic count on the x-axis (inclusive).
         max_num_topics: Highest topic count on the x-axis (inclusive).
         iterations: Forwarded unchanged to ``brunet_metric``.
     """
     correlations = self.topic_model.brunet_metric(min_num_topics, max_num_topics, iterations)

     # Clear the current figure so this curve is drawn on an empty canvas.
     plt.clf()
     topic_counts = range(min_num_topics, max_num_topics + 1)
     plt.plot(topic_counts, correlations)
     plt.title("Brunet et al. metric")
     plt.xlabel("number of topics")
     plt.ylabel("cophenetic correlation coefficient")
     plt.savefig("output/brunet.png")

     # The export helper receives the range endpoints, not the expanded x-axis.
     topic_bounds = (min_num_topics, max_num_topics)
     save_topic_number_metrics_data("output/brunet.tsv", range_=topic_bounds, data=correlations, metric_type="brunet")
Esempio n. 4
0
 def plot_brunet_metric(
         self, min_num_topics=10, max_num_topics=50, iterations=10):
     """Draw and export the Brunet et al. metric for a range of topic counts.

     Calls ``self.topic_model.brunet_metric`` for the values, plots them
     against every integer topic count from ``min_num_topics`` to
     ``max_num_topics`` (both inclusive), saves the figure to
     ``output/brunet.png`` and passes the values to
     ``save_topic_number_metrics_data`` for ``output/brunet.tsv``.

     Args:
         min_num_topics: First topic count on the x-axis.
         max_num_topics: Last topic count on the x-axis.
         iterations: Passed through to ``brunet_metric``.
     """
     scores = self.topic_model.brunet_metric(
         min_num_topics, max_num_topics, iterations)

     # Reset the current figure before drawing the new curve.
     plt.clf()
     x_values = range(min_num_topics, max_num_topics + 1)
     plt.plot(x_values, scores)
     plt.title('Brunet et al. metric')
     plt.xlabel('number of topics')
     plt.ylabel('cophenetic correlation coefficient')
     plt.savefig('output/brunet.png')

     export_kwargs = {
         'range_': (min_num_topics, max_num_topics),
         'data': scores,
         'metric_type': 'brunet',
     }
     save_topic_number_metrics_data('output/brunet.tsv', **export_kwargs)
Esempio n. 5
0
 def plot_arun_metric(
         self, min_num_topics=10, max_num_topics=50, iterations=10):
     """Draw and export the Arun et al. metric for a range of topic counts.

     Calls ``self.topic_model.arun_metric`` for the values, plots them
     against every integer topic count from ``min_num_topics`` to
     ``max_num_topics`` (both inclusive), saves the figure to
     ``output/arun.png`` and passes the values to
     ``save_topic_number_metrics_data`` for ``output/arun.tsv``.

     Args:
         min_num_topics: First topic count on the x-axis.
         max_num_topics: Last topic count on the x-axis.
         iterations: Passed through to ``arun_metric``.
     """
     scores = self.topic_model.arun_metric(
         min_num_topics, max_num_topics, iterations)

     # Reset the current figure before drawing the new curve.
     plt.clf()
     x_values = range(min_num_topics, max_num_topics + 1)
     plt.plot(x_values, scores)
     plt.title('Arun et al. metric')
     plt.xlabel('number of topics')
     plt.ylabel('symmetric KL divergence')
     plt.savefig('output/arun.png')

     export_kwargs = {
         'range_': (min_num_topics, max_num_topics),
         'data': scores,
         'metric_type': 'arun',
     }
     save_topic_number_metrics_data('output/arun.tsv', **export_kwargs)
Esempio n. 6
0
 def plot_greene_metric(
         self, min_num_topics=10, max_num_topics=20, tao=10, step=5,
         top_n_words=10):
     """Draw and export the Greene et al. stability metric.

     Calls ``self.topic_model.greene_metric`` for the values, plots them
     against topic counts spaced ``step`` apart starting at
     ``min_num_topics``, saves the figure to ``output/greene.png`` and
     passes the values to ``save_topic_number_metrics_data`` for
     ``output/greene.tsv``.

     Args:
         min_num_topics: First topic count on the x-axis.
         max_num_topics: Upper bound of the x-axis; plotted only when it is
             reachable from ``min_num_topics`` in increments of ``step``.
         tao: Passed through to ``greene_metric``.
         step: Spacing between plotted topic counts; also passed through to
             ``greene_metric``.
         top_n_words: Passed through to ``greene_metric``.
     """
     scores = self.topic_model.greene_metric(min_num_topics=min_num_topics,
                                             max_num_topics=max_num_topics,
                                             step=step,
                                             top_n_words=top_n_words,
                                             tao=tao)

     # Reset the current figure before drawing the new curve.
     plt.clf()
     x_values = np.arange(min_num_topics, max_num_topics + 1, step)
     plt.plot(x_values, scores)
     plt.title('Greene et al. metric')
     plt.xlabel('number of topics')
     plt.ylabel('stability')
     plt.savefig('output/greene.png')

     # NOTE(review): `step` is used for the plot's x-axis but is not handed
     # to the export helper -- confirm the helper does not need it.
     export_kwargs = {
         'range_': (min_num_topics, max_num_topics),
         'data': scores,
         'metric_type': 'greene',
     }
     save_topic_number_metrics_data('output/greene.tsv', **export_kwargs)