def _load_phi(self, num_topics: int, subsample_number: int) -> pd.DataFrame:
    """Load a saved topic model and return the result of its ``get_phi()``.

    The model folder is resolved by ``self._folder_path_model`` from the
    requested number of topics and the subsample number.

    Args:
        num_topics: Number of topics of the model to load.
        subsample_number: Subsample the model belongs to
            (forwarded to ``_folder_path_model`` as a keyword argument).

    Returns:
        Whatever ``TopicModel.get_phi()`` yields for the loaded model.
    """
    model_folder = self._folder_path_model(
        num_topics,
        subsample_number=subsample_number,
    )
    loaded_model = TopicModel.load(model_folder)

    return loaded_model.get_phi()
def test_optimize_for_model(self, keep_in_memory, model_family):
    """Run ``OptimizeScoresMethod`` and verify what it leaves on disk.

    After the search, the experiment folder must hold one sub-folder per
    restart (each named with the experiment name as a prefix), and every
    restart folder must hold one saved model per searched number of topics.
    Each saved model must expose both scores: the perplexity score with one
    value per fit iteration, the diversity score with exactly one value,
    and all values must be instances of ``Number``.
    """
    # Thetaless currently fails
    # see https://github.com/machine-intelligence-laboratory/TopicNet/issues/79

    perplexity_name = 'perplexity_score'
    perplexity = PerplexityScore(
        name=perplexity_name,
        class_ids=[self.main_modality, self.other_modality],
    )
    diversity_name = 'diversity_score'
    diversity = DiversityScore(diversity_name, class_ids=self.main_modality)

    self.text_collection._set_dataset_kwargs(keep_in_memory=keep_in_memory)

    # Topic-number grid explored by the optimizer.
    topics_from, topics_to, topics_step = 1, 2, 1
    fit_iterations = 3
    restarts = 3
    # A range knows its own length, so the grid size needs no temporary list.
    grid_size = len(range(topics_from, topics_to + 1, topics_step))

    name_prefix = model_family.value
    root_folder = self.working_folder_path

    settings = dict(
        scores=[perplexity, diversity],
        model_family=model_family,
        min_num_topics=topics_from,
        max_num_topics=topics_to,
        num_topics_interval=topics_step,
        num_fit_iterations=fit_iterations,
        num_restarts=restarts,
        one_model_num_processors=1,
        separate_thread=False,
        experiment_name=name_prefix,
        experiment_directory=root_folder,
    )
    OptimizeScoresMethod(**settings).search_for_optimum(
        text_collection=self.text_collection
    )

    # Score name -> number of values each saved model must hold for it.
    # Insertion order (perplexity first) fixes the order of the checks below.
    expected_lengths = {
        perplexity_name: fit_iterations,
        diversity_name: 1,
    }

    restart_dirs = os.listdir(root_folder)
    assert len(restart_dirs) == restarts

    for restart_dir in restart_dirs:
        assert restart_dir.startswith(name_prefix)

        restart_path = os.path.join(root_folder, restart_dir)
        model_dirs = os.listdir(restart_path)
        assert len(model_dirs) == grid_size

        for model_dir in model_dirs:
            topic_model = TopicModel.load(os.path.join(restart_path, model_dir))

            for score_name in expected_lengths:
                assert score_name in topic_model.scores

            for score_name, expected_length in expected_lengths.items():
                assert len(topic_model.scores[score_name]) == expected_length

            for score_name in expected_lengths:
                assert all(
                    isinstance(value, Number)
                    for value in topic_model.scores[score_name]
                )