def main():
    """Rank the candidates of the target document and save them as a table.

    Reads the clustered corpus from ``'clustered_corpus'`` and the target
    text from ``'target.txt'``. For every candidate reported by the target
    document, three values are computed:

    * ``tf``  -- base-10 logarithm of ``1 + document.get_tf_for(candidate)``
    * ``idf`` -- base-2 logarithm of ``1 + 1 / corpus.get_df_for(candidate)``
    * ``cu``  -- the value of ``clustered_corpus.get_cu_for(candidate)``

    The rank of a candidate is its ``cu`` value alone. All results are keyed
    by the representative the merged corpus returns for the candidate, so a
    later candidate with the same representative overwrites an earlier one.
    The mappings are turned into a table via ``generate_table_based_on``,
    written out with ``save_as_file``, and ``'Done.'`` is printed.
    """
    # Load the raw inputs first; the wrapper objects are built afterwards.
    raw_clusters = read_clustered_corpus('clustered_corpus')
    merged_documents = merge_clustered_corpus_into_a_single_corpus(raw_clusters)

    document = Document(read_text_file('target.txt'))
    corpus = Corpus(merged_documents)
    clustered_corpus = ClusteredCorpus(raw_clusters)

    ranks = {}
    params = {}
    cluster_dfs = {}

    for candidate in document.get_candidates():
        log_tf = math.log(1.0 + document.get_tf_for(candidate), 10.0)

        # NOTE(review): raises ZeroDivisionError if get_df_for() can return 0
        # for a candidate -- confirm against Corpus.
        inverse_df = 1.0 / corpus.get_df_for(candidate)
        log_idf = math.log(1.0 + inverse_df, 2.0)

        cu = clustered_corpus.get_cu_for(candidate)
        per_cluster_dfs = clustered_corpus.get_dfs_in_each_cluster_for(candidate)

        key = corpus.get_representative_for(candidate)

        # Only cu drives the ranking; tf and idf are kept for the report.
        ranks[key] = cu
        params[key] = (log_tf, log_idf, cu)
        cluster_dfs[key] = per_cluster_dfs

    save_as_file(generate_table_based_on(ranks, params, cluster_dfs))
    print('Done.')