def __init__(self, text_dict, data_set_name, relscore_type, tf_ver, idf_ver, k1, b):
     """Functional test: build relevance scores via two pipelines and compare.

     Runs the legacy ``Relscore`` pipeline and the refactored
     ``make_relscore`` pipeline, then checks per input file that both
     wrote identical score files.

     :param text_dict: dict mapping file_name -> document text/sentences
     :param data_set_name: data-set name, used to build the output path
     :param relscore_type: scoring scheme name, e.g. "bm25"
     :param tf_ver: term-frequency variant identifier
     :param idf_ver: inverse-document-frequency variant identifier
     :param k1: BM25 k1 parameter (only used in paths when type is "bm25")
     :param b: BM25 b parameter (only used in paths when type is "bm25")
     """
     self.data_set_name = data_set_name
     # generate relscore by Relscore (legacy pipeline)
     # NOTE(review): k1 and b are each passed TWICE here and relscore_type is
     # not passed at all -- looks like a typo; confirm against Relscore's
     # actual signature before relying on this comparison.
     Relscore(text_dict, data_set_name, tf_ver, idf_ver, k1, k1, b, b)
     # generate relscore by make_relscore (refactored pipeline)
     make_relscore(k1, b, self.change_to_num(relscore_type))
     # Directory name encodes the configuration, e.g. "bm25_1_2" plus
     # "_<k1>_<b>" for bm25 -- must match what make_relscore wrote.
     relscore_dir_name = "%s_%s_%s" % (relscore_type, str(tf_ver), str(idf_ver))
     if relscore_type == "bm25":
         relscore_dir_name += "_%s_%s" % (str(k1), str(b))
     # Compare the legacy output (../relScore) against the refactored
     # output (../rel_score/<data_set>/<config>) file by file.
     for file_name in text_dict.keys():
         old_relscore = read_relscore("../relScore", file_name)
         refactor_relscore = read_relscore("../rel_score/%s/%s" % (data_set_name, relscore_dir_name), file_name)
         # check relscore.keys()s are equal and relscore.values()'s equality
         self.check_equality(file_name, old_relscore, refactor_relscore)
 def run_evaluation(self, ref, crank_flag, single_rank_flag, expand_rank_flag):
     file_set = set(self.text_dict.keys())
     ref_file_list = ref.keys()
     relscore = dict()
     eval_score = self.init_eval_score()
     for file_name in deepcopy(file_set):
         if file_name in ref_file_list: 
             relscore[file_name] = read_relscore(self.relscore_path, file_name)
         else:
             print 'ref file<%s> does not exist in the given gold_standard data' % (file_name)
             file_set.remove(file_name)
             self.excluded_files.add(file_name)
     if expand_rank_flag:
         graph_dict = dict()
         for winsize in self.winsize_list:
             graph_dict[winsize] = dict()
             for file_name in file_set: 
                 graph_dict[winsize][file_name] = Graph(file_name,
                                                        self.text_dict[file_name], 
                                                        relscore[file_name], 
                                                        winsize, 
                                                        self.lamb_list, 
                                                        self.cutoff_list, 
                                                        return_graph_flag=True, 
                                                        crank_flag=crank_flag, 
                                                        single_rank_flag=single_rank_flag, 
                                                        input_graph=None).graph
             self.update_neighbor_weight(graph_dict[winsize])
     for winsize in self.winsize_list:
         new_gs = dict()
         for file_name in file_set:
             input_graph = None
             if expand_rank_flag:
                 input_graph = graph_dict[winsize][file_name]
             graph = Graph(file_name,
                           self.text_dict[file_name], 
                           relscore[file_name], 
                           winsize, 
                           self.lamb_list, 
                           self.cutoff_list, 
                           return_graph_flag=False, 
                           crank_flag=crank_flag, 
                           single_rank_flag=single_rank_flag, 
                           input_graph=input_graph,
                           lda_dict=self.lda_dict)
             checked_gs_list = self.check_gold_standard(graph, ref[file_name])
             new_gs[file_name] = checked_gs_list
             for cutoff in self.cutoff_list:
                 for lamb in self.lamb_list:
                     candidates = graph.score_candidates(cutoff, lamb)
                     file_path = '../checked_inspec.ref'
                     save_candidates(file_path, winsize, cutoff, lamb, candidates);
                     eval_score[winsize][cutoff][lamb][file_name] = (
                         evaluate(self.stem_list(candidates), self.stem_list(checked_gs_list)))
     write_gs('./checked_inspec.ref', new_gs)
     return eval_score
        # NOTE(review): this fragment starts mid-scope -- `relscore`, `k1`, `b`,
        # `text_dict`, `data_set_name` and `settings` are bound by enclosing
        # code not visible here.  `relscore` appears to be a tokenized spec
        # like [type, tf_ver, idf_ver, k1, b]; k1/b are present only when the
        # spec has more than 4 fields (presumably the bm25 case) -- confirm.
        if len(relscore) > 4:
            k1 = relscore[3]
            b = relscore[4]
        # Run the relscore functional test (compares legacy vs refactored
        # relevance-score pipelines; see RelscoreFunctionalTest.__init__).
        ft1 = RelscoreFunctionalTest(
            text_dict, data_set_name, relscore[0], int(relscore[1]), int(relscore[2]), float(k1), float(b)
        )
    # check equality of two methods of getting keyphrase_candidate
    for relscore_type in settings["relscore"]:
        for winsize in settings["winsize"]:
            print "relscore_type : %s, winsize : %s" % (relscore_type, winsize)
            old_eval_score = dict()
            refactor_eval_score = dict()
            # HACK: hard-coded absolute paths tie this harness to one machine.
            old_gs = old_read_gs("/Users/KimKR/Desktop/NEXT_LAB/keyword/gold_standard/%s.ref" % (data_set_name))
            refactor_gs = read_gs("/Users/KimKR/Desktop/NEXT_LAB/keyword/gold_standard/%s.ref" % (data_set_name))
            for file_name in text_dict.keys():
                sentences = text_dict[file_name]
                # NOTE(review): variable names look swapped relative to the
                # convention in RelscoreFunctionalTest.__init__ -- there the
                # "old" scores live in ../relScore and the refactored ones in
                # rel_score/<dataset>/..., yet here old_relscore reads from
                # rel_score/ and refact_relscore from relScore.  Confirm.
                old_relscore = read_relscore(
                    ("/Users/KimKR/Desktop/NEXT_LAB/keyword/rel_score/%s/%s" % (settings["dataset"], relscore_type)),
                    file_name,
                )
                refact_relscore = read_relscore("/Users/KimKR/Desktop/NEXT_LAB/keyword/relScore", file_name)
                # check equality of keyphrase_candidates
                ft = GraphFunctionalTest(file_name, sentences, old_relscore, refact_relscore, int(winsize))
                #                old_gs[file_name] = old_check_gold_standard(sentences, old_gs[file_name])
                #                refactor_gs[file_name] = check_gold_standard(sentences, refactor_gs[file_name])
                # Evaluate both graph implementations against their respective
                # gold standards, then assert the score dicts are identical.
                old_eval_score[file_name] = evaluate(ft.kokako_graph.score_candidates(), old_gs[file_name])
                refactor_eval_score[file_name] = evaluate(
                    ft.refactored_graph.score_candidates(1, ft.lamb), refactor_gs[file_name]
                )
                dict_check_equality(file_name, old_eval_score[file_name], refactor_eval_score[file_name])