def check_candidate_equality(dir_path1, dir_path2, cutoff_list, lambda_list):
    """Compare the candidates stored in two directories, file by file.

    Every file of ``dir_path1`` must also exist in ``dir_path2``.  If some are
    missing, their names are printed and nothing is read or compared.
    Otherwise each file is read from both directories with ``read_candidate``
    and, for every ``(cutoff, lambda)`` pair, the two entries are handed to
    ``list_check_equality``.

    Files that exist only in ``dir_path2`` are neither reported nor read.

    Args:
        dir_path1: Directory whose file names drive the comparison.
        dir_path2: Directory compared against ``dir_path1``.
        cutoff_list: Cutoffs to compare.  Its first and last items are also
            passed to ``read_candidate`` (presumably as the cutoff range to
            load -- confirm against ``read_candidate``).
        lambda_list: Lambda values to compare for every cutoff.

    Returns:
        None.  The comparison itself is delegated to ``list_check_equality``.
    """
    fileset1 = set(listdir(dir_path1))
    fileset2 = set(listdir(dir_path2))

    # Report missing files up front so that no candidate file is parsed for a
    # comparison that is going to be skipped anyway.
    missing = fileset1 - fileset2
    if missing:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("These files do not exist in %s" % (dir_path2,))
        print(missing)
        return

    # Nothing is missing, so fileset1 is exactly the set of shared files.
    # Sorting only makes the order of the checks reproducible.
    for file_name in sorted(fileset1):
        candidates1 = read_candidate(
            "%s/%s" % (dir_path1, file_name), cutoff_list[0], cutoff_list[-1]
        )
        candidates2 = read_candidate(
            "%s/%s" % (dir_path2, file_name), cutoff_list[0], cutoff_list[-1]
        )
        for cutoff in cutoff_list:
            for lamb in lambda_list:
                list_check_equality(
                    file_name, candidates1[cutoff][lamb], candidates2[cutoff][lamb]
                )
Ejemplo n.º 2
0
 def run_evaluation(self, ref, candidate_root='../graph_old'):
     """Score the candidates of every (k1, b) run against a gold standard.

     For each pair from ``self.k1_list`` x ``self.b_list`` the directory
     ``<candidate_root>/<k1>_<b>`` is listed.  Each file is matched to a
     gold-standard entry by its name without the final extension.  Files
     with no entry are reported, recorded in ``self.excluded_files`` and
     skipped without being read.  The remaining files are read with
     ``read_candidate`` and evaluated for every cutoff in
     ``self.cutoff_list`` and every lambda in ``self.lamb_list[1:]``.

     Args:
         ref: Gold standard, indexed by document name (file name without its
             extension); each value is passed to ``self.stem_list``.
         candidate_root: Directory that holds the ``<k1>_<b>`` candidate
             directories.  Defaults to the previously hard-coded
             ``'../graph_old'``.

     Returns:
         The object returned by ``self.init_eval_score()``, with
         ``[cutoff][lamb][k1][b][document name]`` set to the result of
         ``evaluate`` for every non-empty candidate list.
     """
     gold_standard = ref
     # Single-argument print(...) behaves the same on Python 2 and 3.
     print(ref.keys())
     eval_score = self.init_eval_score()
     for k1 in self.k1_list:
         for b in self.b_list:
             candidate_dir_path = '%s/%s_%s' % (candidate_root, str(k1), str(b))
             # Sorting only makes the processing order reproducible.
             for file_name in sorted(listdir(candidate_dir_path)):
                 # Drop the last extension; a name without any dot is kept
                 # whole instead of collapsing to an empty string.
                 stem, dot, _ = file_name.rpartition('.')
                 doc_name = stem if dot else file_name
                 # Check the gold standard before parsing, so excluded files
                 # cost no I/O; membership is tested on the mapping itself.
                 if doc_name not in gold_standard:
                     print('ref file<%s> does not exist in the given gold_standard data' % (doc_name,))
                     self.excluded_files.add(doc_name)
                     continue
                 candidates_dict = read_candidate(
                     candidate_dir_path + '/' + file_name,
                     self.cutoff_list[0],
                     self.cutoff_list[-1],
                 )
                 gs_list = gold_standard[doc_name]
                 for cutoff in self.cutoff_list:
                     # NOTE(review): the first entry of lamb_list is skipped;
                     # the reason is not visible in this block.
                     for lamb in self.lamb_list[1:]:
                         candidates = candidates_dict[cutoff].get(lamb)
                         if not candidates:
                             # Missing or empty candidate list: nothing to score.
                             continue
                         eval_score[cutoff][lamb][k1][b][doc_name] = evaluate(
                             self.stem_list(candidates), self.stem_list(gs_list)
                         )
     return eval_score