def check_candidate_equality(dir_path1, dir_path2, cutoff_list, lambda_list):
    """Compare the candidate files of two directories entry by entry.

    Every file name listed in ``dir_path1`` must also be present in
    ``dir_path2``. If some are missing, their names are printed and no
    comparison is made. Otherwise each file is loaded from both
    directories with ``read_candidate`` and, for every ``(cutoff, lambda)``
    pair, the two entries are handed to ``list_check_equality``.

    The check is one-sided: files that exist only in ``dir_path2`` are
    ignored.

    Args:
        dir_path1: Directory whose files drive the comparison.
        dir_path2: Directory expected to contain the same file names.
        cutoff_list: Cutoff keys to compare. Its first and last items are
            also passed to ``read_candidate`` (presumably as the cutoff
            range to load -- confirm against ``read_candidate``).
        lambda_list: Lambda keys compared under each cutoff.

    Returns:
        None. Results are reported by ``list_check_equality`` / stdout.
    """
    fileset1 = set(listdir(dir_path1))
    fileset2 = set(listdir(dir_path2))

    # Check the directory listings first so that no file is parsed when
    # the comparison is going to be abandoned anyway.
    missing = fileset1 - fileset2
    if missing:
        print("These files do not exist in %s" % (dir_path2))
        print(missing)
        return

    # From here on fileset1 is a subset of fileset2, i.e. it *is* the
    # intersection. Sorting makes the comparison order deterministic.
    for file_name in sorted(fileset1):
        candidates1 = read_candidate(
            "%s/%s" % (dir_path1, file_name), cutoff_list[0], cutoff_list[-1]
        )
        candidates2 = read_candidate(
            "%s/%s" % (dir_path2, file_name), cutoff_list[0], cutoff_list[-1]
        )
        for cutoff in cutoff_list:
            for lamb in lambda_list:
                list_check_equality(
                    file_name,
                    candidates1[cutoff][lamb],
                    candidates2[cutoff][lamb],
                )
def run_evaluation(self, ref, candidate_root='../graph_old'):
    """Score every candidate file against the gold standard.

    For each ``(k1, b)`` pair taken from ``self.k1_list`` and
    ``self.b_list``, the directory ``<candidate_root>/<k1>_<b>`` is listed.
    Each file there is matched to a gold-standard entry by its name with
    the last dot-separated component (the extension) removed. Files with
    no gold-standard entry are reported on stdout, recorded in
    ``self.excluded_files`` and skipped without being parsed.

    Args:
        ref: Mapping from document name (file name without extension) to
            its gold-standard list.
        candidate_root: Directory holding the ``<k1>_<b>`` sub-directories.
            Defaults to the previously hard-coded ``'../graph_old'``.

    Returns:
        The structure created by ``self.init_eval_score()``, filled in as
        ``eval_score[cutoff][lamb][k1][b][doc_name]`` with the value
        returned by ``evaluate`` for each non-empty candidate list.
    """
    gold_standard = ref
    print(ref.keys())
    eval_score = self.init_eval_score()

    for k1 in self.k1_list:
        for b in self.b_list:
            candidate_dir_path = '%s/%s_%s' % (candidate_root, str(k1), str(b))
            for file_name in set(listdir(candidate_dir_path)):
                # Drop only the last dot-separated component, so a name
                # such as "doc.v1.txt" maps to "doc.v1".
                doc_name = '.'.join(file_name.split('.')[:-1])

                # Look up the gold standard before touching the file, so
                # excluded files are never parsed.
                if doc_name not in gold_standard:
                    print('ref file<%s> does not exist in the given gold_standard data' % (doc_name))
                    self.excluded_files.add(doc_name)
                    continue

                gs_list = gold_standard[doc_name]
                candidates_dict = read_candidate(
                    candidate_dir_path + '/' + file_name,
                    self.cutoff_list[0],
                    self.cutoff_list[-1],
                )

                for cutoff in self.cutoff_list:
                    per_cutoff = candidates_dict[cutoff]
                    # NOTE(review): the first lambda is deliberately left
                    # out here; the reason is not visible in this block.
                    for lamb in self.lamb_list[1:]:
                        candidates = per_cutoff.get(lamb)
                        if not candidates:
                            # Missing or empty candidate list: no score.
                            continue
                        eval_score[cutoff][lamb][k1][b][doc_name] = evaluate(
                            self.stem_list(candidates),
                            self.stem_list(gs_list),
                        )
    return eval_score