Example #1
0
    def evaluate(self, file_name, rank_list, base_dir, relevant_data):
        """Compute effectiveness metrics for one run and write them to disk.

        The score file is read through ``FileAccess.read_score_file`` and is
        iterated as ``query id -> sequence of entries``, where ``entry[0]`` is
        the document id and ``entry[1]`` its score. Entries are presumably
        already sorted best-first (the position in the sequence is used as the
        rank) -- confirm against ``read_score_file``.

        For every query that has relevance judgements the method computes
        precision and recall at each rank, P@k for each ``k`` in ``rank_list``,
        the average precision and the reciprocal rank of the first relevant
        document. Results are written to ``<base_dir>/evaluation_phase2``:

        * ``<prefix>_p@k<k>.txt``            -- ``qid precision`` per query
        * ``<prefix>_mrr.txt``               -- mean reciprocal rank
        * ``<prefix>_map_results.txt``       -- mean average precision
        * ``<prefix>_precision_recall.txt``  -- one row per (query, rank)

        where ``<prefix>`` is ``file_name`` up to its first ``'.'``.

        Args:
            file_name: Name of the score file; also used to derive the prefix
                of the output files.
            rank_list: Cut-off ranks ``k`` for which P@k is reported. A query
                with fewer than ``k`` results gets no row in that P@k file.
            base_dir: Directory under which ``evaluation_phase2`` is created.
            relevant_data: Mapping from query id to the collection of relevant
                document ids. Queries missing from it, or mapped to an empty
                collection, are skipped.

        Returns:
            None. All results are written to files.
        """
        fa = FileAccess()
        scores = fa.read_score_file(file_name)

        pr_results = []
        ap_results = []
        rr_results = []
        p_at_k = {}
        for k in rank_list:
            p_at_k[k] = []

        for qid in scores:
            if qid not in relevant_data:
                continue
            relevant_files = relevant_data[qid]
            num_relevant = len(relevant_files)
            if not num_relevant:
                # Recall is undefined without relevant documents; treat the
                # query like one that has no judgements at all.
                continue

            precision_sum = 0.0
            relevant_retrieved = 0
            reciprocal_rank = 0.0
            # enumerate() yields the true position even when two entries
            # compare equal, which list.index() would not.
            for rank, entry in enumerate(scores[qid], 1):
                docid = entry[0]
                doc_score = entry[1]
                relevance = 1 if docid in relevant_files else 0
                if relevance:
                    if relevant_retrieved == 0:
                        reciprocal_rank = 1.0 / rank
                    relevant_retrieved += 1

                precision = float(relevant_retrieved) / rank
                recall = float(relevant_retrieved) / num_relevant
                if rank in rank_list:
                    p_at_k[rank].append((qid, precision))
                if relevance:
                    precision_sum += precision
                pr_results.append(
                    (qid, rank, docid, doc_score, str(relevance),
                     precision, recall))

            # NOTE(review): AP is normalised by the number of relevant
            # documents actually retrieved, not by len(relevant_files). Kept
            # as in the original -- confirm this is the intended definition.
            if relevant_retrieved:
                avg_p = precision_sum / relevant_retrieved
            else:
                avg_p = 0.0
            ap_results.append(avg_p)
            # A query without any relevant hit contributes 0 to the MRR, the
            # same way it contributes 0 to the MAP.
            rr_results.append(reciprocal_rank)

        # No evaluated query: report 0.0 rather than dividing by zero.
        mean_avg_pr = sum(ap_results) / len(ap_results) if ap_results else 0.0
        mean_rr = sum(rr_results) / len(rr_results) if rr_results else 0.0

        phase2_evaluation = os.path.join(base_dir, 'evaluation_phase2')
        if not os.path.exists(phase2_evaluation):
            # 0o755 is accepted by Python 2.6+ and Python 3 alike.
            os.makedirs(phase2_evaluation, 0o755)

        pre_file = file_name.split('.')[0]

        for k in p_at_k:
            pk_file_name = pre_file + '_p@k' + str(k) + '.txt'
            with open(os.path.join(phase2_evaluation, pk_file_name), 'w') as pk_file:
                for row in p_at_k[k]:
                    pk_file.write('{} {}\n'.format(row[0], row[1]))

        mrr_filename = pre_file + '_mrr.txt'
        pr_filename = pre_file + '_precision_recall.txt'
        map_filename = pre_file + '_map_results.txt'

        with open(os.path.join(phase2_evaluation, mrr_filename), 'w') as mrr_file:
            mrr_file.write(str(mean_rr))

        with open(os.path.join(phase2_evaluation, map_filename), 'w') as map_file:
            map_file.write(str(mean_avg_pr))

        with open(os.path.join(phase2_evaluation, pr_filename), 'w') as pr_file:
            for row in pr_results:
                pr_file.write("{} {} {} {} {} {} {}\n".format(
                    row[0], row[1], row[2], row[3], row[4],
                    round(row[5], 3), round(row[6], 3)))

        return