def compute_mrr(D, I, qids, ref_dict):
    """Compute MRR@10 for a distributed nearest-neighbor search.

    Each worker contributes its local score/pid matrices; they are gathered
    and merged on the first worker, which keeps the 10 best-scoring unique
    pids per query and scores them against the reference judgments.

    Args:
        D: local score matrix, shape (num_queries, local_k).
        I: local pid matrix aligned with ``D`` (negative pids are padding).
        qids: query ids, one per row of ``D``/``I``.
        ref_dict: qid -> relevant pids mapping, as consumed by
            ``compute_metrics``.

    Returns:
        float: MRR@10 on the first worker; 0.0 on every other worker.
    """
    knn_pkl = {"D": D, "I": I}
    all_knn_list = all_gather(knn_pkl)
    mrr = 0.0
    if is_first_worker():
        # Concatenate the per-worker neighbor lists along the k axis.
        D_merged = concat_key(all_knn_list, "D", axis=1)
        I_merged = concat_key(all_knn_list, "I", axis=1)
        print(D_merged.shape, I_merged.shape)
        # We pad with negative pids and distance -128 - if they make it
        # to the top we have a problem.
        idx = np.argsort(D_merged, axis=1)[:, ::-1][:, :10]
        sorted_I = np.take_along_axis(I_merged, idx, axis=1)
        candidate_dict = {}
        for i, qid in enumerate(qids):
            seen_pids = set()
            if qid not in candidate_dict:
                candidate_dict[qid] = [0] * 1000
            j = 0
            for pid in sorted_I[i]:
                # Skip padding (negative pids) and duplicates (a document
                # may contribute multiple vectors).
                if pid >= 0 and pid not in seen_pids:
                    candidate_dict[qid][j] = pid
                    j += 1
                    seen_pids.add(pid)
        # The boolean "allowed" result is unused here; only report the message.
        _, message = quality_checks_qids(ref_dict, candidate_dict)
        if message != '':
            print(message)
        mrr_metrics = compute_metrics(ref_dict, candidate_dict)
        mrr = mrr_metrics["MRR @10"]
        print(mrr)
    return mrr
def compute_mrr_last(D, I, qids, ref_dict, dev_query_positive_id):
    """Compute MRR@10 and Recall@1000 for a distributed NN search.

    Like ``compute_mrr`` but keeps the top 1000 unique pids per query and
    additionally evaluates recall with pytrec_eval against the positive
    qrels.

    Args:
        D: local score matrix, shape (num_queries, local_k).
        I: local pid matrix aligned with ``D`` (negative pids are padding).
        qids: query ids, one per row of ``D``/``I``.
        ref_dict: qid -> relevant pids mapping for ``compute_metrics``.
        dev_query_positive_id: qrels mapping for pytrec_eval recall.

    Returns:
        tuple: (mrr@10, recall@1000) on the first worker; (0.0, 0.0)
        on every other worker.
    """
    knn_pkl = {"D": D, "I": I}
    all_knn_list = all_gather(knn_pkl)
    mrr = 0.0
    final_recall = 0.0
    if is_first_worker():
        prediction = {}
        D_merged = concat_key(all_knn_list, "D", axis=1)
        I_merged = concat_key(all_knn_list, "I", axis=1)
        print(D_merged.shape, I_merged.shape)
        # We pad with negative pids and distance -128 - if they make it
        # to the top we have a problem.
        idx = np.argsort(D_merged, axis=1)[:, ::-1][:, :1000]
        sorted_I = np.take_along_axis(I_merged, idx, axis=1)
        candidate_dict = {}
        for i, qid in enumerate(qids):
            seen_pids = set()
            if qid not in candidate_dict:
                prediction[qid] = {}
                candidate_dict[qid] = [0] * 1000
            j = 0
            for pid in sorted_I[i]:
                # Skip padding (negative pids) and duplicates (a document
                # may contribute multiple vectors).
                if pid >= 0 and pid not in seen_pids:
                    candidate_dict[qid][j] = pid
                    # pytrec_eval ranks by descending score, so use -rank.
                    prediction[qid][pid] = -(j + 1)
                    j += 1
                    seen_pids.add(pid)
        # The boolean "allowed" result is unused here; only report the message.
        _, message = quality_checks_qids(ref_dict, candidate_dict)
        if message != '':
            print(message)
        mrr_metrics = compute_metrics(ref_dict, candidate_dict)
        mrr = mrr_metrics["MRR @10"]
        print(mrr)
        evaluator = pytrec_eval.RelevanceEvaluator(
            convert_to_string_id(dev_query_positive_id), {'recall'})
        eval_query_cnt = 0
        recall = 0
        topN = 1000
        result = evaluator.evaluate(convert_to_string_id(prediction))
        for k in result.keys():
            eval_query_cnt += 1
            recall += result[k]["recall_" + str(topN)]
        # Guard against an empty evaluation result (no judged queries).
        final_recall = recall / eval_query_cnt if eval_query_cnt else 0.0
        print('final_recall: ', final_recall)
    return mrr, final_recall
def EvalDevQuery(query_embedding2id, passage_embedding2id, dev_query_positive_id, I_nearest_neighbor, topN,
                 rerankTopN=-1, dev_query_embedding=None, passage_embedding=None, lambda_test=None, wt_emb=1.0, f=1):
    """Evaluate dev-set retrieval with pytrec_eval and MS MARCO MRR.

    Args:
        query_embedding2id: maps a query row index to its query id.
        passage_embedding2id: maps a passage row index to its pid.
        dev_query_positive_id: qrels, qid -> {pid: relevance}.
        I_nearest_neighbor: per-query ranked passage indices.
        topN: evaluation depth (also used for the recall cut).
        rerankTopN: if > 0, rerank the top candidates with the residual
            learning score before evaluation.
        dev_query_embedding, passage_embedding, lambda_test, wt_emb, f:
            passed through to ``rerankWithResidualLearningScore`` when
            reranking is enabled.

    Returns:
        tuple of (final_ndcg, ndcgs, mrrs, recalls, eval_query_cnt,
        final_Map, final_mrr, final_recall, hole_rate, ms_mrr,
        Ahole_rate, result, prediction).
    """
    if rerankTopN > 0:
        # Residual scores themselves are not needed here, only the reranked ids.
        I_nearest_neighbor, _ = rerankWithResidualLearningScore(
            I_nearest_neighbor, rerankTopN, query_embedding2id, passage_embedding2id,
            dev_query_embedding, passage_embedding, lambda_test, wt_emb, fold=f)

    # [qid][docid] = docscore; we use -rank as score, so the higher the
    # rank (1 > 2), the higher the score (-1 > -2).
    prediction = {}

    # Hole-rate counters: "labeled"/"Alabeled" count retrieved pids that are
    # NOT judged positive (top-10 and top-N respectively).
    total = 0
    labeled = 0
    Atotal = 0
    Alabeled = 0
    qids_to_ranked_candidate_passages = {}
    for query_idx in range(len(I_nearest_neighbor)):
        seen_pid = set()
        query_id = query_embedding2id[query_idx]
        prediction[query_id] = {}
        top_ann_pid = I_nearest_neighbor[query_idx].copy()
        selected_ann_idx = top_ann_pid[:topN]
        rank = 0
        if query_id not in qids_to_ranked_candidate_passages:
            # By default, all PIDs in the list of 1000 are 0.
            # Only override those that are given.
            qids_to_ranked_candidate_passages[query_id] = [0] * 1000
        for idx in selected_ann_idx:
            pred_pid = passage_embedding2id[idx]
            if pred_pid not in seen_pid:  # handles multiple vectors per document
                qids_to_ranked_candidate_passages[query_id][rank] = pred_pid
                Atotal += 1
                if pred_pid not in dev_query_positive_id[query_id]:
                    Alabeled += 1
                if rank < 10:
                    total += 1
                    if pred_pid not in dev_query_positive_id[query_id]:
                        labeled += 1
                rank += 1
                prediction[query_id][pred_pid] = -rank
                seen_pid.add(pred_pid)

    # Use the out-of-the-box evaluation script.
    evaluator = pytrec_eval.RelevanceEvaluator(
        convert_to_string_id(dev_query_positive_id),
        {'map_cut', 'ndcg_cut', 'recip_rank', 'recall'})

    eval_query_cnt = 0
    result = evaluator.evaluate(convert_to_string_id(prediction))

    qids_to_relevant_passageids = {}
    for qid in dev_query_positive_id:
        qid = int(qid)
        if qid not in qids_to_relevant_passageids:
            qids_to_relevant_passageids[qid] = [
                pid for pid in dev_query_positive_id[qid] if pid > 0]

    ms_mrr = compute_metrics(qids_to_relevant_passageids, qids_to_ranked_candidate_passages)

    ndcg = 0
    Map = 0
    mrr = 0
    recall = 0
    ndcgs = []
    mrrs = []
    recalls = []
    for k in result.keys():
        eval_query_cnt += 1
        ndcg += result[k]["ndcg_cut_10"]
        ndcgs.append(result[k]["ndcg_cut_10"])
        Map += result[k]["map_cut_10"]
        mrr += result[k]["recip_rank"]
        mrrs.append(result[k]["recip_rank"])
        recall += result[k]["recall_" + str(topN)]
        recalls.append(result[k]["recall_" + str(topN)])

    # Guard all averages against empty inputs (no evaluated queries /
    # no retrieved candidates) instead of raising ZeroDivisionError.
    final_ndcg = ndcg / eval_query_cnt if eval_query_cnt else 0.0
    final_Map = Map / eval_query_cnt if eval_query_cnt else 0.0
    final_mrr = mrr / eval_query_cnt if eval_query_cnt else 0.0
    final_recall = recall / eval_query_cnt if eval_query_cnt else 0.0
    hole_rate = labeled / total if total else 0.0
    Ahole_rate = Alabeled / Atotal if Atotal else 0.0

    return (final_ndcg, ndcgs, mrrs, recalls, eval_query_cnt, final_Map, final_mrr,
            final_recall, hole_rate, ms_mrr, Ahole_rate, result, prediction)