Code example #1
import os


def main():
    if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1])
            and os.path.isdir(indexes[2])):
        print('Required indexes do not exist. Please download first.')

    cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=True)
    prepare_final_submissions(5, final_runs, check_md5=True)

    evaluate_runs(cumulative_qrels, cumulative_runs, check_md5=True)
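This snippet, like the ones that follow, calls helpers (verify_stored_runs, perform_runs, perform_fusion, prepare_final_submissions, evaluate_runs) and reads module-level globals (indexes, stored_runs, cumulative_runs, final_runs) that are defined elsewhere in the full scripts. A minimal sketch of that scaffolding is shown below; the module name covid_baseline_tools and all paths are assumptions, not taken from the excerpts.

# Assumed scaffolding (not shown in the excerpts): helper imports and globals.
# The module name and every path below are placeholders, purely illustrative.
from covid_baseline_tools import (evaluate_runs, perform_fusion, perform_runs,
                                  prepare_final_submissions, verify_stored_runs)

indexes = ['indexes/cord19-abstract',    # abstract index (placeholder path)
           'indexes/cord19-full-text',   # full-text index (placeholder path)
           'indexes/cord19-paragraph']   # paragraph index (placeholder path)

stored_runs = {}      # previously published runs to verify, e.g. {url: md5}
cumulative_runs = {}  # runs over the cumulative collection, e.g. {filename: md5}
final_runs = {}       # final submission runs, e.g. {filename: md5}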
Code example #2
import os


def main():
    if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1])
            and os.path.isdir(indexes[2])):
        print('Required indexes do not exist. Please download first.')

    os.system(
        'cat src/main/resources/topics-and-qrels/qrels.covid-round1.txt ' +
        'src/main/resources/topics-and-qrels/qrels.covid-round2.txt ' +
        '> src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    )

    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=True)
    prepare_final_submissions(3, final_runs, check_md5=True)

    evaluate_runs(round2_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_qrels, final_runs, check_md5=True)
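Code examples #2 and #4 build the round 2 cumulative qrels by piping cat through os.system. A shell-free equivalent, shown here only as an illustrative alternative and not part of the original scripts, concatenates the same files directly in Python:

# Illustrative alternative to the os.system('cat ...') call above.
qrels_dir = 'src/main/resources/topics-and-qrels'
parts = [f'{qrels_dir}/qrels.covid-round1.txt',
         f'{qrels_dir}/qrels.covid-round2.txt']
with open(f'{qrels_dir}/qrels.covid-round2-cumulative.txt', 'w') as out:
    for part in parts:
        with open(part) as f:
            out.write(f.read())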
Code example #3
import os


def main():
    if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1])
            and os.path.isdir(indexes[2])):
        print('Required indexes do not exist. Please download first.')

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(5, final_runs, check_md5=check_md5_flag)

    expected_metrics = {
        'anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4580,
            'judged_cut_10': 0.5880,
            'ndcg_cut_20': 0.4379,
            'judged_cut_20': 0.5940,
            'map': 0.1903,
            'recall_1000': 0.4525,
            'judged_cut_1000': 0.2264
        },
        'anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4912,
            'judged_cut_10': 0.6240,
            'ndcg_cut_20': 0.4596,
            'judged_cut_20': 0.6040,
            'map': 0.2042,
            'recall_1000': 0.4714,
            'judged_cut_1000': 0.2351
        },
        'anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.3240,
            'judged_cut_10': 0.5660,
            'ndcg_cut_20': 0.3055,
            'judged_cut_20': 0.5250,
            'map': 0.1324,
            'recall_1000': 0.3758,
            'judged_cut_1000': 0.2171
        },
        'anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4634,
            'judged_cut_10': 0.6460,
            'ndcg_cut_20': 0.4387,
            'judged_cut_20': 0.6280,
            'map': 0.1793,
            'recall_1000': 0.4368,
            'judged_cut_1000': 0.2425
        },
        'anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4077,
            'judged_cut_10': 0.6160,
            'ndcg_cut_20': 0.3907,
            'judged_cut_20': 0.5920,
            'map': 0.1981,
            'recall_1000': 0.4877,
            'judged_cut_1000': 0.2661
        },
        'anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4918,
            'judged_cut_10': 0.6440,
            'ndcg_cut_20': 0.4569,
            'judged_cut_20': 0.6250,
            'map': 0.2163,
            'recall_1000': 0.5101,
            'judged_cut_1000': 0.2710
        },
        'anserini.covid-r5.fusion1.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4696,
            'judged_cut_10': 0.6520,
            'ndcg_cut_20': 0.4539,
            'judged_cut_20': 0.6490,
            'map': 0.2044,
            'recall_1000': 0.5027,
            'judged_cut_1000': 0.2751
        },
        'anserini.covid-r5.fusion2.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5077,
            'judged_cut_10': 0.6800,
            'ndcg_cut_20': 0.4956,
            'judged_cut_20': 0.6690,
            'map': 0.2304,
            'recall_1000': 0.5378,
            'judged_cut_1000': 0.2851
        },
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6177,
            'judged_cut_10': 0.6620,
            'ndcg_cut_20': 0.5738,
            'judged_cut_20': 0.6510,
            'map': 0.2657,
            'recall_1000': 0.5505,
            'judged_cut_1000': 0.2562
        },
    }
    evaluate_runs(round4_cumulative_qrels,
                  cumulative_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)

    expected_metrics = {
        'anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6925,
            'judged_cut_10': 0.9740,
            'ndcg_cut_20': 0.6586,
            'judged_cut_20': 0.9700,
            'map': 0.3010,
            'recall_1000': 0.4636,
            'judged_cut_1000': 0.4159
        },
        'anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.7301,
            'judged_cut_10': 0.9980,
            'ndcg_cut_20': 0.6979,
            'judged_cut_20': 0.9900,
            'map': 0.3230,
            'recall_1000': 0.4839,
            'judged_cut_1000': 0.4286
        },
        'anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4709,
            'judged_cut_10': 0.8920,
            'ndcg_cut_20': 0.4382,
            'judged_cut_20': 0.8370,
            'map': 0.1777,
            'recall_1000': 0.3427,
            'judged_cut_1000': 0.3397
        },
        'anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6286,
            'judged_cut_10': 0.9840,
            'ndcg_cut_20': 0.5973,
            'judged_cut_20': 0.9630,
            'map': 0.2391,
            'recall_1000': 0.4087,
            'judged_cut_1000': 0.3875
        },
        'anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5832,
            'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.5659,
            'judged_cut_20': 0.9390,
            'map': 0.2808,
            'recall_1000': 0.4695,
            'judged_cut_1000': 0.4412
        },
        'anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6764,
            'judged_cut_10': 0.9840,
            'ndcg_cut_20': 0.6368,
            'judged_cut_20': 0.9740,
            'map': 0.3089,
            'recall_1000': 0.4949,
            'judged_cut_1000': 0.4542
        },
        'anserini.covid-r5.fusion1.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6469,
            'judged_cut_10': 0.9860,
            'ndcg_cut_20': 0.6184,
            'judged_cut_20': 0.9800,
            'map': 0.2952,
            'recall_1000': 0.4967,
            'judged_cut_1000': 0.4675
        },
        'anserini.covid-r5.fusion2.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6972,
            'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.6785,
            'judged_cut_20': 1.0000,
            'map': 0.3329,
            'recall_1000': 0.5313,
            'judged_cut_1000': 0.4869
        },
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.8395,
            'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.7955,
            'judged_cut_20': 0.9990,
            'map': 0.3911,
            'recall_1000': 0.5536,
            'judged_cut_1000': 0.4607
        },
    }
    evaluate_runs(complete_qrels,
                  cumulative_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)

    expected_metrics = {
        'anserini.final-r5.fusion1.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5668,
            'judged_cut_10': 0.9140,
            'ndcg_cut_20': 0.5244,
            'judged_cut_20': 0.8490,
            'map': 0.2302,
            'recall_1000': 0.5615,
            'judged_cut_1000': 0.2148
        },
        'anserini.final-r5.fusion1.post-processed.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5726,
            'judged_cut_10': 0.9240,
            'ndcg_cut_20': 0.5313,
            'judged_cut_20': 0.8570,
            'map': 0.2314,
            'recall_1000': 0.5615,
            'judged_cut_1000': 0.2151
        },
        'anserini.final-r5.fusion2.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6366,
            'judged_cut_10': 0.9640,
            'ndcg_cut_20': 0.5941,
            'judged_cut_20': 0.9080,
            'map': 0.2716,
            'recall_1000': 0.6012,
            'judged_cut_1000': 0.2263
        },
        'anserini.final-r5.fusion2.post-processed.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6474,
            'judged_cut_10': 0.9780,
            'ndcg_cut_20': 0.6007,
            'judged_cut_20': 0.9150,
            'map': 0.2734,
            'recall_1000': 0.6012,
            'judged_cut_1000': 0.2267
        },
        'anserini.final-r5.rf.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.7777,
            'judged_cut_10': 0.9680,
            'ndcg_cut_20': 0.7193,
            'judged_cut_20': 0.9270,
            'map': 0.3235,
            'recall_1000': 0.6378,
            'judged_cut_1000': 0.2197
        },
        'anserini.final-r5.rf.post-processed.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.7944,
            'judged_cut_10': 0.9860,
            'ndcg_cut_20': 0.7346,
            'judged_cut_20': 0.9470,
            'map': 0.3280,
            'recall_1000': 0.6378,
            'judged_cut_1000': 0.2201
        },
    }
    evaluate_runs(round5_qrels,
                  final_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)
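Individual runs can also be spot-checked by hand against the qrels used above. The sketch below assumes Anserini's bundled trec_eval binary at tools/eval/trec_eval.9.0.4/trec_eval and a runs/ output directory; both paths, and the choice of metrics, are assumptions rather than anything shown in the excerpt.

import subprocess

# Hypothetical manual spot-check of one round 5 run against the round 5 qrels.
# The trec_eval path and the run location are assumed, not taken from the source.
cmd = ['tools/eval/trec_eval.9.0.4/trec_eval', '-c',
       '-m', 'ndcg_cut.10', '-m', 'map', '-m', 'recall.1000',
       'src/main/resources/topics-and-qrels/qrels.covid-round5.txt',
       'runs/anserini.final-r5.rf.txt']
print(subprocess.run(cmd, capture_output=True, text=True).stdout)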
Code example #4
import os


def main():
    if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1])
            and os.path.isdir(indexes[2])):
        print('Required indexes do not exist. Please download first.')

    os.system(
        'cat src/main/resources/topics-and-qrels/qrels.covid-round1.txt ' +
        'src/main/resources/topics-and-qrels/qrels.covid-round2.txt ' +
        '> src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    )

    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(3, final_runs, check_md5=check_md5_flag)

    expected_metrics = {
        'anserini.covid-r3.abstract.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2118,
            'judged_cut_10': 0.3300,
            'ndcg_cut_20': 0.2043,
            'judged_cut_20': 0.3150,
            'map': 0.0951,
            'recall_1000': 0.4398,
            'judged_cut_1000': 0.1275
        },
        'anserini.covid-r3.abstract.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2470,
            'judged_cut_10': 0.3375,
            'ndcg_cut_20': 0.2256,
            'judged_cut_20': 0.3175,
            'map': 0.1023,
            'recall_1000': 0.4537,
            'judged_cut_1000': 0.1248
        },
        'anserini.covid-r3.full-text.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2337,
            'judged_cut_10': 0.4650,
            'ndcg_cut_20': 0.2259,
            'judged_cut_20': 0.4425,
            'map': 0.1099,
            'recall_1000': 0.4817,
            'judged_cut_1000': 0.1490
        },
        'anserini.covid-r3.full-text.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3430,
            'judged_cut_10': 0.5025,
            'ndcg_cut_20': 0.3077,
            'judged_cut_20': 0.4888,
            'map': 0.1426,
            'recall_1000': 0.5267,
            'judged_cut_1000': 0.1575
        },
        'anserini.covid-r3.paragraph.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2848,
            'judged_cut_10': 0.5175,
            'ndcg_cut_20': 0.2734,
            'judged_cut_20': 0.4938,
            'map': 0.1390,
            'recall_1000': 0.5527,
            'judged_cut_1000': 0.1727
        },
        'anserini.covid-r3.paragraph.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3604,
            'judged_cut_10': 0.5050,
            'ndcg_cut_20': 0.3213,
            'judged_cut_20': 0.4875,
            'map': 0.1520,
            'recall_1000': 0.5676,
            'judged_cut_1000': 0.1672
        },
        'anserini.covid-r3.fusion1.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3093,
            'judged_cut_10': 0.4975,
            'ndcg_cut_20': 0.2933,
            'judged_cut_20': 0.5025,
            'map': 0.1400,
            'recall_1000': 0.5566,
            'judged_cut_1000': 0.1750
        },
        'anserini.covid-r3.fusion2.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3568,
            'judged_cut_10': 0.5250,
            'ndcg_cut_20': 0.3273,
            'judged_cut_20': 0.4925,
            'map': 0.1564,
            'recall_1000': 0.5769,
            'judged_cut_1000': 0.1715
        },
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3633,
            'judged_cut_10': 0.3800,
            'ndcg_cut_20': 0.3175,
            'judged_cut_20': 0.3600,
            'map': 0.1526,
            'recall_1000': 0.5722,
            'judged_cut_1000': 0.1398
        },
    }
    evaluate_runs(round2_cumulative_qrels,
                  cumulative_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)

    expected_metrics = {
        'anserini.covid-r3.abstract.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5781,
            'judged_cut_10': 0.8875,
            'ndcg_cut_20': 0.5359,
            'judged_cut_20': 0.8325,
            'map': 0.2348,
            'recall_1000': 0.5040,
            'judged_cut_1000': 0.2351
        },
        'anserini.covid-r3.abstract.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6291,
            'judged_cut_10': 0.9300,
            'ndcg_cut_20': 0.5972,
            'judged_cut_20': 0.8925,
            'map': 0.2525,
            'recall_1000': 0.5215,
            'judged_cut_1000': 0.2370
        },
        'anserini.covid-r3.full-text.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3977,
            'judged_cut_10': 0.7500,
            'ndcg_cut_20': 0.3681,
            'judged_cut_20': 0.7213,
            'map': 0.1646,
            'recall_1000': 0.4708,
            'judged_cut_1000': 0.2471
        },
        'anserini.covid-r3.full-text.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5790,
            'judged_cut_10': 0.9050,
            'ndcg_cut_20': 0.5234,
            'judged_cut_20': 0.8525,
            'map': 0.2236,
            'recall_1000': 0.5313,
            'judged_cut_1000': 0.2693
        },
        'anserini.covid-r3.paragraph.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5396,
            'judged_cut_10': 0.9425,
            'ndcg_cut_20': 0.5079,
            'judged_cut_20': 0.9050,
            'map': 0.2498,
            'recall_1000': 0.5766,
            'judged_cut_1000': 0.2978
        },
        'anserini.covid-r3.paragraph.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6327,
            'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.5793,
            'judged_cut_20': 0.9162,
            'map': 0.2753,
            'recall_1000': 0.5923,
            'judged_cut_1000': 0.2956
        },
        'anserini.covid-r3.fusion1.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5924,
            'judged_cut_10': 0.9625,
            'ndcg_cut_20': 0.5563,
            'judged_cut_20': 0.9362,
            'map': 0.2700,
            'recall_1000': 0.5956,
            'judged_cut_1000': 0.3045
        },
        'anserini.covid-r3.fusion2.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6515,
            'judged_cut_10': 0.9875,
            'ndcg_cut_20': 0.6200,
            'judged_cut_20': 0.9675,
            'map': 0.3027,
            'recall_1000': 0.6194,
            'judged_cut_1000': 0.3076
        },
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.7459,
            'judged_cut_10': 0.9875,
            'ndcg_cut_20': 0.7023,
            'judged_cut_20': 0.9637,
            'map': 0.3190,
            'recall_1000': 0.6125,
            'judged_cut_1000': 0.2600
        },
    }
    evaluate_runs(round3_cumulative_qrels,
                  cumulative_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)

    expected_metrics = {
        'anserini.final-r3.fusion1.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5339,
            'judged_cut_10': 0.8400,
            'ndcg_cut_20': 0.4875,
            'judged_cut_20': 0.7637,
            'map': 0.2283,
            'recall_1000': 0.6160,
            'judged_cut_1000': 0.1370
        },
        'anserini.final-r3.fusion1.post-processed.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5359,
            'judged_cut_10': 0.8475,
            'ndcg_cut_20': 0.4902,
            'judged_cut_20': 0.7675,
            'map': 0.2293,
            'recall_1000': 0.6160,
            'judged_cut_1000': 0.1373
        },
        'anserini.final-r3.fusion2.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6072,
            'judged_cut_10': 0.9025,
            'ndcg_cut_20': 0.5599,
            'judged_cut_20': 0.8337,
            'map': 0.2631,
            'recall_1000': 0.6441,
            'judged_cut_1000': 0.1431
        },
        'anserini.final-r3.fusion2.post-processed.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6100,
            'judged_cut_10': 0.9100,
            'ndcg_cut_20': 0.5617,
            'judged_cut_20': 0.8375,
            'map': 0.2641,
            'recall_1000': 0.6441,
            'judged_cut_1000': 0.1434
        },
        'anserini.final-r3.rf.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6812,
            'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.6255,
            'judged_cut_20': 0.8450,
            'map': 0.2787,
            'recall_1000': 0.6399,
            'judged_cut_1000': 0.1246
        },
        'anserini.final-r3.rf.post-processed.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6883,
            'judged_cut_10': 0.9750,
            'ndcg_cut_20': 0.6321,
            'judged_cut_20': 0.8538,
            'map': 0.2817,
            'recall_1000': 0.6399,
            'judged_cut_1000': 0.1250
        },
    }
    evaluate_runs(round3_qrels,
                  final_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)
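In examples #3 through #5, evaluate_runs receives an expected dictionary that maps each run file to target metric values. The excerpts do not show how that comparison is implemented; the function below is only an illustrative sketch of such a check, with a hypothetical measured dictionary of the same shape as expected_metrics.

# Illustrative only: flag metrics that drift from their expected values
# beyond a small tolerance. 'measured' is assumed to mirror the shape of
# the expected_metrics dictionaries above.
def check_expected(measured, expected, tol=1e-4):
    for run, metrics in expected.items():
        for metric, target in metrics.items():
            actual = measured[run][metric]
            if abs(actual - target) > tol:
                print(f'{run}: {metric} = {actual}, expected {target}')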
Code example #5
import os


def main():
    if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1])
            and os.path.isdir(indexes[2])):
        print('Required indexes do not exist. Please download first.')

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'

    # This script was written after the following issue was reported: https://github.com/castorini/anserini/issues/1669
    # so there is no point in checking MD5 checksums.
    check_md5_flag = False

    perform_runs()
    perform_fusion(cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(final_runs, check_md5=check_md5_flag)

    expected_metrics = {
        'anserini.covid-r2.abstract.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3522,
            'judged_cut_10': 0.5371,
            'ndcg_cut_20': 0.3171,
            'judged_cut_20': 0.5100,
            'map': 0.1752,
            'recall_1000': 0.6601,
            'judged_cut_1000': 0.1013
        },
        'anserini.covid-r2.abstract.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3781,
            'judged_cut_10': 0.5371,
            'ndcg_cut_20': 0.3462,
            'judged_cut_20': 0.4829,
            'map': 0.1804,
            'recall_1000': 0.6485,
            'judged_cut_1000': 0.0958
        },
        'anserini.covid-r2.full-text.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.2070,
            'judged_cut_10': 0.4286,
            'ndcg_cut_20': 0.1931,
            'judged_cut_20': 0.3929,
            'map': 0.1159,
            'recall_1000': 0.5953,
            'judged_cut_1000': 0.0995
        },
        'anserini.covid-r2.full-text.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3123,
            'judged_cut_10': 0.4229,
            'ndcg_cut_20': 0.2738,
            'judged_cut_20': 0.3929,
            'map': 0.1473,
            'recall_1000': 0.6517,
            'judged_cut_1000': 0.1022
        },
        'anserini.covid-r2.paragraph.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.2772,
            'judged_cut_10': 0.4400,
            'ndcg_cut_20': 0.2579,
            'judged_cut_20': 0.4529,
            'map': 0.1607,
            'recall_1000': 0.7248,
            'judged_cut_1000': 0.1220
        },
        'anserini.covid-r2.paragraph.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3353,
            'judged_cut_10': 0.4343,
            'ndcg_cut_20': 0.2956,
            'judged_cut_20': 0.4329,
            'map': 0.1772,
            'recall_1000': 0.7196,
            'judged_cut_1000': 0.1136
        },
        'anserini.covid-r2.fusion1.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3297,
            'judged_cut_10': 0.4657,
            'ndcg_cut_20': 0.3060,
            'judged_cut_20': 0.4643,
            'map': 0.1914,
            'recall_1000': 0.7561,
            'judged_cut_1000': 0.1304
        },
        'anserini.covid-r2.fusion2.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3679,
            'judged_cut_10': 0.4829,
            'ndcg_cut_20': 0.3360,
            'judged_cut_20': 0.4557,
            'map': 0.2066,
            'recall_1000': 0.7511,
            'judged_cut_1000': 0.1200
        },
    }
    evaluate_runs(round1_qrels,
                  cumulative_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)

    # Note that recall@1k doesn't match the figures reported here:
    # https://github.com/castorini/anserini/blob/master/docs/experiments-covid.md
    expected_metrics = {
        'anserini.final-r2.fusion1.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.4827,
            'judged_cut_10': 0.9543,
            'ndcg_cut_20': 0.4512,
            'judged_cut_20': 0.8614,
            'map': 0.2431,
            'recall_1000': 0.6475,
            'judged_cut_1000': 0.1463
        },
        'anserini.final-r2.fusion2.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.5553,
            'judged_cut_10': 0.9743,
            'ndcg_cut_20': 0.5058,
            'judged_cut_20': 0.8957,
            'map': 0.2739,
            'recall_1000': 0.6832,
            'judged_cut_1000': 0.1528
        },
    }
    evaluate_runs(round2_qrels,
                  final_runs,
                  expected=expected_metrics,
                  check_md5=check_md5_flag)
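Each excerpt defines main() but omits the entry point. Run as standalone scripts, they would typically end with the standard guard below; the exact script filenames and their location under src/main/python/ are assumptions, not shown in the excerpts.

# Assumed entry point for each script.
if __name__ == '__main__':
    main()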