def main():
    """Drive the round 5 pipeline and evaluate on the round 4 cumulative qrels.

    Checks that the first three entries of ``indexes`` are existing
    directories, then calls ``verify_stored_runs``, ``perform_runs``,
    ``perform_fusion`` and ``prepare_final_submissions`` for round 5 (MD5
    checking enabled), and finally ``evaluate_runs`` on the cumulative runs.

    Raises:
        SystemExit: With exit status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Fail fast: the message calls the indexes "required", and they are
        # handed to perform_runs below, so continuing cannot succeed.
        raise SystemExit(1)

    cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=True)
    prepare_final_submissions(5, final_runs, check_md5=True)

    evaluate_runs(cumulative_qrels, cumulative_runs, check_md5=True)
def main():
    """Drive the round 3 pipeline and evaluate it on three qrels files.

    Checks that the first three entries of ``indexes`` are existing
    directories, builds the round 2 cumulative qrels by concatenating the
    round 1 and round 2 qrels files, then calls ``verify_stored_runs``,
    ``perform_runs``, ``perform_fusion`` and ``prepare_final_submissions`` for
    round 3 (MD5 checking enabled). The cumulative runs are evaluated on the
    round 2 and round 3 cumulative qrels, the final runs on the round 3 qrels.

    Raises:
        SystemExit: With exit status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Fail fast: the message calls the indexes "required", and they are
        # handed to perform_runs below, so continuing cannot succeed.
        raise SystemExit(1)

    # Round 2 cumulative qrels = round 1 qrels followed by round 2 qrels.
    os.system(
        'cat src/main/resources/topics-and-qrels/qrels.covid-round1.txt ' +
        'src/main/resources/topics-and-qrels/qrels.covid-round2.txt ' +
        '> src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    )

    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=True)
    prepare_final_submissions(3, final_runs, check_md5=True)

    evaluate_runs(round2_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_qrels, final_runs, check_md5=True)
def main():
    """Drive the round 5 pipeline and evaluate it with expected metrics.

    Checks that the first three entries of ``indexes`` are existing
    directories, then calls ``verify_stored_runs``, ``perform_runs``,
    ``perform_fusion`` and ``prepare_final_submissions`` for round 5 with MD5
    checking disabled. The cumulative runs are evaluated on the round 4
    cumulative qrels and on the complete qrels, the final runs on the round 5
    qrels; each ``evaluate_runs`` call is given a per-run table of expected
    metric values (presumably compared against the computed scores; see
    ``evaluate_runs``).

    Raises:
        SystemExit: With exit status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Fail fast: the message calls the indexes "required", and they are
        # handed to perform_runs below, so continuing cannot succeed.
        raise SystemExit(1)

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(5, final_runs, check_md5=check_md5_flag)

    # Expected scores of the cumulative runs on the round 4 cumulative qrels.
    expected_metrics = {
        'anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4580, 'judged_cut_10': 0.5880,
            'ndcg_cut_20': 0.4379, 'judged_cut_20': 0.5940,
            'map': 0.1903, 'recall_1000': 0.4525, 'judged_cut_1000': 0.2264,
        },
        'anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4912, 'judged_cut_10': 0.6240,
            'ndcg_cut_20': 0.4596, 'judged_cut_20': 0.6040,
            'map': 0.2042, 'recall_1000': 0.4714, 'judged_cut_1000': 0.2351,
        },
        'anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.3240, 'judged_cut_10': 0.5660,
            'ndcg_cut_20': 0.3055, 'judged_cut_20': 0.5250,
            'map': 0.1324, 'recall_1000': 0.3758, 'judged_cut_1000': 0.2171,
        },
        'anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4634, 'judged_cut_10': 0.6460,
            'ndcg_cut_20': 0.4387, 'judged_cut_20': 0.6280,
            'map': 0.1793, 'recall_1000': 0.4368, 'judged_cut_1000': 0.2425,
        },
        'anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4077, 'judged_cut_10': 0.6160,
            'ndcg_cut_20': 0.3907, 'judged_cut_20': 0.5920,
            'map': 0.1981, 'recall_1000': 0.4877, 'judged_cut_1000': 0.2661,
        },
        'anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4918, 'judged_cut_10': 0.6440,
            'ndcg_cut_20': 0.4569, 'judged_cut_20': 0.6250,
            'map': 0.2163, 'recall_1000': 0.5101, 'judged_cut_1000': 0.2710,
        },
        'anserini.covid-r5.fusion1.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4696, 'judged_cut_10': 0.6520,
            'ndcg_cut_20': 0.4539, 'judged_cut_20': 0.6490,
            'map': 0.2044, 'recall_1000': 0.5027, 'judged_cut_1000': 0.2751,
        },
        'anserini.covid-r5.fusion2.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5077, 'judged_cut_10': 0.6800,
            'ndcg_cut_20': 0.4956, 'judged_cut_20': 0.6690,
            'map': 0.2304, 'recall_1000': 0.5378, 'judged_cut_1000': 0.2851,
        },
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6177, 'judged_cut_10': 0.6620,
            'ndcg_cut_20': 0.5738, 'judged_cut_20': 0.6510,
            'map': 0.2657, 'recall_1000': 0.5505, 'judged_cut_1000': 0.2562,
        },
    }
    evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores of the cumulative runs on the complete qrels.
    expected_metrics = {
        'anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6925, 'judged_cut_10': 0.9740,
            'ndcg_cut_20': 0.6586, 'judged_cut_20': 0.9700,
            'map': 0.3010, 'recall_1000': 0.4636, 'judged_cut_1000': 0.4159,
        },
        'anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.7301, 'judged_cut_10': 0.9980,
            'ndcg_cut_20': 0.6979, 'judged_cut_20': 0.9900,
            'map': 0.3230, 'recall_1000': 0.4839, 'judged_cut_1000': 0.4286,
        },
        'anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.4709, 'judged_cut_10': 0.8920,
            'ndcg_cut_20': 0.4382, 'judged_cut_20': 0.8370,
            'map': 0.1777, 'recall_1000': 0.3427, 'judged_cut_1000': 0.3397,
        },
        'anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6286, 'judged_cut_10': 0.9840,
            'ndcg_cut_20': 0.5973, 'judged_cut_20': 0.9630,
            'map': 0.2391, 'recall_1000': 0.4087, 'judged_cut_1000': 0.3875,
        },
        'anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5832, 'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.5659, 'judged_cut_20': 0.9390,
            'map': 0.2808, 'recall_1000': 0.4695, 'judged_cut_1000': 0.4412,
        },
        'anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6764, 'judged_cut_10': 0.9840,
            'ndcg_cut_20': 0.6368, 'judged_cut_20': 0.9740,
            'map': 0.3089, 'recall_1000': 0.4949, 'judged_cut_1000': 0.4542,
        },
        'anserini.covid-r5.fusion1.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6469, 'judged_cut_10': 0.9860,
            'ndcg_cut_20': 0.6184, 'judged_cut_20': 0.9800,
            'map': 0.2952, 'recall_1000': 0.4967, 'judged_cut_1000': 0.4675,
        },
        'anserini.covid-r5.fusion2.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6972, 'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.6785, 'judged_cut_20': 1.0000,
            'map': 0.3329, 'recall_1000': 0.5313, 'judged_cut_1000': 0.4869,
        },
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.8395, 'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.7955, 'judged_cut_20': 0.9990,
            'map': 0.3911, 'recall_1000': 0.5536, 'judged_cut_1000': 0.4607,
        },
    }
    evaluate_runs(complete_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores of the final submissions on the round 5 qrels.
    expected_metrics = {
        'anserini.final-r5.fusion1.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5668, 'judged_cut_10': 0.9140,
            'ndcg_cut_20': 0.5244, 'judged_cut_20': 0.8490,
            'map': 0.2302, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2148,
        },
        'anserini.final-r5.fusion1.post-processed.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.5726, 'judged_cut_10': 0.9240,
            'ndcg_cut_20': 0.5313, 'judged_cut_20': 0.8570,
            'map': 0.2314, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2151,
        },
        'anserini.final-r5.fusion2.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6366, 'judged_cut_10': 0.9640,
            'ndcg_cut_20': 0.5941, 'judged_cut_20': 0.9080,
            'map': 0.2716, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2263,
        },
        'anserini.final-r5.fusion2.post-processed.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.6474, 'judged_cut_10': 0.9780,
            'ndcg_cut_20': 0.6007, 'judged_cut_20': 0.9150,
            'map': 0.2734, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2267,
        },
        'anserini.final-r5.rf.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.7777, 'judged_cut_10': 0.9680,
            'ndcg_cut_20': 0.7193, 'judged_cut_20': 0.9270,
            'map': 0.3235, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2197,
        },
        'anserini.final-r5.rf.post-processed.txt': {
            'topics': 50,
            'ndcg_cut_10': 0.7944, 'judged_cut_10': 0.9860,
            'ndcg_cut_20': 0.7346, 'judged_cut_20': 0.9470,
            'map': 0.3280, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2201,
        },
    }
    evaluate_runs(round5_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag)
def main():
    """Drive the round 3 pipeline and evaluate it with expected metrics.

    Checks that the first three entries of ``indexes`` are existing
    directories, builds the round 2 cumulative qrels by concatenating the
    round 1 and round 2 qrels files, then calls ``verify_stored_runs``,
    ``perform_runs``, ``perform_fusion`` and ``prepare_final_submissions`` for
    round 3 with MD5 checking disabled. The cumulative runs are evaluated on
    the round 2 and round 3 cumulative qrels, the final runs on the round 3
    qrels; each ``evaluate_runs`` call is given a per-run table of expected
    metric values (presumably compared against the computed scores; see
    ``evaluate_runs``).

    Raises:
        SystemExit: With exit status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Fail fast: the message calls the indexes "required", and they are
        # handed to perform_runs below, so continuing cannot succeed.
        raise SystemExit(1)

    # Round 2 cumulative qrels = round 1 qrels followed by round 2 qrels.
    os.system(
        'cat src/main/resources/topics-and-qrels/qrels.covid-round1.txt ' +
        'src/main/resources/topics-and-qrels/qrels.covid-round2.txt ' +
        '> src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    )

    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(3, final_runs, check_md5=check_md5_flag)

    # Expected scores of the cumulative runs on the round 2 cumulative qrels.
    expected_metrics = {
        'anserini.covid-r3.abstract.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2118, 'judged_cut_10': 0.3300,
            'ndcg_cut_20': 0.2043, 'judged_cut_20': 0.3150,
            'map': 0.0951, 'recall_1000': 0.4398, 'judged_cut_1000': 0.1275,
        },
        'anserini.covid-r3.abstract.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2470, 'judged_cut_10': 0.3375,
            'ndcg_cut_20': 0.2256, 'judged_cut_20': 0.3175,
            'map': 0.1023, 'recall_1000': 0.4537, 'judged_cut_1000': 0.1248,
        },
        'anserini.covid-r3.full-text.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2337, 'judged_cut_10': 0.4650,
            'ndcg_cut_20': 0.2259, 'judged_cut_20': 0.4425,
            'map': 0.1099, 'recall_1000': 0.4817, 'judged_cut_1000': 0.1490,
        },
        'anserini.covid-r3.full-text.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3430, 'judged_cut_10': 0.5025,
            'ndcg_cut_20': 0.3077, 'judged_cut_20': 0.4888,
            'map': 0.1426, 'recall_1000': 0.5267, 'judged_cut_1000': 0.1575,
        },
        'anserini.covid-r3.paragraph.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.2848, 'judged_cut_10': 0.5175,
            'ndcg_cut_20': 0.2734, 'judged_cut_20': 0.4938,
            'map': 0.1390, 'recall_1000': 0.5527, 'judged_cut_1000': 0.1727,
        },
        'anserini.covid-r3.paragraph.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3604, 'judged_cut_10': 0.5050,
            'ndcg_cut_20': 0.3213, 'judged_cut_20': 0.4875,
            'map': 0.1520, 'recall_1000': 0.5676, 'judged_cut_1000': 0.1672,
        },
        'anserini.covid-r3.fusion1.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3093, 'judged_cut_10': 0.4975,
            'ndcg_cut_20': 0.2933, 'judged_cut_20': 0.5025,
            'map': 0.1400, 'recall_1000': 0.5566, 'judged_cut_1000': 0.1750,
        },
        'anserini.covid-r3.fusion2.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3568, 'judged_cut_10': 0.5250,
            'ndcg_cut_20': 0.3273, 'judged_cut_20': 0.4925,
            'map': 0.1564, 'recall_1000': 0.5769, 'judged_cut_1000': 0.1715,
        },
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3633, 'judged_cut_10': 0.3800,
            'ndcg_cut_20': 0.3175, 'judged_cut_20': 0.3600,
            'map': 0.1526, 'recall_1000': 0.5722, 'judged_cut_1000': 0.1398,
        },
    }
    evaluate_runs(round2_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores of the cumulative runs on the round 3 cumulative qrels.
    expected_metrics = {
        'anserini.covid-r3.abstract.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5781, 'judged_cut_10': 0.8875,
            'ndcg_cut_20': 0.5359, 'judged_cut_20': 0.8325,
            'map': 0.2348, 'recall_1000': 0.5040, 'judged_cut_1000': 0.2351,
        },
        'anserini.covid-r3.abstract.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6291, 'judged_cut_10': 0.9300,
            'ndcg_cut_20': 0.5972, 'judged_cut_20': 0.8925,
            'map': 0.2525, 'recall_1000': 0.5215, 'judged_cut_1000': 0.2370,
        },
        'anserini.covid-r3.full-text.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.3977, 'judged_cut_10': 0.7500,
            'ndcg_cut_20': 0.3681, 'judged_cut_20': 0.7213,
            'map': 0.1646, 'recall_1000': 0.4708, 'judged_cut_1000': 0.2471,
        },
        'anserini.covid-r3.full-text.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5790, 'judged_cut_10': 0.9050,
            'ndcg_cut_20': 0.5234, 'judged_cut_20': 0.8525,
            'map': 0.2236, 'recall_1000': 0.5313, 'judged_cut_1000': 0.2693,
        },
        'anserini.covid-r3.paragraph.qq.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5396, 'judged_cut_10': 0.9425,
            'ndcg_cut_20': 0.5079, 'judged_cut_20': 0.9050,
            'map': 0.2498, 'recall_1000': 0.5766, 'judged_cut_1000': 0.2978,
        },
        'anserini.covid-r3.paragraph.qdel.bm25.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6327, 'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.5793, 'judged_cut_20': 0.9162,
            'map': 0.2753, 'recall_1000': 0.5923, 'judged_cut_1000': 0.2956,
        },
        'anserini.covid-r3.fusion1.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5924, 'judged_cut_10': 0.9625,
            'ndcg_cut_20': 0.5563, 'judged_cut_20': 0.9362,
            'map': 0.2700, 'recall_1000': 0.5956, 'judged_cut_1000': 0.3045,
        },
        'anserini.covid-r3.fusion2.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6515, 'judged_cut_10': 0.9875,
            'ndcg_cut_20': 0.6200, 'judged_cut_20': 0.9675,
            'map': 0.3027, 'recall_1000': 0.6194, 'judged_cut_1000': 0.3076,
        },
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.7459, 'judged_cut_10': 0.9875,
            'ndcg_cut_20': 0.7023, 'judged_cut_20': 0.9637,
            'map': 0.3190, 'recall_1000': 0.6125, 'judged_cut_1000': 0.2600,
        },
    }
    evaluate_runs(round3_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores of the final submissions on the round 3 qrels.
    expected_metrics = {
        'anserini.final-r3.fusion1.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5339, 'judged_cut_10': 0.8400,
            'ndcg_cut_20': 0.4875, 'judged_cut_20': 0.7637,
            'map': 0.2283, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1370,
        },
        'anserini.final-r3.fusion1.post-processed.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.5359, 'judged_cut_10': 0.8475,
            'ndcg_cut_20': 0.4902, 'judged_cut_20': 0.7675,
            'map': 0.2293, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1373,
        },
        'anserini.final-r3.fusion2.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6072, 'judged_cut_10': 0.9025,
            'ndcg_cut_20': 0.5599, 'judged_cut_20': 0.8337,
            'map': 0.2631, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1431,
        },
        'anserini.final-r3.fusion2.post-processed.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6100, 'judged_cut_10': 0.9100,
            'ndcg_cut_20': 0.5617, 'judged_cut_20': 0.8375,
            'map': 0.2641, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1434,
        },
        'anserini.final-r3.rf.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6812, 'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.6255, 'judged_cut_20': 0.8450,
            'map': 0.2787, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1246,
        },
        'anserini.final-r3.rf.post-processed.txt': {
            'topics': 40,
            'ndcg_cut_10': 0.6883, 'judged_cut_10': 0.9750,
            'ndcg_cut_20': 0.6321, 'judged_cut_20': 0.8538,
            'map': 0.2817, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1250,
        },
    }
    evaluate_runs(round3_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag)
def main():
    """Drive the round 2 pipeline and evaluate it with expected metrics.

    Checks that the first three entries of ``indexes`` are existing
    directories, then calls ``perform_runs``, ``perform_fusion`` and
    ``prepare_final_submissions`` with MD5 checking disabled. The cumulative
    runs are evaluated on the round 1 qrels and the final runs on the round 2
    qrels; each ``evaluate_runs`` call is given a per-run table of expected
    metric values (presumably compared against the computed scores; see
    ``evaluate_runs``).

    Raises:
        SystemExit: With exit status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Fail fast: the message calls the indexes "required", so there is no
        # point in starting the runs without them.
        raise SystemExit(1)

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'

    # Note that this script was written after this issue was noted: https://github.com/castorini/anserini/issues/1669
    # Thus, no point in checking MD5.
    check_md5_flag = False

    perform_runs()
    perform_fusion(cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(final_runs, check_md5=check_md5_flag)

    # Expected scores of the cumulative runs on the round 1 qrels.
    expected_metrics = {
        'anserini.covid-r2.abstract.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3522, 'judged_cut_10': 0.5371,
            'ndcg_cut_20': 0.3171, 'judged_cut_20': 0.5100,
            'map': 0.1752, 'recall_1000': 0.6601, 'judged_cut_1000': 0.1013,
        },
        'anserini.covid-r2.abstract.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3781, 'judged_cut_10': 0.5371,
            'ndcg_cut_20': 0.3462, 'judged_cut_20': 0.4829,
            'map': 0.1804, 'recall_1000': 0.6485, 'judged_cut_1000': 0.0958,
        },
        'anserini.covid-r2.full-text.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.2070, 'judged_cut_10': 0.4286,
            'ndcg_cut_20': 0.1931, 'judged_cut_20': 0.3929,
            'map': 0.1159, 'recall_1000': 0.5953, 'judged_cut_1000': 0.0995,
        },
        'anserini.covid-r2.full-text.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3123, 'judged_cut_10': 0.4229,
            'ndcg_cut_20': 0.2738, 'judged_cut_20': 0.3929,
            'map': 0.1473, 'recall_1000': 0.6517, 'judged_cut_1000': 0.1022,
        },
        'anserini.covid-r2.paragraph.qq.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.2772, 'judged_cut_10': 0.4400,
            'ndcg_cut_20': 0.2579, 'judged_cut_20': 0.4529,
            'map': 0.1607, 'recall_1000': 0.7248, 'judged_cut_1000': 0.1220,
        },
        'anserini.covid-r2.paragraph.qdel.bm25.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3353, 'judged_cut_10': 0.4343,
            'ndcg_cut_20': 0.2956, 'judged_cut_20': 0.4329,
            'map': 0.1772, 'recall_1000': 0.7196, 'judged_cut_1000': 0.1136,
        },
        'anserini.covid-r2.fusion1.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3297, 'judged_cut_10': 0.4657,
            'ndcg_cut_20': 0.3060, 'judged_cut_20': 0.4643,
            'map': 0.1914, 'recall_1000': 0.7561, 'judged_cut_1000': 0.1304,
        },
        'anserini.covid-r2.fusion2.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.3679, 'judged_cut_10': 0.4829,
            'ndcg_cut_20': 0.3360, 'judged_cut_20': 0.4557,
            'map': 0.2066, 'recall_1000': 0.7511, 'judged_cut_1000': 0.1200,
        },
    }
    evaluate_runs(round1_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

    # Note that recall@1k doesn't match the figures reported here:
    # https://github.com/castorini/anserini/blob/master/docs/experiments-covid.md
    expected_metrics = {
        'anserini.final-r2.fusion1.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.4827, 'judged_cut_10': 0.9543,
            'ndcg_cut_20': 0.4512, 'judged_cut_20': 0.8614,
            'map': 0.2431, 'recall_1000': 0.6475, 'judged_cut_1000': 0.1463,
        },
        'anserini.final-r2.fusion2.txt': {
            'topics': 35,
            'ndcg_cut_10': 0.5553, 'judged_cut_10': 0.9743,
            'ndcg_cut_20': 0.5058, 'judged_cut_20': 0.8957,
            'map': 0.2739, 'recall_1000': 0.6832, 'judged_cut_1000': 0.1528,
        },
    }
    evaluate_runs(round2_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag)