def main():
    """Drive the round 4 regression for the ``expanded.anserini`` runs.

    After confirming that the three index directories in ``indexes`` exist,
    this calls, in order: ``verify_stored_runs``, ``perform_runs``,
    ``perform_fusion``, ``prepare_final_submissions`` (using the round 3
    cumulative qrels), and then ``evaluate_runs`` twice -- the cumulative
    runs against the round 4 cumulative qrels and the final runs against
    the round 4 qrels -- each time passing the expected metric values
    listed below.

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'
    round4_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4.txt'
    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(round3_cumulative_qrels)
    perform_fusion(check_md5=check_md5_flag)
    prepare_final_submissions(round3_cumulative_qrels, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs on the round 4 cumulative qrels.
    expected_metrics = {
        'expanded.anserini.covid-r4.abstract.qq.bm25.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6115, 'judged_cut_10': 0.8022,
            'ndcg_cut_20': 0.5823, 'judged_cut_20': 0.7900,
            'map': 0.2499, 'recall_1000': 0.5038, 'judged_cut_1000': 0.2676,
        },
        'expanded.anserini.covid-r4.abstract.qdel.bm25.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6321, 'judged_cut_10': 0.8022,
            'ndcg_cut_20': 0.5922, 'judged_cut_20': 0.7678,
            'map': 0.2528, 'recall_1000': 0.5098, 'judged_cut_1000': 0.2672,
        },
        'expanded.anserini.covid-r4.full-text.qq.bm25.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6045, 'judged_cut_10': 0.9044,
            'ndcg_cut_20': 0.5640, 'judged_cut_20': 0.8522,
            'map': 0.2420, 'recall_1000': 0.4996, 'judged_cut_1000': 0.3037,
        },
        'expanded.anserini.covid-r4.full-text.qdel.bm25.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6514, 'judged_cut_10': 0.9289,
            'ndcg_cut_20': 0.5991, 'judged_cut_20': 0.8711,
            'map': 0.2665, 'recall_1000': 0.5240, 'judged_cut_1000': 0.3114,
        },
        'expanded.anserini.covid-r4.paragraph.qq.bm25.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6429, 'judged_cut_10': 0.8622,
            'ndcg_cut_20': 0.6080, 'judged_cut_20': 0.8333,
            'map': 0.2932, 'recall_1000': 0.5635, 'judged_cut_1000': 0.3256,
        },
        'expanded.anserini.covid-r4.paragraph.qdel.bm25.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6694, 'judged_cut_10': 0.8622,
            'ndcg_cut_20': 0.6229, 'judged_cut_20': 0.8411,
            'map': 0.2953, 'recall_1000': 0.5677, 'judged_cut_1000': 0.3232,
        },
        'expanded.anserini.covid-r4.fusion1.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6739, 'judged_cut_10': 0.8778,
            'ndcg_cut_20': 0.6188, 'judged_cut_20': 0.8533,
            'map': 0.2914, 'recall_1000': 0.5750, 'judged_cut_1000': 0.3362,
        },
        'expanded.anserini.covid-r4.fusion2.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6618, 'judged_cut_10': 0.8622,
            'ndcg_cut_20': 0.6331, 'judged_cut_20': 0.8444,
            'map': 0.2974, 'recall_1000': 0.5847, 'judged_cut_1000': 0.3344,
        },
        'expanded.anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 45, 'ndcg_cut_10': 0.7447, 'judged_cut_10': 0.8933,
            'ndcg_cut_20': 0.7067, 'judged_cut_20': 0.8589,
            'map': 0.3182, 'recall_1000': 0.5812, 'judged_cut_1000': 0.2904,
        },
    }
    evaluate_runs(round4_cumulative_qrels, cumulative_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores for the final submissions on the round 4 qrels.
    expected_metrics = {
        'expanded.anserini.final-r4.fusion1.txt': {
            'topics': 45, 'ndcg_cut_10': 0.5395, 'judged_cut_10': 0.7222,
            'ndcg_cut_20': 0.5115, 'judged_cut_20': 0.6944,
            'map': 0.2498, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424,
        },
        'expanded.anserini.final-r4.fusion2.txt': {
            'topics': 45, 'ndcg_cut_10': 0.5630, 'judged_cut_10': 0.7444,
            'ndcg_cut_20': 0.5175, 'judged_cut_20': 0.6911,
            'map': 0.2550, 'recall_1000': 0.6800, 'judged_cut_1000': 0.1434,
        },
        'expanded.anserini.final-r4.rf.txt': {
            'topics': 45, 'ndcg_cut_10': 0.6062, 'judged_cut_10': 0.7378,
            'ndcg_cut_20': 0.5606, 'judged_cut_20': 0.6833,
            'map': 0.2658, 'recall_1000': 0.6759, 'judged_cut_1000': 0.1284,
        },
    }
    evaluate_runs(round4_qrels, final_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)
def main():
    """Drive the regression against the round 4 cumulative qrels.

    After confirming that the three index directories in ``indexes`` exist,
    this calls, in order: ``verify_stored_runs``, ``perform_runs``,
    ``perform_fusion``, ``prepare_final_submissions`` and ``evaluate_runs``
    (the latter two with the round 4 cumulative qrels).

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'

    verify_stored_runs(stored_runs)
    perform_runs()
    perform_fusion()
    prepare_final_submissions(cumulative_qrels)
    evaluate_runs(cumulative_qrels, cumulative_runs)
def main():
    """Drive the round 5 regression with MD5 checking enabled.

    After confirming that the three index directories in ``indexes`` exist,
    this calls, in order: ``verify_stored_runs``, ``perform_runs``,
    ``perform_fusion`` and ``prepare_final_submissions`` (all for round 5),
    then ``evaluate_runs`` three times: the cumulative runs against the
    round 4 cumulative qrels and the complete qrels, and the final runs
    against the round 5 qrels.

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=True)
    prepare_final_submissions(5, final_runs, check_md5=True)

    evaluate_runs(round4_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(complete_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round5_qrels, final_runs, check_md5=True)
def main():
    """Drive the round 3 regression with MD5 checking enabled.

    After confirming that the three index directories in ``indexes`` exist,
    this writes the round 2 cumulative qrels file as the concatenation of
    the round 1 and round 2 qrels, then calls, in order:
    ``verify_stored_runs``, ``perform_runs``, ``perform_fusion`` and
    ``prepare_final_submissions`` (all for round 3), and finally
    ``evaluate_runs`` three times: the cumulative runs against the round 2
    and round 3 cumulative qrels, and the final runs against the round 3
    qrels.

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
        OSError: if a source qrels file cannot be read or the cumulative
            file cannot be written.
    """
    import shutil

    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'
    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # Concatenate in-process rather than shelling out to `cat`: this is
    # portable and raises on a missing input instead of silently leaving
    # a truncated cumulative file.
    with open(round2_cumulative_qrels, 'wb') as merged:
        for part in (round1_qrels, round2_qrels):
            with open(part, 'rb') as source:
                shutil.copyfileobj(source, merged)

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=True)
    prepare_final_submissions(3, final_runs, check_md5=True)

    evaluate_runs(round2_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_cumulative_qrels, cumulative_runs, check_md5=True)
    evaluate_runs(round3_qrels, final_runs, check_md5=True)
def main():
    """Drive the round 3 regression, including the NIST post-processed runs.

    After confirming that the three index directories in ``indexes`` exist,
    this writes ``qrels.covid-round12.txt`` as the concatenation of the
    round 1 and round 2 qrels, then calls, in order:
    ``verify_stored_runs``, ``perform_runs``, ``perform_fusion`` and
    ``prepare_final_submissions``. It evaluates the cumulative runs against
    the round 1+2 and round 3 cumulative qrels, downloads the three
    post-processed run files into ``runs``, and evaluates the final runs
    against the round 3 qrels.

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
        OSError: if a source qrels file cannot be read or the combined
            file cannot be written.
    """
    import shutil

    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round12.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # Concatenate in-process rather than shelling out to `cat`: this is
    # portable and raises on a missing input instead of silently leaving
    # a truncated combined file.
    with open(round2_cumulative_qrels, 'wb') as merged:
        for part in (round1_qrels, round2_qrels):
            with open(part, 'rb') as source:
                shutil.copyfileobj(source, merged)

    verify_stored_runs(stored_runs)
    perform_runs()
    perform_fusion()
    prepare_final_submissions(round2_cumulative_qrels)

    evaluate_runs(round2_cumulative_qrels, cumulative_runs)
    evaluate_runs(round3_cumulative_qrels, cumulative_runs)

    # Download the NIST post-processed runs.
    print('')
    post_processed_urls = (
        'https://www.dropbox.com/s/ilqgky1tti0zvez/anserini.final-r3.fusion1.post-processed.txt?dl=1',
        'https://www.dropbox.com/s/ue3z6xxxca9krkb/anserini.final-r3.fusion2.post-processed.txt?dl=1',
        'https://www.dropbox.com/s/95vk831wp1ldnpm/anserini.final-r3.rf.post-processed.txt?dl=1',
    )
    for url in post_processed_urls:
        download_url(url, 'runs', force=True)

    evaluate_runs(round3_qrels, final_runs)
def main():
    """Drive the round 5 regression and check runs against expected metrics.

    After confirming that the three index directories in ``indexes`` exist,
    this calls, in order: ``verify_stored_runs``, ``perform_runs``,
    ``perform_fusion`` and ``prepare_final_submissions`` (all for round 5),
    then ``evaluate_runs`` three times, each with its own table of expected
    metric values: the cumulative runs against the round 4 cumulative qrels
    and the complete qrels, and the final runs against the round 5 qrels.

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(5, indexes)
    perform_fusion(5, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(5, final_runs, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs on the round 4 cumulative qrels.
    expected_metrics = {
        'anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4580, 'judged_cut_10': 0.5880,
            'ndcg_cut_20': 0.4379, 'judged_cut_20': 0.5940,
            'map': 0.1903, 'recall_1000': 0.4525, 'judged_cut_1000': 0.2264,
        },
        'anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4912, 'judged_cut_10': 0.6240,
            'ndcg_cut_20': 0.4596, 'judged_cut_20': 0.6040,
            'map': 0.2042, 'recall_1000': 0.4714, 'judged_cut_1000': 0.2351,
        },
        'anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.3240, 'judged_cut_10': 0.5660,
            'ndcg_cut_20': 0.3055, 'judged_cut_20': 0.5250,
            'map': 0.1324, 'recall_1000': 0.3758, 'judged_cut_1000': 0.2171,
        },
        'anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4634, 'judged_cut_10': 0.6460,
            'ndcg_cut_20': 0.4387, 'judged_cut_20': 0.6280,
            'map': 0.1793, 'recall_1000': 0.4368, 'judged_cut_1000': 0.2425,
        },
        'anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4077, 'judged_cut_10': 0.6160,
            'ndcg_cut_20': 0.3907, 'judged_cut_20': 0.5920,
            'map': 0.1981, 'recall_1000': 0.4877, 'judged_cut_1000': 0.2661,
        },
        'anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4918, 'judged_cut_10': 0.6440,
            'ndcg_cut_20': 0.4569, 'judged_cut_20': 0.6250,
            'map': 0.2163, 'recall_1000': 0.5101, 'judged_cut_1000': 0.2710,
        },
        'anserini.covid-r5.fusion1.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4696, 'judged_cut_10': 0.6520,
            'ndcg_cut_20': 0.4539, 'judged_cut_20': 0.6490,
            'map': 0.2044, 'recall_1000': 0.5027, 'judged_cut_1000': 0.2751,
        },
        'anserini.covid-r5.fusion2.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5077, 'judged_cut_10': 0.6800,
            'ndcg_cut_20': 0.4956, 'judged_cut_20': 0.6690,
            'map': 0.2304, 'recall_1000': 0.5378, 'judged_cut_1000': 0.2851,
        },
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6177, 'judged_cut_10': 0.6620,
            'ndcg_cut_20': 0.5738, 'judged_cut_20': 0.6510,
            'map': 0.2657, 'recall_1000': 0.5505, 'judged_cut_1000': 0.2562,
        },
    }
    evaluate_runs(round4_cumulative_qrels, cumulative_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs on the complete qrels.
    expected_metrics = {
        'anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6925, 'judged_cut_10': 0.9740,
            'ndcg_cut_20': 0.6586, 'judged_cut_20': 0.9700,
            'map': 0.3010, 'recall_1000': 0.4636, 'judged_cut_1000': 0.4159,
        },
        'anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.7301, 'judged_cut_10': 0.9980,
            'ndcg_cut_20': 0.6979, 'judged_cut_20': 0.9900,
            'map': 0.3230, 'recall_1000': 0.4839, 'judged_cut_1000': 0.4286,
        },
        'anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4709, 'judged_cut_10': 0.8920,
            'ndcg_cut_20': 0.4382, 'judged_cut_20': 0.8370,
            'map': 0.1777, 'recall_1000': 0.3427, 'judged_cut_1000': 0.3397,
        },
        'anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6286, 'judged_cut_10': 0.9840,
            'ndcg_cut_20': 0.5973, 'judged_cut_20': 0.9630,
            'map': 0.2391, 'recall_1000': 0.4087, 'judged_cut_1000': 0.3875,
        },
        'anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5832, 'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.5659, 'judged_cut_20': 0.9390,
            'map': 0.2808, 'recall_1000': 0.4695, 'judged_cut_1000': 0.4412,
        },
        'anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6764, 'judged_cut_10': 0.9840,
            'ndcg_cut_20': 0.6368, 'judged_cut_20': 0.9740,
            'map': 0.3089, 'recall_1000': 0.4949, 'judged_cut_1000': 0.4542,
        },
        'anserini.covid-r5.fusion1.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6469, 'judged_cut_10': 0.9860,
            'ndcg_cut_20': 0.6184, 'judged_cut_20': 0.9800,
            'map': 0.2952, 'recall_1000': 0.4967, 'judged_cut_1000': 0.4675,
        },
        'anserini.covid-r5.fusion2.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6972, 'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.6785, 'judged_cut_20': 1.000,
            'map': 0.3329, 'recall_1000': 0.5313, 'judged_cut_1000': 0.4869,
        },
        'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50, 'ndcg_cut_10': 0.8395, 'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.7955, 'judged_cut_20': 0.9990,
            'map': 0.3911, 'recall_1000': 0.5536, 'judged_cut_1000': 0.4607,
        },
    }
    evaluate_runs(complete_qrels, cumulative_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores for the final submissions on the round 5 qrels.
    expected_metrics = {
        'anserini.final-r5.fusion1.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5668, 'judged_cut_10': 0.9140,
            'ndcg_cut_20': 0.5244, 'judged_cut_20': 0.8490,
            'map': 0.2302, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2148,
        },
        'anserini.final-r5.fusion1.post-processed.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5726, 'judged_cut_10': 0.9240,
            'ndcg_cut_20': 0.5313, 'judged_cut_20': 0.8570,
            'map': 0.2314, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2151,
        },
        'anserini.final-r5.fusion2.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6366, 'judged_cut_10': 0.9640,
            'ndcg_cut_20': 0.5941, 'judged_cut_20': 0.9080,
            'map': 0.2716, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2263,
        },
        'anserini.final-r5.fusion2.post-processed.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6474, 'judged_cut_10': 0.9780,
            'ndcg_cut_20': 0.6007, 'judged_cut_20': 0.9150,
            'map': 0.2734, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2267,
        },
        'anserini.final-r5.rf.txt': {
            'topics': 50, 'ndcg_cut_10': 0.7777, 'judged_cut_10': 0.9680,
            'ndcg_cut_20': 0.7193, 'judged_cut_20': 0.9270,
            'map': 0.3235, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2197,
        },
        'anserini.final-r5.rf.post-processed.txt': {
            'topics': 50, 'ndcg_cut_10': 0.7944, 'judged_cut_10': 0.9860,
            'ndcg_cut_20': 0.7346, 'judged_cut_20': 0.9470,
            'map': 0.3280, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2201,
        },
    }
    evaluate_runs(round5_qrels, final_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)
def main():
    """Drive the round 3 regression and check runs against expected metrics.

    After confirming that the three index directories in ``indexes`` exist,
    this writes the round 2 cumulative qrels file as the concatenation of
    the round 1 and round 2 qrels, then calls, in order:
    ``verify_stored_runs``, ``perform_runs``, ``perform_fusion`` and
    ``prepare_final_submissions`` (all for round 3). It then calls
    ``evaluate_runs`` three times, each with its own table of expected
    metric values: the cumulative runs against the round 2 and round 3
    cumulative qrels, and the final runs against the round 3 qrels.

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
        OSError: if a source qrels file cannot be read or the cumulative
            file cannot be written.
    """
    import shutil

    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
    round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'
    round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt'
    round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt'
    round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt'

    # Concatenate in-process rather than shelling out to `cat`: this is
    # portable and raises on a missing input instead of silently leaving
    # a truncated cumulative file.
    with open(round2_cumulative_qrels, 'wb') as merged:
        for part in (round1_qrels, round2_qrels):
            with open(part, 'rb') as source:
                shutil.copyfileobj(source, merged)

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs(3, indexes)
    perform_fusion(3, cumulative_runs, check_md5=check_md5_flag)
    prepare_final_submissions(3, final_runs, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs on the round 2 cumulative qrels.
    expected_metrics = {
        'anserini.covid-r3.abstract.qq.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.2118, 'judged_cut_10': 0.3300,
            'ndcg_cut_20': 0.2043, 'judged_cut_20': 0.3150,
            'map': 0.0951, 'recall_1000': 0.4398, 'judged_cut_1000': 0.1275,
        },
        'anserini.covid-r3.abstract.qdel.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.2470, 'judged_cut_10': 0.3375,
            'ndcg_cut_20': 0.2256, 'judged_cut_20': 0.3175,
            'map': 0.1023, 'recall_1000': 0.4537, 'judged_cut_1000': 0.1248,
        },
        'anserini.covid-r3.full-text.qq.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.2337, 'judged_cut_10': 0.4650,
            'ndcg_cut_20': 0.2259, 'judged_cut_20': 0.4425,
            'map': 0.1099, 'recall_1000': 0.4817, 'judged_cut_1000': 0.1490,
        },
        'anserini.covid-r3.full-text.qdel.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.3430, 'judged_cut_10': 0.5025,
            'ndcg_cut_20': 0.3077, 'judged_cut_20': 0.4888,
            'map': 0.1426, 'recall_1000': 0.5267, 'judged_cut_1000': 0.1575,
        },
        'anserini.covid-r3.paragraph.qq.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.2848, 'judged_cut_10': 0.5175,
            'ndcg_cut_20': 0.2734, 'judged_cut_20': 0.4938,
            'map': 0.1390, 'recall_1000': 0.5527, 'judged_cut_1000': 0.1727,
        },
        'anserini.covid-r3.paragraph.qdel.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.3604, 'judged_cut_10': 0.5050,
            'ndcg_cut_20': 0.3213, 'judged_cut_20': 0.4875,
            'map': 0.1520, 'recall_1000': 0.5676, 'judged_cut_1000': 0.1672,
        },
        'anserini.covid-r3.fusion1.txt': {
            'topics': 40, 'ndcg_cut_10': 0.3093, 'judged_cut_10': 0.4975,
            'ndcg_cut_20': 0.2933, 'judged_cut_20': 0.5025,
            'map': 0.1400, 'recall_1000': 0.5566, 'judged_cut_1000': 0.1750,
        },
        'anserini.covid-r3.fusion2.txt': {
            'topics': 40, 'ndcg_cut_10': 0.3568, 'judged_cut_10': 0.5250,
            'ndcg_cut_20': 0.3273, 'judged_cut_20': 0.4925,
            'map': 0.1564, 'recall_1000': 0.5769, 'judged_cut_1000': 0.1715,
        },
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 40, 'ndcg_cut_10': 0.3633, 'judged_cut_10': 0.3800,
            'ndcg_cut_20': 0.3175, 'judged_cut_20': 0.3600,
            'map': 0.1526, 'recall_1000': 0.5722, 'judged_cut_1000': 0.1398,
        },
    }
    evaluate_runs(round2_cumulative_qrels, cumulative_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs on the round 3 cumulative qrels.
    expected_metrics = {
        'anserini.covid-r3.abstract.qq.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.5781, 'judged_cut_10': 0.8875,
            'ndcg_cut_20': 0.5359, 'judged_cut_20': 0.8325,
            'map': 0.2348, 'recall_1000': 0.5040, 'judged_cut_1000': 0.2351,
        },
        'anserini.covid-r3.abstract.qdel.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.6291, 'judged_cut_10': 0.9300,
            'ndcg_cut_20': 0.5972, 'judged_cut_20': 0.8925,
            'map': 0.2525, 'recall_1000': 0.5215, 'judged_cut_1000': 0.2370,
        },
        'anserini.covid-r3.full-text.qq.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.3977, 'judged_cut_10': 0.7500,
            'ndcg_cut_20': 0.3681, 'judged_cut_20': 0.7213,
            'map': 0.1646, 'recall_1000': 0.4708, 'judged_cut_1000': 0.2471,
        },
        'anserini.covid-r3.full-text.qdel.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.5790, 'judged_cut_10': 0.9050,
            'ndcg_cut_20': 0.5234, 'judged_cut_20': 0.8525,
            'map': 0.2236, 'recall_1000': 0.5313, 'judged_cut_1000': 0.2693,
        },
        'anserini.covid-r3.paragraph.qq.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.5396, 'judged_cut_10': 0.9425,
            'ndcg_cut_20': 0.5079, 'judged_cut_20': 0.9050,
            'map': 0.2498, 'recall_1000': 0.5766, 'judged_cut_1000': 0.2978,
        },
        'anserini.covid-r3.paragraph.qdel.bm25.txt': {
            'topics': 40, 'ndcg_cut_10': 0.6327, 'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.5793, 'judged_cut_20': 0.9162,
            'map': 0.2753, 'recall_1000': 0.5923, 'judged_cut_1000': 0.2956,
        },
        'anserini.covid-r3.fusion1.txt': {
            'topics': 40, 'ndcg_cut_10': 0.5924, 'judged_cut_10': 0.9625,
            'ndcg_cut_20': 0.5563, 'judged_cut_20': 0.9362,
            'map': 0.2700, 'recall_1000': 0.5956, 'judged_cut_1000': 0.3045,
        },
        'anserini.covid-r3.fusion2.txt': {
            'topics': 40, 'ndcg_cut_10': 0.6515, 'judged_cut_10': 0.9875,
            'ndcg_cut_20': 0.6200, 'judged_cut_20': 0.9675,
            'map': 0.3027, 'recall_1000': 0.6194, 'judged_cut_1000': 0.3076,
        },
        'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 40, 'ndcg_cut_10': 0.7459, 'judged_cut_10': 0.9875,
            'ndcg_cut_20': 0.7023, 'judged_cut_20': 0.9637,
            'map': 0.3190, 'recall_1000': 0.6125, 'judged_cut_1000': 0.2600,
        },
    }
    evaluate_runs(round3_cumulative_qrels, cumulative_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores for the final submissions on the round 3 qrels.
    expected_metrics = {
        'anserini.final-r3.fusion1.txt': {
            'topics': 40, 'ndcg_cut_10': 0.5339, 'judged_cut_10': 0.8400,
            'ndcg_cut_20': 0.4875, 'judged_cut_20': 0.7637,
            'map': 0.2283, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1370,
        },
        'anserini.final-r3.fusion1.post-processed.txt': {
            'topics': 40, 'ndcg_cut_10': 0.5359, 'judged_cut_10': 0.8475,
            'ndcg_cut_20': 0.4902, 'judged_cut_20': 0.7675,
            'map': 0.2293, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1373,
        },
        'anserini.final-r3.fusion2.txt': {
            'topics': 40, 'ndcg_cut_10': 0.6072, 'judged_cut_10': 0.9025,
            'ndcg_cut_20': 0.5599, 'judged_cut_20': 0.8337,
            'map': 0.2631, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1431,
        },
        'anserini.final-r3.fusion2.post-processed.txt': {
            'topics': 40, 'ndcg_cut_10': 0.6100, 'judged_cut_10': 0.9100,
            'ndcg_cut_20': 0.5617, 'judged_cut_20': 0.8375,
            'map': 0.2641, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1434,
        },
        'anserini.final-r3.rf.txt': {
            'topics': 40, 'ndcg_cut_10': 0.6812, 'judged_cut_10': 0.9600,
            'ndcg_cut_20': 0.6255, 'judged_cut_20': 0.8450,
            'map': 0.2787, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1246,
        },
        'anserini.final-r3.rf.post-processed.txt': {
            'topics': 40, 'ndcg_cut_10': 0.6883, 'judged_cut_10': 0.9750,
            'ndcg_cut_20': 0.6321, 'judged_cut_20': 0.8538,
            'map': 0.2817, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1250,
        },
    }
    evaluate_runs(round3_qrels, final_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)
def main():
    """Drive the round 5 regression for the ``expanded.anserini`` runs.

    After confirming that the three index directories in ``indexes`` exist,
    this calls, in order: ``verify_stored_runs``, ``perform_runs``,
    ``perform_fusion`` and ``prepare_final_submissions`` (using the round 4
    cumulative qrels). It then calls ``evaluate_runs`` three times, each
    with its own table of expected metric values: the cumulative runs
    against the round 4 cumulative qrels and the complete qrels, and the
    final runs against the round 5 qrels.

    Raises:
        SystemExit: with status 1 if any of the three index directories
            is missing.
    """
    required_indexes = (indexes[0], indexes[1], indexes[2])
    if not all(os.path.isdir(path) for path in required_indexes):
        print('Required indexes do not exist. Please download first.')
        # Stop here: continuing without the indexes only produces
        # confusing downstream failures.
        raise SystemExit(1)

    round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt'
    complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt'
    round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt'

    # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669
    check_md5_flag = False

    verify_stored_runs(stored_runs)
    perform_runs()
    perform_fusion(check_md5=check_md5_flag)
    prepare_final_submissions(round4_cumulative_qrels, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs on the round 4 cumulative qrels.
    expected_metrics = {
        'expanded.anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4635, 'judged_cut_10': 0.5300,
            'ndcg_cut_20': 0.4326, 'judged_cut_20': 0.5120,
            'map': 0.1728, 'recall_1000': 0.4462, 'judged_cut_1000': 0.2059,
        },
        'expanded.anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4548, 'judged_cut_10': 0.5000,
            'ndcg_cut_20': 0.4260, 'judged_cut_20': 0.4880,
            'map': 0.1742, 'recall_1000': 0.4527, 'judged_cut_1000': 0.2051,
        },
        'expanded.anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4450, 'judged_cut_10': 0.6020,
            'ndcg_cut_20': 0.4208, 'judged_cut_20': 0.5820,
            'map': 0.1801, 'recall_1000': 0.4473, 'judged_cut_1000': 0.2393,
        },
        'expanded.anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4817, 'judged_cut_10': 0.6040,
            'ndcg_cut_20': 0.4598, 'judged_cut_20': 0.5920,
            'map': 0.1970, 'recall_1000': 0.4711, 'judged_cut_1000': 0.2439,
        },
        'expanded.anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4904, 'judged_cut_10': 0.5820,
            'ndcg_cut_20': 0.4622, 'judged_cut_20': 0.5630,
            'map': 0.2107, 'recall_1000': 0.5004, 'judged_cut_1000': 0.2511,
        },
        'expanded.anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4940, 'judged_cut_10': 0.5700,
            'ndcg_cut_20': 0.4542, 'judged_cut_20': 0.5420,
            'map': 0.2107, 'recall_1000': 0.5070, 'judged_cut_1000': 0.2486,
        },
        # NOTE(review): this key previously lacked the 'expanded.' prefix
        # carried by every sibling entry (and by the same run in the next
        # table); confirm it matches the name used in cumulative_runs.
        'expanded.anserini.covid-r5.fusion1.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4908, 'judged_cut_10': 0.5880,
            'ndcg_cut_20': 0.4753, 'judged_cut_20': 0.5800,
            'map': 0.2017, 'recall_1000': 0.5119, 'judged_cut_1000': 0.2599,
        },
        'expanded.anserini.covid-r5.fusion2.txt': {
            'topics': 50, 'ndcg_cut_10': 0.4846, 'judged_cut_10': 0.5740,
            'ndcg_cut_20': 0.4565, 'judged_cut_20': 0.5400,
            'map': 0.2045, 'recall_1000': 0.5218, 'judged_cut_1000': 0.2578,
        },
        'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6095, 'judged_cut_10': 0.6320,
            'ndcg_cut_20': 0.5693, 'judged_cut_20': 0.5990,
            'map': 0.2344, 'recall_1000': 0.5280, 'judged_cut_1000': 0.2257,
        },
    }
    evaluate_runs(round4_cumulative_qrels, cumulative_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores for the cumulative runs on the complete qrels.
    expected_metrics = {
        'expanded.anserini.covid-r5.abstract.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6808, 'judged_cut_10': 0.9980,
            'ndcg_cut_20': 0.6375, 'judged_cut_20': 0.9600,
            'map': 0.2718, 'recall_1000': 0.4550, 'judged_cut_1000': 0.3845,
        },
        'expanded.anserini.covid-r5.abstract.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6939, 'judged_cut_10': 0.9920,
            'ndcg_cut_20': 0.6524, 'judged_cut_20': 0.9610,
            'map': 0.2752, 'recall_1000': 0.4595, 'judged_cut_1000': 0.3825,
        },
        'expanded.anserini.covid-r5.full-text.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6300, 'judged_cut_10': 0.9680,
            'ndcg_cut_20': 0.5843, 'judged_cut_20': 0.9260,
            'map': 0.2475, 'recall_1000': 0.4201, 'judged_cut_1000': 0.3921,
        },
        'expanded.anserini.covid-r5.full-text.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6611, 'judged_cut_10': 0.9800,
            'ndcg_cut_20': 0.6360, 'judged_cut_20': 0.9610,
            'map': 0.2746, 'recall_1000': 0.4496, 'judged_cut_1000': 0.4073,
        },
        'expanded.anserini.covid-r5.paragraph.qq.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6827, 'judged_cut_10': 0.9800,
            'ndcg_cut_20': 0.6477, 'judged_cut_20': 0.9670,
            'map': 0.3080, 'recall_1000': 0.4936, 'judged_cut_1000': 0.4360,
        },
        'expanded.anserini.covid-r5.paragraph.qdel.bm25.txt': {
            'topics': 50, 'ndcg_cut_10': 0.7067, 'judged_cut_10': 0.9960,
            'ndcg_cut_20': 0.6614, 'judged_cut_20': 0.9760,
            'map': 0.3127, 'recall_1000': 0.4985, 'judged_cut_1000': 0.4328,
        },
        'expanded.anserini.covid-r5.fusion1.txt': {
            'topics': 50, 'ndcg_cut_10': 0.7072, 'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.6731, 'judged_cut_20': 0.9920,
            'map': 0.2964, 'recall_1000': 0.5063, 'judged_cut_1000': 0.4528,
        },
        'expanded.anserini.covid-r5.fusion2.txt': {
            'topics': 50, 'ndcg_cut_10': 0.7131, 'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.6755, 'judged_cut_20': 0.9910,
            'map': 0.3036, 'recall_1000': 0.5166, 'judged_cut_1000': 0.4518,
        },
        'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': {
            'topics': 50, 'ndcg_cut_10': 0.8160, 'judged_cut_10': 1.0000,
            'ndcg_cut_20': 0.7787, 'judged_cut_20': 0.9960,
            'map': 0.3421, 'recall_1000': 0.5249, 'judged_cut_1000': 0.4107,
        },
    }
    evaluate_runs(complete_qrels, cumulative_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)

    # Expected scores for the final submissions on the round 5 qrels.
    expected_metrics = {
        'expanded.anserini.final-r5.fusion1.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5789, 'judged_cut_10': 0.9520,
            'ndcg_cut_20': 0.5374, 'judged_cut_20': 0.8530,
            'map': 0.2236, 'recall_1000': 0.5798, 'judged_cut_1000': 0.2132,
        },
        'expanded.anserini.final-r5.fusion1.post-processed.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5817, 'judged_cut_10': 0.9580,
            'ndcg_cut_20': 0.5414, 'judged_cut_20': 0.8610,
            'map': 0.2246, 'recall_1000': 0.5798, 'judged_cut_1000': 0.2135,
        },
        'expanded.anserini.final-r5.fusion2.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5801, 'judged_cut_10': 0.9620,
            'ndcg_cut_20': 0.5393, 'judged_cut_20': 0.8650,
            'map': 0.2310, 'recall_1000': 0.5861, 'judged_cut_1000': 0.2135,
        },
        'expanded.anserini.final-r5.fusion2.post-processed.txt': {
            'topics': 50, 'ndcg_cut_10': 0.5825, 'judged_cut_10': 0.9680,
            'ndcg_cut_20': 0.5436, 'judged_cut_20': 0.8700,
            'map': 0.2319, 'recall_1000': 0.5861, 'judged_cut_1000': 0.2138,
        },
        'expanded.anserini.final-r5.rf.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6628, 'judged_cut_10': 0.9460,
            'ndcg_cut_20': 0.6040, 'judged_cut_20': 0.8370,
            'map': 0.2410, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1995,
        },
        'expanded.anserini.final-r5.rf.post-processed.txt': {
            'topics': 50, 'ndcg_cut_10': 0.6757, 'judged_cut_10': 0.9620,
            'ndcg_cut_20': 0.6124, 'judged_cut_20': 0.8470,
            'map': 0.2433, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1998,
        },
    }
    evaluate_runs(round5_qrels, final_runs,
                  expected=expected_metrics, check_md5=check_md5_flag)