def test_robust05_rrf_rm3(self):
    """RRF fusion of LR and SVM classifier-PRF runs (RM3 variant) on robust05."""
    topics = 'robust05'
    # Classifier-based pseudo-relevance feedback with logistic regression + RM3.
    lr_cmd = (f'{self.pyserini_search_cmd} --index {self.robust05_index_path} '
              f'--topics {topics} --output {self.tmp}/robust05_lr_rm3.txt '
              f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3')
    self.assertEqual(os.system(lr_cmd), 0)
    # Same configuration, SVM classifier.
    svm_cmd = (f'{self.pyserini_search_cmd} --index {self.robust05_index_path} '
               f'--topics {topics} --output {self.tmp}/robust05_svm_rm3.txt '
               f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3')
    self.assertEqual(os.system(svm_cmd), 0)
    # Reciprocal-rank fusion of the two runs.
    rrf_cmd = (f'{self.pyserini_fusion_cmd} '
               f'--runs {self.tmp}/robust05_lr_rm3.txt {self.tmp}/robust05_svm_rm3.txt '
               f'--output {self.tmp}/robust05_rrf_rm3.txt --resort')
    self.assertEqual(os.system(rrf_cmd), 0)
    # Score the fused run with trec_eval and verify MAP.
    score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \
        {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \
        {self.tmp}/robust05_rrf_rm3.txt'
    ret = os.system(score_cmd)
    out, err = run_command(score_cmd)
    mean_ap = parse_score(out, "map")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mean_ap, 0.2788, delta=0.0001)
def run(self, runtag: str, pyserini_extras: str, actualscore: float, tokenizer=None) -> bool:
    """Execute one Pyserini search run, evaluate it, and verify its MAP score.

    Args:
        runtag: Label for the run; also used to name the temporary run file.
        pyserini_extras: Extra CLI flags appended to the search command.
        actualscore: Expected MAP; compared for exact equality, since it is
            parsed back from the evaluator's printed output.
        tokenizer: Optional tokenizer name forwarded via ``--tokenizer``.

    Returns:
        True iff both commands exit with status 0 and the measured MAP
        equals ``actualscore``.
    """
    print('-------------------------')
    print(f'Running {runtag}:')
    print('-------------------------')
    pyserini_output = f'verify.pyserini.{runtag}.txt'
    pyserini_cmd = f'{self.pyserini_base_cmd} --index {self.index_path} ' \
        + f'--topics {self.pyserini_topics} --output {pyserini_output} {pyserini_extras}'
    if tokenizer is not None:  # fix: was `!= None`; identity check is the correct idiom
        pyserini_cmd = pyserini_cmd + f' --tokenizer {tokenizer}'
    if os.system(pyserini_cmd) != 0:  # fix: was the awkward `not status == 0`
        return False
    try:
        eval_cmd = f'{self.eval_base_cmd} {self.qrels} {pyserini_output}'
        if os.system(eval_cmd) != 0:
            # fix: the original leaked the run file on this early return
            return False
        stdout, stderr = run_command(eval_cmd)
        score = parse_score(stdout, "map")
        return score == actualscore
    finally:
        # Single cleanup point replaces the duplicated _cleanup calls on each exit path.
        self._cleanup([pyserini_output])
def test_core18_rrf(self):
    """RRF fusion of LR and SVM classifier-PRF runs on core18."""
    topics = 'core18'
    lr_cmd = (f'{self.pyserini_search_cmd} --index {self.core18_index_path} '
              f'--topics {topics} --output {self.tmp}/core18_lr.txt '
              f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6')
    self.assertEqual(os.system(lr_cmd), 0)
    svm_cmd = (f'{self.pyserini_search_cmd} --index {self.core18_index_path} '
               f'--topics {topics} --output {self.tmp}/core18_svm.txt '
               f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6')
    self.assertEqual(os.system(svm_cmd), 0)
    # Fuse the two runs with Anserini's fusion script.
    rrf_cmd = (f'python {self.anserini_root}/src/main/python/fusion.py '
               f'--runs {self.tmp}/core18_lr.txt {self.tmp}/core18_svm.txt '
               f'--out {self.tmp}/core18_rrf.txt')
    self.assertEqual(os.system(rrf_cmd), 0)
    score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \
        {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \
        {self.tmp}/core18_rrf.txt'
    ret = os.system(score_cmd)
    out, err = run_command(score_cmd)
    mean_ap = parse_score(out, "map")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mean_ap, 0.2881, delta=0.0001)
def test_dpr_nq_test_bf_bm25_hybrid_otf(self):
    """Dense+BM25 hybrid retrieval on DPR NQ test with on-the-fly query encoding."""
    run_file = 'test_run.dpr.nq-test.multi.bf.otf.bm25.trec'
    json_file = 'test_run.dpr.nq-test.multi.bf.otf.bm25.json'
    self.temp_files.extend([run_file, json_file])
    search_cmd = f'python -m pyserini.hsearch dense  --index wikipedia-dpr-multi-bf \
        --encoder facebook/dpr-question_encoder-multiset-base \
        sparse --index wikipedia-dpr \
        fusion --alpha 1.3 \
        run    --topics dpr-nq-test \
        --batch-size {self.batch_size} --threads {self.threads} \
        --output {run_file} '
    # Convert the TREC run into the DPR retrieval JSON format.
    convert_cmd = f'python -m pyserini.eval.convert_trec_run_to_dpr_retrieval_run --topics dpr-nq-test \
        --index wikipedia-dpr \
        --input {run_file} \
        --output {json_file}'
    eval_cmd = f'python -m pyserini.eval.evaluate_dpr_retrieval --retrieval {json_file} --topk 20'
    search_ret = os.system(search_cmd)
    convert_ret = os.system(convert_cmd)
    out, _ = run_command(eval_cmd)
    top20 = parse_score(out, "Top20")
    self.assertEqual(search_ret, 0)
    self.assertEqual(convert_ret, 0)
    self.assertAlmostEqual(top20, 0.8260, places=4)
def test_cross_validation(self):
    """Sweep the PRF interpolation alpha, cross-validate, and check the tuned MAP."""
    topics = 'core17'
    os.mkdir(f'{self.tmp}/core17')
    # Produce one LR run per alpha in {0.0, 0.1, ..., 1.0}.
    for tenth in range(11):
        alpha = tenth / 10.0
        run_cmd = (f'{self.pyserini_search_cmd} --index {self.core17_index_path} '
                   f'--topics {topics} --output {self.tmp}/core17/core17_lr_A{alpha}_bm25.txt '
                   f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha {alpha}')
        self.assertEqual(os.system(run_cmd), 0)
    # Cross-validate over the per-alpha runs to build the final run file.
    os.system(
        f'python {self.pyserini_root}/scripts/classifier_prf/cross_validate.py \
        --anserini {self.anserini_root} --run_file {self.tmp} --pyserini {self.pyserini_root} \
        --collection core17 --output {self.tmp}/core17_lr.txt --classifier lr '
    )
    score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \
        {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \
        {self.tmp}/core17_lr.txt'
    ret = os.system(score_cmd)
    out, err = run_command(score_cmd)
    mean_ap = parse_score(out, "map")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mean_ap, 0.2462, delta=0.0001)
def test_sum_aggregation(self):
    """IRST retrieval (sum aggregation) on MS MARCO passage dev; exact score regression."""
    os.system('python -m pyserini.search.lucene.irst \
        --qrels tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt \
        --tran_path irst_test/ibm_model_1_bert_tok_20211117/ \
        --query_path irst_test/queries.dev.small.json \
        --index msmarco-passage-ltr \
        --output irst_test/regression_test_sum.txt \
        --alpha 0.1 ')
    score_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval \
        -c -M1000 -m map -m ndcg_cut.20 {self.qrels_path} irst_test/regression_test_sum.txt'
    ret = os.system(score_cmd)
    out, err = run_command(score_cmd)
    mean_ap = parse_score(out, "map")
    ndcg20 = parse_score(out, "ndcg")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    # Exact equality (not almost-equal) is the original regression contract here.
    self.assertEqual(mean_ap, 0.2294)
    self.assertEqual(ndcg20, 0.2997)
def test_msmarco_passage_tct_colbert_hnsw_otf(self):
    """TCT-ColBERT HNSW search on MS MARCO passage dev, encoding queries on the fly."""
    run_file = 'test_run.msmarco-passage.tct_colbert.hnsw-otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
        --index msmarco-passage-tct_colbert-hnsw \
        --encoder castorini/tct_colbert-msmarco \
        --output {run_file} \
        --output-format msmarco '
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3345, places=4)
def test_msmarco_passage_sbert_bf_otf(self):
    """SBERT brute-force search on MS MARCO passage dev with on-the-fly encoding."""
    run_file = 'test_run.msmarco-passage.sbert.bf.otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.search.faiss --topics msmarco-passage-dev-subset \
        --index msmarco-passage-sbert-bf \
        --encoder sentence-transformers/msmarco-distilbert-base-v3 \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3314, delta=0.0001)
def test_msmarco_passage_tct_colbert_hnsw(self):
    """TCT-ColBERT HNSW search on MS MARCO passage dev using pre-encoded queries."""
    run_file = 'test_run.msmarco-passage.tct_colbert.hnsw.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
        --index msmarco-passage-tct_colbert-hnsw \
        --output {run_file} \
        --msmarco '
    eval_cmd = f'python tools/scripts/msmarco/msmarco_passage_eval.py \
        tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt \
        {run_file}'
    ret = os.system(search_cmd)
    out, err = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mrr, 0.3345, places=4)
def test_msmarco_passage_tct_colbert_v2_hnp_otf(self):
    """TCT-ColBERT v2 (HN+) brute-force search with on-the-fly query encoding."""
    run_file = 'test_run.msmarco-passage.tct_colbert-v2-hnp.bf-otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.search.faiss --topics msmarco-passage-dev-subset \
        --index msmarco-passage-tct_colbert-v2-hnp-bf \
        --encoder castorini/tct_colbert-v2-hnp-msmarco \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3585, delta=0.0001)
def test_msmarco_passage_ance_bf_otf(self):
    """ANCE brute-force search on MS MARCO passage dev with on-the-fly encoding."""
    run_file = 'test_run.msmarco-passage.ance.bf.otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
        --index msmarco-passage-ance-bf \
        --encoder castorini/ance-msmarco-passage \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3302, delta=0.0001)
def test_msmarco_passage_distilbert_kd_bf_otf(self):
    """DistilBERT-KD (margin-MSE) brute-force search with on-the-fly encoding."""
    run_file = 'test_run.msmarco-passage.distilbert-dot-margin_mse-T2.bf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
        --index msmarco-passage-distilbert-dot-margin_mse-T2-bf \
        --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, err = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mrr, 0.3251, delta=0.0001)
def test_msmarco_passage_tct_colbert_bf_otf(self):
    """TCT-ColBERT brute-force search on MS MARCO passage dev, on-the-fly encoding."""
    run_file = 'test_run.msmarco-passage.tct_colbert.bf-otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
        --index msmarco-passage-tct_colbert-bf \
        --encoder castorini/tct_colbert-msmarco \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    # Scores differ slightly between macOS and Linux; the delta tolerance absorbs that.
    self.assertAlmostEqual(mrr, 0.3350, delta=0.0001)
def test_msmarco_passage_tct_colbert_bf_d2q_hybrid_otf(self):
    """TCT-ColBERT + doc2query-T5 hybrid on MS MARCO passage dev, OTF encoding."""
    run_file = 'test_run.msmarco-passage.tct_colbert.bf-otf.doc2queryT5.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.hsearch dense  --index msmarco-passage-tct_colbert-bf \
        --encoder castorini/tct_colbert-msmarco \
        sparse --index msmarco-passage-expanded \
        fusion --alpha 0.22 \
        run    --topics msmarco-passage-dev-subset \
        --output {run_file} \
        --batch-size {self.batch_size} --threads {self.threads} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3647, places=4)
def test_msmarco_passage_tilde_otf(self):
    """uniCOIL-TILDE impact search on MS MARCO passage dev with OTF encoding."""
    run_file = 'test_run.msmarco-passage.tilde.otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.search.lucene --topics msmarco-passage-dev-subset \
        --encoder ielab/unicoil-tilde200-msmarco-passage \
        --index msmarco-passage-unicoil-tilde \
        --output {run_file} \
        --impact \
        --hits 1000 --batch {self.batch_size} --threads {self.threads} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3495, delta=0.0001)
def test_core17_lr(self):
    """Cross-validated LR classifier-PRF run on core17, scored with trec_eval."""
    os.system(
        f'python {self.pyserini_root}/scripts/classifier_prf/cross_validate.py \
        --anserini {self.anserini_root} --run_file {self.tmp} --pyserini {self.pyserini_root} \
        --collection core17 --output {self.tmp}/core17_lr.txt --classifier lr '
    )
    score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \
        {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt {self.tmp}/core17_lr.txt'
    ret = os.system(score_cmd)
    out, err = run_command(score_cmd)
    mean_ap = parse_score(out, "map")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mean_ap, 0.2462, delta=0.0001)
def test_robust05_rrf_rm3(self):
    """Cross-validated RRF classifier-PRF run (with RM3) on robust05."""
    os.system(
        f'python {self.pyserini_root}/scripts/classifier_prf/cross_validate.py \
        --anserini {self.anserini_root} --run_file {self.tmp} --pyserini {self.pyserini_root} \
        --collection robust05 --output {self.tmp}/robust05_rrf_rm3.txt --classifier rrf -rm3'
    )
    score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \
        {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \
        {self.tmp}/robust05_rrf_rm3.txt'
    ret = os.system(score_cmd)
    out, err = run_command(score_cmd)
    mean_ap = parse_score(out, 'map')
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mean_ap, 0.2870, delta=0.0001)
def test_msmarco_doc_tct_colbert_bf_d2q_hybrid_otf(self):
    """TCT-ColBERT + doc2query-T5 hybrid on MS MARCO doc dev, OTF encoding."""
    run_file = 'test_run.msmarco-doc.tct_colbert.bf-otf.doc2queryT5.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.hsearch dense  --index msmarco-doc-tct_colbert-bf \
        --encoder castorini/tct_colbert-msmarco \
        sparse --index msmarco-doc-expanded-per-passage \
        fusion --alpha 0.32 \
        run    --topics msmarco-doc-dev \
        --output {run_file} \
        --hits 1000 --max-passage --max-passage-hits 100 \
        --batch-size {self.batch_size} --threads {self.threads} \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_doc_eval --judgments msmarco-doc-dev --run {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @100")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3784, places=4)
def test_msmarco_passage_ance_avg_prf_otf(self):
    """ANCE dense retrieval with average-vector PRF on DL19 passage, OTF encoding."""
    run_file = 'test_run.dl2019.ance.avg-prf.otf.trec'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.search.faiss --topics dl19-passage \
        --index msmarco-passage-ance-bf \
        --encoder castorini/ance-msmarco-passage \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --prf-depth 3 \
        --prf-method avg'
    eval_cmd = f'python -m pyserini.eval.trec_eval -l 2 -m map dl19-passage {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mean_ap = parse_score(out, "map")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mean_ap, 0.4247, delta=0.0001)
def test_msmarco_passage_tct_colbert_bf_bm25_hybrid(self):
    """TCT-ColBERT + BM25 hybrid on MS MARCO passage dev (pre-encoded queries)."""
    run_file = 'test_run.msmarco-passage.tct_colbert.bf.bm25.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.hsearch dense  --index msmarco-passage-tct_colbert-bf \
        sparse --index msmarco-passage \
        fusion --alpha 0.12 \
        run    --topics msmarco-passage-dev-subset \
        --output {run_file} \
        --batch-size {self.batch_size} --threads {self.threads} \
        --msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_passage_eval msmarco-passage-dev-subset {run_file}'
    ret = os.system(search_cmd)
    out, err = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mrr, 0.3529, places=4)
def test_msmarco_doc_unicoil_d2q_otf(self):
    """uniCOIL (doc2query) impact search on MS MARCO doc dev with OTF encoding."""
    run_file = 'test_run.msmarco-doc.unicoil-d2q.otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.search.lucene --topics msmarco-doc-dev \
        --encoder castorini/unicoil-msmarco-passage \
        --index msmarco-doc-per-passage-unicoil-d2q \
        --output {run_file} \
        --impact \
        --hits 1000 --batch {self.batch_size} --threads {self.threads} \
        --max-passage --max-passage-hits 100 \
        --output-format msmarco'
    eval_cmd = f'python -m pyserini.eval.msmarco_doc_eval --judgments msmarco-doc-dev --run {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3531, delta=0.0001)
def test_msmarco_v2_passage_unicoil_noexp_otf(self):
    """uniCOIL (noexp, zero-shot) impact search on MS MARCO v2 passage dev."""
    run_file = 'test_run.msmarco-v2-passage.unicoil-noexp.0shot.otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.search.lucene --topics msmarco-v2-passage-dev \
        --encoder castorini/unicoil-noexp-msmarco-passage \
        --index msmarco-v2-passage-unicoil-noexp-0shot \
        --output {run_file} \
        --impact \
        --hits 1000 \
        --batch {self.batch_size} \
        --threads {self.threads} \
        --min-idf 1'
    eval_cmd = f'python -m pyserini.eval.trec_eval -c -M 100 -m map -m recip_rank msmarco-v2-passage-dev {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    rr = parse_score(out, "recip_rank")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(rr, 0.1314, delta=0.0001)
def test_msmarco_passage_tct_colbert_bf_otf(self):
    """TCT-ColBERT brute-force search with OTF encoding, scored by the MS MARCO script."""
    run_file = 'test_run.msmarco-passage.tct_colbert.bf-otf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
        --index msmarco-passage-tct_colbert-bf \
        --encoder castorini/tct_colbert-msmarco \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --msmarco'
    eval_cmd = f'python tools/scripts/msmarco/msmarco_passage_eval.py \
        tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt \
        {run_file}'
    ret = os.system(search_cmd)
    out, err = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mrr, 0.3350, places=4)
def test_msmarco_passage_tct_colbert_bf_d2q_hybrid(self):
    """TCT-ColBERT + doc2query-T5 hybrid on MS MARCO passage dev (pre-encoded queries)."""
    run_file = 'test_run.msmarco-passage.tct_colbert.bf.doc2queryT5.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.hsearch dense  --index msmarco-passage-tct_colbert-bf \
        sparse --index msmarco-passage-expanded \
        fusion --alpha 0.22 \
        run    --topics msmarco-passage-dev-subset \
        --output {run_file} \
        --batch-size {self.batch_size} --threads {self.threads} \
        --msmarco'
    eval_cmd = f'python tools/scripts/msmarco/msmarco_passage_eval.py \
        tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt \
        {run_file}'
    ret = os.system(search_cmd)
    out, err = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mrr, 0.3647, places=4)
def test_msmarco_doc_tct_colbert_bf_otf(self):
    """TCT-ColBERT (MaxP over passages) on MS MARCO doc dev with OTF encoding."""
    run_file = 'test_run.msmarco-doc.passage.tct_colbert-otf.txt'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-doc-dev \
        --index msmarco-doc-tct_colbert-bf \
        --encoder castorini/tct_colbert-msmarco \
        --output {run_file} \
        --hits 1000 \
        --max-passage \
        --max-passage-hits 100 \
        --output-format msmarco \
        --batch-size {self.batch_size} \
        --threads {self.threads}'
    eval_cmd = f'python -m pyserini.eval.msmarco_doc_eval --judgments msmarco-doc-dev --run {run_file}'
    ret = os.system(search_cmd)
    out, _ = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @100")
    self.assertEqual(ret, 0)
    self.assertAlmostEqual(mrr, 0.3323, places=4)
def test_msmarco_doc_ance_bf_otf(self):
    """ANCE MaxP brute-force retrieval on MS MARCO doc dev with OTF query encoding.

    Fix: the original filename literal ended with a stray space
    ('...otf.txt '). The shell word-splits the space away, so the file on
    disk has no trailing space, but temp_files recorded the name WITH the
    space — cleanup could never match the real file, leaking it.
    """
    output_file = 'test_run.msmarco-doc.passage.ance-maxp.otf.txt'
    self.temp_files.append(output_file)
    cmd1 = f'python -m pyserini.dsearch --topics msmarco-doc-dev \
        --index msmarco-doc-ance-maxp-bf \
        --encoder castorini/ance-msmarco-doc-maxp \
        --output {output_file} \
        --hits 1000 \
        --max-passage \
        --max-passage-hits 100 \
        --output-format msmarco \
        --batch-size {self.batch_size} \
        --threads {self.threads}'
    cmd2 = f'python -m pyserini.eval.msmarco_doc_eval --judgments msmarco-doc-dev --run {output_file}'
    status = os.system(cmd1)
    stdout, stderr = run_command(cmd2)
    score = parse_score(stdout, "MRR @100")
    self.assertEqual(status, 0)
    # We get a small difference, 0.3794 on macOS — hence the wider delta.
    self.assertAlmostEqual(score, 0.3797, delta=0.0003)
def test_msmarco_doc_tct_colbert_bf_bm25_hybrid(self):
    """TCT-ColBERT + BM25 hybrid on MS MARCO doc dev, scored by the MS MARCO script."""
    run_file = 'test_run.msmarco-doc.tct_colbert.bf.bm25.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.hsearch dense  --index msmarco-doc-tct_colbert-bf \
        --encoder castorini/tct_colbert-msmarco \
        sparse --index msmarco-doc-per-passage \
        fusion --alpha 0.25 \
        run    --topics msmarco-doc-dev \
        --output {run_file} \
        --hits 1000 --max-passage --max-passage-hits 100 \
        --batch-size {self.batch_size} --threads {self.threads} \
        --msmarco'
    eval_cmd = f'python tools/scripts/msmarco/msmarco_doc_eval.py \
        --judgments tools/topics-and-qrels/qrels.msmarco-doc.dev.txt \
        --run {run_file}'
    ret = os.system(search_cmd)
    out, err = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @100")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mrr, 0.3701, places=4)
def test_robust05_avg(self):
    """Averaged LR+SVM classifier-PRF run on robust05, scored with trec_eval."""
    topics = 'robust05'
    search_cmd = (f'{self.pyserini_search_cmd} --index {self.robust05_index_path} '
                  f'--topics {topics} --output {self.tmp}/robust05_avg.txt '
                  f'--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.8')
    self.assertEqual(os.system(search_cmd), 0)
    score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \
        {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \
        {self.tmp}/robust05_avg.txt'
    ret = os.system(score_cmd)
    out, err = run_command(score_cmd)
    mean_ap = parse_score(out, "map")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    self.assertAlmostEqual(mean_ap, 0.2485, delta=0.0001)
def test_dpr_trivia_test_bf(self):
    """DPR multi brute-force retrieval on TriviaQA test, checked at Top-20 accuracy."""
    run_file = 'test_run.dpr.trivia-test.multi.bf.trec'
    json_file = 'test_run.dpr.trivia-test.multi.bf.json'
    self.temp_files.extend([run_file, json_file])
    search_cmd = f'python -m pyserini.dsearch --topics dpr-trivia-test \
        --index wikipedia-dpr-multi-bf \
        --output {run_file} \
        --batch-size {self.batch_size} --threads {self.threads}'
    # Convert the TREC run into DPR's retrieval JSON for evaluation.
    convert_cmd = f'python scripts/dpr/convert_trec_run_to_retrieval_json.py --topics dpr-trivia-test \
        --index wikipedia-dpr \
        --input {run_file} \
        --output {json_file}'
    eval_cmd = f'python tools/scripts/dpr/evaluate_retrieval.py --retrieval {json_file} --topk 20'
    search_ret = os.system(search_cmd)
    convert_ret = os.system(convert_cmd)
    out, _ = run_command(eval_cmd)
    top20 = parse_score(out, "Top20")
    self.assertEqual(search_ret, 0)
    self.assertEqual(convert_ret, 0)
    self.assertAlmostEqual(top20, 0.7887, places=4)
def test_msmarco_passage_tct_colbert_bf(self):
    """TCT-ColBERT brute-force search on MS MARCO passage dev (pre-encoded queries)."""
    run_file = 'test_run.msmarco-passage.tct_colbert.bf.tsv'
    self.temp_files.append(run_file)
    search_cmd = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
        --index msmarco-passage-tct_colbert-bf \
        --batch-size {self.batch_size} \
        --threads {self.threads} \
        --output {run_file} \
        --msmarco'
    eval_cmd = f'python tools/scripts/msmarco/msmarco_passage_eval.py \
        tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt \
        {run_file}'
    ret = os.system(search_cmd)
    out, err = run_command(eval_cmd)
    mrr = parse_score(out, "MRR @10")
    self.assertEqual(ret, 0)
    self.assertEqual(err, '')
    # Scores differ slightly between macOS and Linux, so branch on the platform.
    expected = 0.3349 if platform.system() == 'Darwin' else 0.3350
    self.assertAlmostEqual(mrr, expected, places=4)