Example #1
    def test_msmarco_doc_ance_bf_otf(self):
        output_file = 'test_run.msmarco-doc.passage.ance-maxp.otf.txt'
        self.temp_files.append(output_file)
        cmd1 = f'python -m pyserini.dsearch --topics msmarco-doc-dev \
                             --index msmarco-doc-ance-maxp-bf \
                             --encoder castorini/ance-msmarco-doc-maxp \
                             --output {output_file} \
                             --hits 1000 \
                             --max-passage \
                             --max-passage-hits 100 \
                             --msmarco \
                             --batch-size {self.batch_size} \
                             --threads {self.threads}'

        cmd2 = f'python -m pyserini.eval.msmarco_doc_eval --judgments msmarco-doc-dev --run {output_file}'
        status = os.system(cmd1)
        stdout, stderr = run_command(cmd2)
        score = parse_score(stdout, "MRR @100")
        self.assertEqual(status, 0)
        # We get a small difference, 0.3794 on macOS.
        self.assertAlmostEqual(score, 0.3797, delta=0.0003)
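Every test in this file follows the same pattern: shell out to a Pyserini CLI, capture the evaluation output, and pull a single metric out of it with the suite's shared `run_command` and `parse_score` helpers. The sketch below is only a plausible reconstruction of those helpers (subprocess capture plus a line scan for the metric name), not their actual implementation.

import shlex
import subprocess


def run_command(cmd: str):
    """Run a shell command and return (stdout, stderr) as decoded text."""
    process = subprocess.Popen(shlex.split(cmd),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    return stdout.decode('utf-8'), stderr.decode('utf-8')


def parse_score(output: str, metric: str, digits: int = 4) -> float:
    """Return the last field of the first output line mentioning `metric`."""
    for line in output.split('\n'):
        if metric in line:
            return round(float(line.split()[-1]), digits)
    return -1.0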
Example #2
    def test_core17_lr(self):
        pyserini_topics = 'core17'

        run_file_cmd = f'{self.pyserini_search_cmd} --index {self.core17_index_path} ' \
                       + f'--topics {pyserini_topics} --output {self.tmp}/core17_lr.txt ' \
                       + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.7'

        status = os.system(run_file_cmd)
        self.assertEqual(status, 0)

        score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \
                {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core17.txt \
                {self.tmp}/core17_lr.txt'

        status = os.system(score_cmd)
        stdout, stderr = run_command(score_cmd)
        score = parse_score(stdout, "map")

        self.assertEqual(status, 0)
        self.assertEqual(stderr, '')
        self.assertAlmostEqual(score, 0.2473, delta=0.0001)
Example #3
    def test_msmarco_doc_tct_colbert_bf_otf(self):
        output_file = 'test_run.msmarco-doc.passage.tct_colbert.txt'
        self.temp_files.append(output_file)
        cmd1 = f'python -m pyserini.dsearch --topics msmarco-doc-dev \
                             --index msmarco-doc-tct_colbert-bf \
                             --encoder castorini/tct_colbert-msmarco \
                             --output {output_file} \
                             --hits 1000 \
                             --max-passage \
                             --max-passage-hits 100 \
                             --msmarco \
                             --batch-size {self.batch_size} \
                             --threads {self.threads}'

        cmd2 = f'python -m pyserini.eval.msmarco_doc_eval --judgments msmarco-doc-dev --run {output_file}'
        status = os.system(cmd1)
        stdout, stderr = run_command(cmd2)
        score = parse_score(stdout, "MRR @100")
        self.assertEqual(status, 0)
        self.assertEqual(stderr, '')
        self.assertAlmostEqual(score, 0.3323, places=4)
Example #4
    def test_sum_aggregation(self):
        os.system('python -m pyserini.search.lucene.irst \
            --qrels tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt \
            --tran_path irst_test/ibm_model_1_bert_tok_20211117/ \
            --query_path irst_test/queries.dev.small.json \
            --index msmarco-passage-ltr \
            --output irst_test/regression_test_sum.txt \
            --alpha 0.1')

        score_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval \
                -c -M1000 -m map -m ndcg_cut.20 {self.qrels_path} irst_test/regression_test_sum.txt'

        status = os.system(score_cmd)
        stdout, stderr = run_command(score_cmd)
        map_score = parse_score(stdout, "map")
        ndcg_score = parse_score(stdout, "ndcg")

        self.assertEqual(status, 0)
        self.assertEqual(stderr, '')
        self.assertEqual(map_score, 0.2294)
        self.assertEqual(ndcg_score, 0.2997)
Example #5
    def test_msmarco_v2_doc_unicoil_noexp_otf(self):
        output_file = 'test_run.msmarco-v2-doc.unicoil-noexp.0shot.otf.tsv'
        self.temp_files.append(output_file)
        cmd1 = f'python -m pyserini.search.lucene --topics msmarco-v2-doc-dev \
                          --encoder castorini/unicoil-noexp-msmarco-passage \
                          --index msmarco-v2-doc-per-passage-unicoil-noexp-0shot  \
                          --output {output_file} \
                          --impact \
                          --hits 10000 \
                          --batch {self.batch_size} \
                          --threads {self.threads} \
                          --max-passage-hits 1000 \
                          --max-passage \
                          --min-idf 1'

        cmd2 = f'python -m pyserini.eval.trec_eval -c -M 100 -m map -m recip_rank msmarco-v2-doc-dev {output_file}'
        status = os.system(cmd1)
        stdout, stderr = run_command(cmd2)
        score = parse_score(stdout, "recip_rank")
        self.assertEqual(status, 0)
        self.assertAlmostEqual(score, 0.2032, delta=0.0001)
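The `--max-passage`/`--max-passage-hits` flags used in the document runs above collapse passage-level hits into document-level hits by keeping each document's best-scoring passage (MaxP). A toy sketch of that aggregation over generic (passage_id, score) pairs, assuming passage ids encode their parent document as something like 'D12345#3':

from collections import defaultdict


def max_passage_aggregate(passage_hits, max_hits=100):
    """Collapse passage hits to document hits, keeping each document's best passage."""
    doc_scores = defaultdict(lambda: float('-inf'))
    for passage_id, score in passage_hits:
        docid = passage_id.split('#')[0]  # assumed 'docid#passage' convention
        doc_scores[docid] = max(doc_scores[docid], score)
    ranked = sorted(doc_scores.items(), key=lambda kv: kv[1], reverse=True)
    return ranked[:max_hits]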
Example #6
    def test_dpr_trivia_test_bf(self):
        output_file = 'test_run.dpr.trivia-test.multi.bf.trec'
        retrieval_file = 'test_run.dpr.trivia-test.multi.bf.json'
        self.temp_files.extend([output_file, retrieval_file])
        cmd1 = f'python -m pyserini.dsearch --topics dpr-trivia-test \
                             --index wikipedia-dpr-multi-bf \
                             --output {output_file} \
                             --batch-size {self.batch_size} --threads {self.threads}'

        cmd2 = f'python scripts/dpr/convert_trec_run_to_retrieval_json.py --topics dpr-trivia-test \
                                                           --index wikipedia-dpr \
                                                           --input {output_file} \
                                                           --output {retrieval_file}'

        cmd3 = f'python tools/scripts/dpr/evaluate_retrieval.py --retrieval {retrieval_file} --topk 20'
        status1 = os.system(cmd1)
        status2 = os.system(cmd2)
        stdout, stderr = run_command(cmd3)
        score = parse_score(stdout, "Top20")
        self.assertEqual(status1, 0)
        self.assertEqual(status2, 0)
        self.assertAlmostEqual(score, 0.7887, places=4)
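The DPR-style tests report `Top20`, the fraction of questions with at least one answer-bearing passage among the top 20 retrieved. A rough sketch of that metric; the key names ('contexts', 'has_answer') are assumptions about the retrieval JSON, not its documented schema:

def top_k_hit_rate(retrieval, k=20):
    """Fraction of questions whose top-k contexts contain an answer."""
    hits = sum(
        1 for entry in retrieval.values()
        if any(ctx.get('has_answer') for ctx in entry['contexts'][:k])
    )
    return hits / len(retrieval)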
Example #7
    def test_msmarco_passage_tct_colbert_bf(self):
        output_file = 'test_run.msmarco-passage.tct_colbert.bf.tsv'
        self.temp_files.append(output_file)
        cmd1 = f'python -m pyserini.dsearch --topics msmarco-passage-dev-subset \
                             --index msmarco-passage-tct_colbert-bf \
                             --batch-size {self.batch_size} \
                             --threads {self.threads} \
                             --output {output_file} \
                             --msmarco'
        cmd2 = f'python tools/scripts/msmarco/msmarco_passage_eval.py \
                    tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt \
                    {output_file}'
        status = os.system(cmd1)
        stdout, stderr = run_command(cmd2)
        score = parse_score(stdout, "MRR @10")
        self.assertEqual(status, 0)
        self.assertEqual(stderr, '')
        # We get a small difference in scores on macOS (vs. Linux):
        if platform.system() == 'Darwin':
            self.assertAlmostEqual(score, 0.3349, places=4)
        else:
            self.assertAlmostEqual(score, 0.3350, places=4)
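All of these tests assume a fixture that registers run files in `self.temp_files` and fixes `batch_size` and `threads` for the on-the-fly encoders; the macOS check above also needs `platform` imported. A minimal sketch of such a fixture, with attribute names taken from the tests and everything else assumed:

import os
import platform
import unittest


class TestDenseSearchIntegration(unittest.TestCase):
    def setUp(self):
        self.temp_files = []
        # Illustrative defaults; the real suite tunes these for its CI hardware.
        self.batch_size = 256
        self.threads = 12

    def tearDown(self):
        # Remove any run files the tests registered.
        for f in self.temp_files:
            if os.path.exists(f):
                os.remove(f)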
Example #8
    def test_nq_test_ance_bf_otf(self):
        output_file = 'test_run.ance.nq-test.multi.bf.otf.trec'
        retrieval_file = 'test_run.ance.nq-test.multi.bf.otf.json'
        self.temp_files.extend([output_file, retrieval_file])
        cmd1 = f'python -m pyserini.dsearch --topics dpr-nq-test \
                             --index wikipedia-ance-multi-bf \
                             --encoder castorini/ance-dpr-question-multi \
                             --output {output_file} \
                             --batch-size {self.batch_size} --threads {self.threads}'

        cmd2 = f'python -m pyserini.eval.convert_trec_run_to_dpr_retrieval_run --topics dpr-nq-test \
                                                           --index wikipedia-dpr \
                                                           --input {output_file} \
                                                           --output {retrieval_file}'

        cmd3 = f'python -m pyserini.eval.evaluate_dpr_retrieval --retrieval {retrieval_file} --topk 20'
        status1 = os.system(cmd1)
        status2 = os.system(cmd2)
        stdout, stderr = run_command(cmd3)
        score = parse_score(stdout, "Top20")
        self.assertEqual(status1, 0)
        self.assertEqual(status2, 0)
        self.assertAlmostEqual(score, 0.8224, places=4)
Example #9
    def test_msmarco_passage_ance_rocchio_prf_otf(self):
        output_file = 'test_run.dl2019.ance.rocchio-prf.otf.trec'
        self.temp_files.append(output_file)
        cmd1 = f'python -m pyserini.search.faiss --topics dl19-passage \
                                     --index msmarco-passage-ance-bf \
                                     --encoder castorini/ance-msmarco-passage \
                                     --batch-size {self.batch_size} \
                                     --threads {self.threads} \
                                     --output {output_file} \
                                     --prf-depth 5 \
                                     --prf-method rocchio \
                                     --rocchio-alpha {self.rocchio_alpha} \
                                     --rocchio-beta {self.rocchio_beta}'

        cmd2 = f'python -m pyserini.eval.trec_eval -l 2 -m map dl19-passage {output_file}'
        status = os.system(cmd1)
        stdout, stderr = run_command(cmd2)
        score = parse_score(stdout, "map")
        self.assertEqual(status, 0)
        self.assertAlmostEqual(score, 0.4211, delta=0.0001)
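`--prf-method rocchio` expands the query embedding with pseudo-relevance feedback: the original query vector is blended with the centroid of the top `--prf-depth` passage embeddings, weighted by the alpha/beta flags. A toy NumPy sketch of that update; the exact weighting Pyserini applies may differ:

import numpy as np


def rocchio_query(query_emb, feedback_embs, alpha=0.4, beta=0.6):
    """Rocchio update on dense vectors: alpha * q + beta * centroid(top-k docs)."""
    centroid = np.mean(feedback_embs, axis=0)  # feedback_embs: (k, d) array
    return alpha * query_emb + beta * centroid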
Example #10
    def test_dpr_curated_test_bf_otf(self):
        output_file = 'test_run.dpr.curated-test.multi.bf.otf.trec'
        retrieval_file = 'test_run.dpr.curated-test.multi.bf.otf.json'
        self.temp_files.extend([output_file, retrieval_file])
        cmd1 = f'python -m pyserini.dsearch --topics dpr-curated-test \
                             --index wikipedia-dpr-multi-bf \
                             --encoder facebook/dpr-question_encoder-multiset-base \
                             --output {output_file} \
                             --batch-size {self.batch_size} --threads {self.threads}'

        cmd2 = f'python -m pyserini.eval.convert_trec_run_to_dpr_retrieval_run --topics dpr-curated-test \
                                                           --index wikipedia-dpr \
                                                           --input {output_file} \
                                                           --output {retrieval_file}'

        cmd3 = f'python -m pyserini.eval.evaluate_dpr_retrieval --retrieval {retrieval_file} --topk 20 --regex'
        status1 = os.system(cmd1)
        status2 = os.system(cmd2)
        stdout, stderr = run_command(cmd3)
        score = parse_score(stdout, "Top20")
        self.assertEqual(status1, 0)
        self.assertEqual(status2, 0)
        self.assertAlmostEqual(score, 0.8876, places=4)
Example #11
    def run(self, runtag: str, pyserini_extras: str, actualscore: float):
        print('-------------------------')
        print(f'Running {runtag}:')
        print('-------------------------')

        pyserini_output = f'verify.pyserini.{runtag}.txt'

        pyserini_cmd = f'{self.pyserini_base_cmd} --index {self.index_path} ' \
                       + f'--topics {self.pyserini_topics} --output {pyserini_output} {pyserini_extras}'

        status = os.system(pyserini_cmd)
        if status != 0:
            return False

        eval_cmd = f'{self.eval_base_cmd} {self.qrels} {pyserini_output}'
        status = os.system(eval_cmd)
        if status != 0:
            return False
        stdout, stderr = run_command(eval_cmd)
        score = parse_score(stdout, "map")
        if actualscore != score:
            return False
        return True
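The `run` helper is meant to be driven by per-configuration verification tests: each caller supplies a run tag, extra CLI flags, and the MAP the run is expected to reproduce (compared exactly, so it must match parse_score's rounding). A hypothetical caller follows; the tag, flags, and expected value are placeholders rather than published regression numbers:

    # Hypothetical usage of the run() helper above.
    def test_bm25_baseline(self):
        expected_map = 0.0  # substitute the documented MAP for this configuration
        self.assertTrue(self.run('bm25-default', '--bm25', expected_map))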
Example #12
    def test_dpr_curated_test_bf_bm25_hybrid(self):
        output_file = 'test_run.dpr.curated-test.multi.bf.bm25.trec'
        retrieval_file = 'test_run.dpr.curated-test.multi.bf.bm25.json'
        self.temp_files.extend([output_file, retrieval_file])
        cmd1 = f'python -m pyserini.hsearch dense  --index wikipedia-dpr-multi-bf \
                             sparse --index wikipedia-dpr \
                             fusion --alpha 1.05 \
                             run    --topics dpr-curated-test \
                                    --batch-size {self.batch_size} --threads {self.threads} \
                                    --output {output_file} '

        cmd2 = f'python scripts/dpr/convert_trec_run_to_retrieval_json.py --topics dpr-curated-test \
                                                           --index wikipedia-dpr \
                                                           --input {output_file} \
                                                           --output {retrieval_file}'

        cmd3 = f'python tools/scripts/dpr/evaluate_retrieval.py --retrieval {retrieval_file} --topk 20 --regex'
        status1 = os.system(cmd1)
        status2 = os.system(cmd2)
        stdout, stderr = run_command(cmd3)
        score = parse_score(stdout, "Top20")
        self.assertEqual(status1, 0)
        self.assertEqual(status2, 0)
        self.assertAlmostEqual(score, 0.9006, places=4)
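The hybrid run above fuses a dense (FAISS) ranking with a sparse (Lucene) ranking, weighting one side by `--alpha 1.05`. A compact sketch of one common interpolation scheme for this kind of fusion; it illustrates the idea only and is not necessarily the exact formula or normalization `pyserini.hsearch` implements:

def interpolate_runs(dense_hits, sparse_hits, alpha=1.05, k=1000):
    """Fuse two {docid: score} runs by weighted score interpolation."""
    docids = set(dense_hits) | set(sparse_hits)
    fused = {d: dense_hits.get(d, 0.0) + alpha * sparse_hits.get(d, 0.0)
             for d in docids}
    # Documents missing from one run contribute 0 from that side.
    return sorted(fused.items(), key=lambda kv: kv[1], reverse=True)[:k]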